Winnovative PDF to Text Converter can be used in any type of .NET application
to extract the text from a PDF document and to search text in PDF documents. The integration with existing .NET applications
is extremely easy and no installation is necessary in order to run the converter.
The downloaded archive contains the assembly for .NET and demo applications for text extraction and text search.
The result of the conversion is a .NET String object that you can further manipulate.
The PDF to Text Converter does not require Adobe Reader or other third party tools.
- Extract text from PDF documents
- Search text in PDF documents
- Save the extracted text using various text encodings
- Case sensitive and whole word options for text search
- Support for password protected PDF documents
- Extract the text or search only a range of PDF pages
- Extract text preserving the original PDF layout
- Extract text in PDF reading order or PDF internal order
- Get the number of pages in a PDF document
- Get the PDF document title, keywords, author and description
- Does not require Adobe Reader or other third party tools
- Support for .NET 4.0 framework and later
- Documentation and C# samples for all the features
Code Sample for PDF to Text Conversion
The code below was taken from the PDF to Text demo application available for download in
the PDF to Text Converter archive. In this example an instance of the PdfToTextConverter
class is constructed and used to extract the text from a
PDF document into a .NET String object. The resulting text is saved in a file on disk using the UTF-8 encoding.
/// <summary>
/// Handles the Find Text button click: searches the chosen PDF file for the entered text,
/// draws a yellow rectangle over every match, writes the modified document to
/// DemoFiles\Output\&lt;name&gt;_Highlighted.pdf under the application startup path
/// and then asks the shell to open that file.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">The event data.</param>
private void btnFindText_Click(object sender, EventArgs e)
{
    if (pdfFileTextBox.Text.Trim().Equals(String.Empty))
    {
        MessageBox.Show("Please choose a PDF file to search", "Choose PDF file", MessageBoxButtons.OK);
        return;
    }

    if (textToFindTextBox.Text.Trim().Equals(String.Empty))
    {
        MessageBox.Show("Please enter the text to find", "Text to Find", MessageBoxButtons.OK);
        return;
    }

    // the pdf file to search
    string pdfFileName = pdfFileTextBox.Text.Trim();

    // start page number
    // TryParse is used so that an empty or non-numeric entry is reported to the user
    // instead of leaving the handler as an unhandled FormatException / OverflowException
    int startPageNumber;
    if (!int.TryParse(textBoxStartPage.Text.Trim(), out startPageNumber))
    {
        MessageBox.Show("Please enter a valid start page number", "Start Page", MessageBoxButtons.OK);
        return;
    }

    // end page number
    // when it is 0 the extraction will continue up to the end of document
    int endPageNumber = 0;
    string endPageText = textBoxEndPage.Text.Trim();
    if (endPageText != String.Empty && !int.TryParse(endPageText, out endPageNumber))
    {
        MessageBox.Show("Please enter a valid end page number or leave it empty", "End Page", MessageBoxButtons.OK);
        return;
    }

    Cursor = Cursors.WaitCursor;

    string outputFileName = System.IO.Path.Combine(Application.StartupPath, @"DemoFiles\Output",
        System.IO.Path.GetFileNameWithoutExtension(pdfFileName) + "_Highlighted.pdf");

    Document pdfDocument = null;
    try
    {
        // create the PDF to Text converter
        PdfToTextConverter pdfToTextConverter = new PdfToTextConverter();
        pdfToTextConverter.LicenseKey = "C4WUhJaRhJSEkoqUhJeVipWWip2dnZ2ElA==";

        // search text in PDF
        FindTextLocation[] findTextLocations = pdfToTextConverter.FindText(pdfFileName, textToFindTextBox.Text,
            startPageNumber, endPageNumber, cbCaseSensitive.Checked, cbWholeWord.Checked);

        // open the PDF to search in PDF library
        pdfDocument = new Document(pdfFileName);

        // highlight the found text in PDF
        foreach (FindTextLocation findTextLocation in findTextLocations)
        {
            RectangleElement highlightRectangle = new RectangleElement(findTextLocation.X, findTextLocation.Y,
                findTextLocation.Width, findTextLocation.Height);
            highlightRectangle.BackColor = Color.Yellow;
            highlightRectangle.Opacity = 50;

            // PageNumber is converted to a zero-based index into the Pages collection
            pdfDocument.Pages[findTextLocation.PageNumber - 1].AddElement(highlightRectangle);
        }

        // Save the modified PDF document in a memory buffer
        byte[] outPdfBuffer = pdfDocument.Save();

        // Make sure the output folder exists; WriteAllBytes does not create missing directories
        System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(outputFileName));

        // Write the memory buffer in a PDF file
        System.IO.File.WriteAllBytes(outputFileName, outPdfBuffer);
    }
    catch (Exception ex)
    {
        // The search failed
        MessageBox.Show(String.Format("An error occurred. {0}", ex.Message), "Error");
        return;
    }
    finally
    {
        // Close the PDF document (runs on both the success and the failure path)
        if (pdfDocument != null)
        {
            pdfDocument.Close();
        }

        Cursor = Cursors.Arrow;
    }

    // Open the modified PDF document in default PDF viewer
    try
    {
        System.Diagnostics.Process.Start(outputFileName);
    }
    catch (Exception ex)
    {
        MessageBox.Show(String.Format("Cannot open highlighted PDF file '{0}'. {1}", outputFileName, ex.Message));
    }
}
Code Sample for Text Search in PDF
The code below was taken from the Find Text demo application available for download in
the PDF to Text Converter archive. In this example an instance of the PdfToTextConverter
class is constructed and used to search a given text in a
PDF document and highlight that text in the PDF document.
/// <summary>
/// Handles the Find Text button click: searches the chosen PDF file for the entered text,
/// draws a yellow rectangle over every match, writes the modified document to
/// DemoFiles\Output\&lt;name&gt;_Highlighted.pdf under the application startup path
/// and then asks the shell to open that file.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">The event data.</param>
private void btnFindText_Click(object sender, EventArgs e)
{
    if (pdfFileTextBox.Text.Trim().Equals(String.Empty))
    {
        MessageBox.Show("Please choose a PDF file to search", "Choose PDF file", MessageBoxButtons.OK);
        return;
    }

    if (textToFindTextBox.Text.Trim().Equals(String.Empty))
    {
        MessageBox.Show("Please enter the text to find", "Text to Find", MessageBoxButtons.OK);
        return;
    }

    // the pdf file to search
    string pdfFileName = pdfFileTextBox.Text.Trim();

    // start page number
    // TryParse is used so that an empty or non-numeric entry is reported to the user
    // instead of leaving the handler as an unhandled FormatException / OverflowException
    int startPageNumber;
    if (!int.TryParse(textBoxStartPage.Text.Trim(), out startPageNumber))
    {
        MessageBox.Show("Please enter a valid start page number", "Start Page", MessageBoxButtons.OK);
        return;
    }

    // end page number
    // when it is 0 the extraction will continue up to the end of document
    int endPageNumber = 0;
    string endPageText = textBoxEndPage.Text.Trim();
    if (endPageText != String.Empty && !int.TryParse(endPageText, out endPageNumber))
    {
        MessageBox.Show("Please enter a valid end page number or leave it empty", "End Page", MessageBoxButtons.OK);
        return;
    }

    Cursor = Cursors.WaitCursor;

    string outputFileName = System.IO.Path.Combine(Application.StartupPath, @"DemoFiles\Output",
        System.IO.Path.GetFileNameWithoutExtension(pdfFileName) + "_Highlighted.pdf");

    Document pdfDocument = null;
    try
    {
        // create the PDF to Text converter
        PdfToTextConverter pdfToTextConverter = new PdfToTextConverter();
        pdfToTextConverter.LicenseKey = "C4WUhJaRhJSEkoqUhJeVipWWip2dnZ2ElA==";

        // search text in PDF
        FindTextLocation[] findTextLocations = pdfToTextConverter.FindText(pdfFileName, textToFindTextBox.Text,
            startPageNumber, endPageNumber, cbCaseSensitive.Checked, cbWholeWord.Checked);

        // open the PDF to search in PDF library
        pdfDocument = new Document(pdfFileName);

        // highlight the found text in PDF
        foreach (FindTextLocation findTextLocation in findTextLocations)
        {
            RectangleElement highlightRectangle = new RectangleElement(findTextLocation.X, findTextLocation.Y,
                findTextLocation.Width, findTextLocation.Height);
            highlightRectangle.BackColor = Color.Yellow;
            highlightRectangle.Opacity = 50;

            // PageNumber is converted to a zero-based index into the Pages collection
            pdfDocument.Pages[findTextLocation.PageNumber - 1].AddElement(highlightRectangle);
        }

        // Save the modified PDF document in a memory buffer
        byte[] outPdfBuffer = pdfDocument.Save();

        // Make sure the output folder exists; WriteAllBytes does not create missing directories
        System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(outputFileName));

        // Write the memory buffer in a PDF file
        System.IO.File.WriteAllBytes(outputFileName, outPdfBuffer);
    }
    catch (Exception ex)
    {
        // The search failed
        MessageBox.Show(String.Format("An error occurred. {0}", ex.Message), "Error");
        return;
    }
    finally
    {
        // Close the PDF document (runs on both the success and the failure path)
        if (pdfDocument != null)
        {
            pdfDocument.Close();
        }

        Cursor = Cursors.Arrow;
    }

    // Open the modified PDF document in default PDF viewer
    try
    {
        System.Diagnostics.Process.Start(outputFileName);
    }
    catch (Exception ex)
    {
        MessageBox.Show(String.Format("Cannot open highlighted PDF file '{0}'. {1}", outputFileName, ex.Message));
    }
}