OCRProcessor Class
Represents the logic to perform OCR on a loaded PDF document.
Inheritance
Implements
Namespace: Syncfusion.OCRProcessor
Assembly: Syncfusion.OCRProcessor.Base.dll
Syntax
public class OCRProcessor : Object, IDisposable
Constructors
OCRProcessor()
Initializes a new instance of the OCRProcessor class.
Declaration
public OCRProcessor()
OCRProcessor(String)
Initializes a new instance of the OCRProcessor class with the specified tesseract binary file path.
Declaration
public OCRProcessor(string tesseractPath)
Parameters
| Type | Name | Description |
|---|---|---|
| System.String | tesseractPath | Tesseract binary path. |
Properties
ExternalEngine
Sets the external OCR engine to perform the OCR on a PDF or Image.
Declaration
public IOcrEngine ExternalEngine { set; }
Property Value
| Type |
|---|
| IOcrEngine |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
//Create custom OCR engine to process the OCR.
IOcrEngine azureOcrEngine = new AzureExternalOcrEngine();
//Set external OCR engine.
processor.ExternalEngine = azureOcrEngine;
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
ImageEnhancementMode
Gets or sets the image enhancement mode used by the OCR processor.
Declaration
public OcrImageEnhancementMode ImageEnhancementMode { get; set; }
Property Value
| Type |
|---|
| OcrImageEnhancementMode |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
//Set the image enhancement mode used by the OCR processor.
processor.ImageEnhancementMode = OcrImageEnhancementMode.EnhanceForRecognitionOnly;
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
ImageProcessor
Gets or sets the image processor used by the OCR processor
Declaration
public IImageProcessor ImageProcessor { get; set; }
Property Value
| Type |
|---|
| IImageProcessor |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
//Set the image processor used by the OCR processor.
processor.ImageProcessor = new ImageProcessor();
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
Settings
Gets or sets the OCR settings to a document.
Declaration
public OCRSettings Settings { get; set; }
Property Value
| Type |
|---|
| OCRSettings |
See Also
TessDataPath
Gets or sets the tessdata folder path to process the OCR.
Declaration
public string TessDataPath { get; set; }
Property Value
| Type |
|---|
| System.String |
Examples
// Initialize the OCR processor
OCRProcessor processor = new OCRProcessor();
//loading the input image
FileStream stream = new FileStream(@"Input.jpeg", FileMode.Open);
Bitmap image = new Bitmap(stream);
// Set OCR language to process
processor.Settings.Language = Languages.English;
FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
processor.TesseractPath = TesseractBinariesPath;
processor.TessDataPath = TessdataPath;
PdfDocument document = processor.PerformOCR(image);
document.Save("Output.pdf");
document.Close(true);
TesseractPath
Gets or sets the Tesseract binaries folder path to process the OCR.
Declaration
public string TesseractPath { get; set; }
Property Value
| Type |
|---|
| System.String |
Examples
// Initialize the OCR processor
OCRProcessor processor = new OCRProcessor();
//loading the input image
FileStream stream = new FileStream(@"Input.jpeg", FileMode.Open);
Bitmap image = new Bitmap(stream);
// Set OCR language to process
processor.Settings.Language = Languages.English;
FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
processor.TesseractPath = TesseractBinariesPath;
processor.TessDataPath = TessdataPath;
PdfDocument document = processor.PerformOCR(image);
document.Save("Output.pdf");
document.Close(true);
UnicodeFont
Sets Unicode font to preserve the Unicode characters in a PDF document.
Declaration
public PdfTrueTypeFont UnicodeFont { set; }
Property Value
| Type |
|---|
| PdfTrueTypeFont |
See Also
Methods
CompressJPEGImage(Image)
Declaration
public Image CompressJPEGImage(Image bmp1)
Parameters
| Type | Name | Description |
|---|---|---|
| System.Drawing.Image | bmp1 | |
Returns
| Type |
|---|
| System.Drawing.Image |
Dispose()
Releases the unmanaged and optionally managed resources.
Declaration
public void Dispose()
Finalize()
Releases unmanaged resources and performs other cleanup operations before the OCRProcessor is reclaimed by garbage collection.
Declaration
protected override void Finalize()
PerformOCR(PdfLoadedDocument)
Perform the OCR on a PDF document.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc)
Parameters
| Type | Name | Description |
|---|---|---|
| PdfLoadedDocument | lDoc | PdfLoadedDocument |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
PerformOCR(PdfLoadedDocument, Int32, Int32, String)
Perform the OCR process for a PdfLoadedDocument.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath)
Parameters
| Type | Name | Description |
|---|---|---|
| PdfLoadedDocument | lDoc | PdfLoadedDocument |
| System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
| System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
| System.String | dataPath | Tesseract data path |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, Int32, Int32, String, out OCRLayoutResult)
Perform the OCR process for a PdfLoadedDocument.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
| Type | Name | Description |
|---|---|---|
| PdfLoadedDocument | lDoc | PdfLoadedDocument |
| System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
| System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
| System.String | dataPath | Tesseract data path |
| OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String)
Perform the OCR process for a PdfLoadedDocument.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath)
Parameters
| Type | Name | Description |
|---|---|---|
| PdfLoadedDocument | lDoc | PdfLoadedDocument |
| System.String | dataPath | Tesseract data path |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String, out OCRLayoutResult)
Perform the OCR process for a PdfLoadedDocument.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
| Type | Name | Description |
|---|---|---|
| PdfLoadedDocument | lDoc | PdfLoadedDocument |
| System.String | dataPath | Tesseract data path |
| OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
PerformOCR(Bitmap, String)
Declaration
public string PerformOCR(Bitmap img, string dataPath)
Parameters
| Type | Name | Description |
|---|---|---|
| System.Drawing.Bitmap | img | Source image to process OCR. |
| System.String | dataPath | Tesseract data path |
Returns
| Type |
|---|
| System.String |
PerformOCR(Bitmap, String, out OCRLayoutResult)
Perform the OCR process for an image.
Declaration
public string PerformOCR(Bitmap img, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
| Type | Name | Description |
|---|---|---|
| System.Drawing.Bitmap | img | Source image to process OCR. |
| System.String | dataPath | Tesseract data path |
| OCRLayoutResult | hocrBounds | When this method returns, contains the layout results of the OCR'ed image. |
Returns
| Type | Description |
|---|---|
| System.String | Returns the OCRed texts |
PerformOCR(Stream)
Perform OCR on the image stream and create a searchable PDF document.
Declaration
public PdfDocument PerformOCR(Stream imgStream)
Parameters
| Type | Name | Description |
|---|---|---|
| System.IO.Stream | imgStream | Source image to process OCR. |
Returns
| Type | Description |
|---|---|
| PdfDocument | Returns a PdfDocument containing the OCRed text. |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//loading the input image.
FileStream image = new FileStream(@"Input.jpeg", FileMode.Open);
// Set OCR language to process.
processor.Settings.Language = Languages.English;
FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
processor.TesseractPath = TesseractBinariesPath;
processor.TessDataPath = TessdataPath;
PdfDocument document = processor.PerformOCR(image);
document.Save("Output.pdf");
document.Close(true);