Class OCRProcessor
Represents the logic used to perform OCR on a loaded PDF document.
Inheritance
Implements
Inherited Members
Namespace: Syncfusion.OCRProcessor
Assembly: Syncfusion.OCRProcessor.Base.dll
Syntax
public class OCRProcessor : IDisposable
Constructors
OCRProcessor()
Initializes a new instance of the OCRProcessor class.
Declaration
public OCRProcessor()
OCRProcessor(String)
Initializes a new instance of the OCRProcessor class with the specified tesseract binary file path.
Declaration
public OCRProcessor(string tesseractPath)
Parameters
Type | Name | Description |
---|---|---|
System.String | tesseractPath | Tesseract binary path. |
Properties
ExternalEngine
Sets the external OCR engine to perform the OCR on a PDF or Image.
Declaration
public IOcrEngine ExternalEngine { set; }
Property Value
Type | Description |
---|---|
IOcrEngine |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
//Create custom OCR engine to process the OCR.
IOcrEngine azureOcrEngine = new AzureExternalOcrEngine();
//Set external OCR engine.
processor.ExternalEngine = azureOcrEngine;
//Perform OCR on the loaded document (the method returns the recognized text as a string).
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
HasUnsupportedImages
Gets a Boolean value that indicates whether the document contains unsupported images.
Declaration
public bool HasUnsupportedImages { get; }
Property Value
Type | Description |
---|---|
System.Boolean |
Settings
Gets or sets the OCR settings for the document.
Declaration
public OCRSettings Settings { get; set; }
Property Value
Type | Description |
---|---|
OCRSettings |
See Also
TessDataPath
Gets or sets the tessdata folder path to process the OCR
Declaration
public string TessDataPath { get; set; }
Property Value
Type | Description |
---|---|
System.String |
TesseractPath
Gets or sets the Tesseract binaries folder path used to process the OCR.
Declaration
public string TesseractPath { get; set; }
Property Value
Type | Description |
---|---|
System.String |
Methods
CompressJPEGImage(Image)
Declaration
public Image CompressJPEGImage(Image bmp1)
Parameters
Type | Name | Description |
---|---|---|
System.Drawing.Image | bmp1 |
Returns
Type | Description |
---|---|
System.Drawing.Image |
Dispose()
Releases unmanaged and - optionally - managed resources
Declaration
public void Dispose()
Finalize()
Releases unmanaged resources and performs other cleanup operations before the OCRProcessor is reclaimed by garbage collection.
Declaration
protected void Finalize()
PerformOCR(PdfLoadedDocument)
Perform the OCR on a PDF document.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
Examples
// Initialize the OCR processor.
OCRProcessor processor = new OCRProcessor();
//Load the input PDF document.
FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
PdfLoadedDocument document = new PdfLoadedDocument(stream);
//Perform OCR on the loaded document (the method returns the recognized text as a string).
processor.PerformOCR(document);
FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
//Save the document into stream.
document.Save(outputStream);
document.Close(true);
PerformOCR(PdfLoadedDocument, Int32, Int32, String)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
System.String | dataPath | Tesseract data path |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, Int32, Int32, String, out OCRLayoutResult)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
System.String | dataPath | Tesseract data path |
OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, Int32, Int32, String, Boolean)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath, bool isMemoryOptimized)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
System.String | dataPath | Tesseract data path |
System.Boolean | isMemoryOptimized | Enable this parameter for memory optimization of larger PDF documents |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, Int32, Int32, String, Boolean, out OCRLayoutResult)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath, bool isMemoryOptimized, out OCRLayoutResult hocrBounds)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.Int32 | startIndex | The number that is used as the start point for the OCR process. |
System.Int32 | endIndex | The number that is used as the end point for the OCR process. |
System.String | dataPath | Tesseract data path |
System.Boolean | isMemoryOptimized | Enable this parameter for memory optimization of larger PDF documents |
OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.String | dataPath | Tesseract data path |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String, out OCRLayoutResult)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.String | dataPath | Tesseract data path |
OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String, Boolean)
Perform OCR process for the PdfLoadedDocument, with optional memory optimization for larger PDF documents.
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath, bool isMemoryOptimized)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.String | dataPath | Tesseract data path |
System.Boolean | isMemoryOptimized | Enable this parameter for memory optimization of larger PDF documents |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(PdfLoadedDocument, String, Boolean, out OCRLayoutResult)
Perform OCR process for the PdfLoadedDocument
Declaration
public string PerformOCR(PdfLoadedDocument lDoc, string dataPath, bool isMemoryOptimized, out OCRLayoutResult hocrBounds)
Parameters
Type | Name | Description |
---|---|---|
PdfLoadedDocument | lDoc | PdfLoadedDocument |
System.String | dataPath | Tesseract data path |
System.Boolean | isMemoryOptimized | Enable this parameter for memory optimization of larger PDF documents |
OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed document. |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(Bitmap)
Perform OCR on the image and create searchable PDF document.
Declaration
public PdfDocument PerformOCR(Bitmap image)
Parameters
Type | Name | Description |
---|---|---|
System.Drawing.Bitmap | image | Source image to process OCR. |
Returns
Type | Description |
---|---|
PdfDocument | Returns a PdfDocument that contains the OCRed text. |
PerformOCR(Bitmap, String)
Perform OCR process for the Image
Declaration
public string PerformOCR(Bitmap img, string dataPath)
Parameters
Type | Name | Description |
---|---|---|
System.Drawing.Bitmap | img | Source image to process OCR. |
System.String | dataPath | Tesseract data path |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(Bitmap, String, out OCRLayoutResult)
Perform OCR process for the Image
Declaration
public string PerformOCR(Bitmap img, string dataPath, out OCRLayoutResult hocrBounds)
Parameters
Type | Name | Description |
---|---|---|
System.Drawing.Bitmap | img | Source image to process OCR. |
System.String | dataPath | Tesseract data path |
OCRLayoutResult | hocrBounds | When this method returns, contains the layout result of the OCR'ed image. |
Returns
Type | Description |
---|---|
System.String | Returns the OCRed texts |
PerformOCR(Stream)
Perform OCR on the image and create searchable PDF document.
Declaration
public PdfDocument PerformOCR(Stream imgStream)
Parameters
Type | Name | Description |
---|---|---|
System.IO.Stream | imgStream | Source image to process OCR. |
Returns
Type | Description |
---|---|
PdfDocument | Returns a PdfDocument that contains the OCRed text. |