alexa
menu

Document Processing

    Show / Hide Table of Contents

    OCRProcessor Class

    Represents the logic used to perform OCR on a loaded PDF document.

    Inheritance
    System.Object
    OCRProcessor
    Implements
    System.IDisposable
    Namespace: Syncfusion.OCRProcessor
    Assembly: Syncfusion.OCRProcessor.Base.dll
    Syntax
    public class OCRProcessor : Object, IDisposable

    Constructors

    OCRProcessor()

    Initializes a new instance of the OCRProcessor class.

    Declaration
    public OCRProcessor()

    OCRProcessor(String)

    Initializes a new instance of the OCRProcessor class with the specified tesseract binary file path.

    Declaration
    public OCRProcessor(string tesseractPath)
    Parameters
    Type Name Description
    System.String tesseractPath

    Tesseract binary path.

    Properties

    ExternalEngine

    Sets the external OCR engine to perform the OCR on a PDF or Image.

    Declaration
    public IOcrEngine ExternalEngine { set; }
    Property Value
    Type
    IOcrEngine
    Examples
    // Initialize the OCR processor.
    OCRProcessor processor = new OCRProcessor();
    //Load the input PDF document.
    FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
    PdfLoadedDocument document = new PdfLoadedDocument(stream);
    //Create custom OCR engine to process the OCR. 
    IOcrEngine azureOcrEngine = new AzureExternalOcrEngine();
    //Set external OCR engine.
    processor.ExternalEngine = azureOcrEngine;
    string ocrText = processor.PerformOCR(document);
     FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
    //Save the document into stream.
    document.Save(outputStream);
    document.Close(true);

    ImageEnhancementMode

    Gets or sets the image enhancement mode used by the OCR processor.

    Declaration
    public OcrImageEnhancementMode ImageEnhancementMode { get; set; }
    Property Value
    Type
    OcrImageEnhancementMode
    Examples
    // Initialize the OCR processor.
    OCRProcessor processor = new OCRProcessor();
    //Load the input PDF document.
    FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
    PdfLoadedDocument document = new PdfLoadedDocument(stream);
    //Set the image enhancement mode used by the OCR processor.
    processor.ImageEnhancementMode = OcrImageEnhancementMode.EnhanceForRecognitionOnly;
    string ocrText = processor.PerformOCR(document);
    FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
    //Save the document into stream.
    document.Save(outputStream);
    document.Close(true);

    ImageProcessor

    Gets or sets the image processor used by the OCR processor.

    Declaration
    public IImageProcessor ImageProcessor { get; set; }
    Property Value
    Type
    IImageProcessor
    Examples
    // Initialize the OCR processor.
    OCRProcessor processor = new OCRProcessor();
    //Load the input PDF document.
    FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
    PdfLoadedDocument document = new PdfLoadedDocument(stream);
    //Set the image processor used by the OCR processor.
    processor.ImageProcessor = new ImageProcessor();
    string ocrText = processor.PerformOCR(document);
    FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
    //Save the document into stream.
    document.Save(outputStream);
    document.Close(true);

    Settings

    Gets or sets the OCR settings to a document.

    Declaration
    public OCRSettings Settings { get; set; }
    Property Value
    Type
    OCRSettings
    See Also
    OCRSettings

    TessDataPath

    Gets or sets the tessdata folder path to process the OCR.

    Declaration
    public string TessDataPath { get; set; }
    Property Value
    Type
    System.String
    Examples
    // Initialize the OCR processor
    OCRProcessor processor = new OCRProcessor();
    //loading the input image
    FileStream stream = new FileStream(@"Input.jpeg ", FileMode.Open);
    Bitmap image = new Bitmap(stream);
    // Set OCR language to process
    processor.Settings.Language = Languages.English;
    FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
    processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
    processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
    processor.TesseractPath = TesseractBinariesPath;
    processor.TessDataPath = TessdataPath;
    PdfDocument document = processor.PerformOCR(image);
    document.Save("Output.pdf");
    document.Close(true);

    TesseractPath

    Gets or sets the Tesseract binaries folder path to process the OCR.

    Declaration
    public string TesseractPath { get; set; }
    Property Value
    Type
    System.String
    Examples
    // Initialize the OCR processor
    OCRProcessor processor = new OCRProcessor();
    //loading the input image
    FileStream stream = new FileStream(@"Input.jpeg ", FileMode.Open);
    Bitmap image = new Bitmap(stream);
    // Set OCR language to process
    processor.Settings.Language = Languages.English;
    FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
    processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
    processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
    processor.TesseractPath = TesseractBinariesPath;
    processor.TessDataPath = TessdataPath;
    PdfDocument document = processor.PerformOCR(image);
    document.Save("Output.pdf");
    document.Close(true);

    UnicodeFont

    Sets the Unicode font used to preserve Unicode characters in a PDF document.

    Declaration
    public PdfTrueTypeFont UnicodeFont { set; }
    Property Value
    Type
    PdfTrueTypeFont
    See Also
    PdfTrueTypeFont

    Methods

    CompressJPEGImage(Image)

    Declaration
    public Image CompressJPEGImage(Image bmp1)
    Parameters
    Type Name Description
    System.Drawing.Image bmp1
    Returns
    Type
    System.Drawing.Image

    Dispose()

    Releases the unmanaged and optionally managed resources.

    Declaration
    public void Dispose()

    Finalize()

    Releases unmanaged resources and performs other cleanup operations before the OCRProcessor is reclaimed by garbage collection.

    Declaration
    protected override void Finalize()

    PerformOCR(PdfLoadedDocument)

    Perform the OCR on a PDF document.

    Declaration
    public string PerformOCR(PdfLoadedDocument lDoc)
    Parameters
    Type Name Description
    PdfLoadedDocument lDoc

    PdfLoadedDocument

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    Examples
    // Initialize the OCR processor.
    OCRProcessor processor = new OCRProcessor();
    //Load the input PDF document.
    FileStream stream = new FileStream(@"Input.pdf", FileMode.Open);
    PdfLoadedDocument document = new PdfLoadedDocument(stream);
    string ocrText = processor.PerformOCR(document);
     FileStream outputStream = new FileStream("Output.pdf", FileMode.CreateNew);
    //Save the document into stream.
    document.Save(outputStream);
    document.Close(true);

    PerformOCR(PdfLoadedDocument, Int32, Int32, String)

    Perform the OCR process for a PdfLoadedDocument.

    Declaration
    public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath)
    Parameters
    Type Name Description
    PdfLoadedDocument lDoc

    PdfLoadedDocument

    System.Int32 startIndex

    The number that is used as the start point for the OCR process.

    System.Int32 endIndex

    The number that is used as the end point for the OCR process.

    System.String dataPath

    Tesseract data path

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    PerformOCR(PdfLoadedDocument, Int32, Int32, String, out OCRLayoutResult)

    Perform the OCR process for a PdfLoadedDocument.

    Declaration
    public string PerformOCR(PdfLoadedDocument lDoc, int startIndex, int endIndex, string dataPath, out OCRLayoutResult hocrBounds)
    Parameters
    Type Name Description
    PdfLoadedDocument lDoc

    PdfLoadedDocument

    System.Int32 startIndex

    The number that is used as the start point for the OCR process.

    System.Int32 endIndex

    The number that is used as the end point for the OCR process.

    System.String dataPath

    Tesseract data path

    OCRLayoutResult hocrBounds

    When this method returns, contains the layout result of the OCR'ed document.

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    PerformOCR(PdfLoadedDocument, String)

    Perform the OCR process for a PdfLoadedDocument.

    Declaration
    public string PerformOCR(PdfLoadedDocument lDoc, string dataPath)
    Parameters
    Type Name Description
    PdfLoadedDocument lDoc

    PdfLoadedDocument

    System.String dataPath

    Tesseract data path

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    PerformOCR(PdfLoadedDocument, String, out OCRLayoutResult)

    Perform the OCR process for a PdfLoadedDocument.

    Declaration
    public string PerformOCR(PdfLoadedDocument lDoc, string dataPath, out OCRLayoutResult hocrBounds)
    Parameters
    Type Name Description
    PdfLoadedDocument lDoc

    PdfLoadedDocument

    System.String dataPath

    Tesseract data path

    OCRLayoutResult hocrBounds

    When this method returns, contains the layout result of the OCR'ed document.

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    PerformOCR(Bitmap, String)

    Declaration
    public string PerformOCR(Bitmap img, string dataPath)
    Parameters
    Type Name Description
    System.Drawing.Bitmap img
    System.String dataPath
    Returns
    Type
    System.String

    PerformOCR(Bitmap, String, out OCRLayoutResult)

    Perform the OCR process for an image.

    Declaration
    public string PerformOCR(Bitmap img, string dataPath, out OCRLayoutResult hocrBounds)
    Parameters
    Type Name Description
    System.Drawing.Bitmap img

    Source image to process OCR.

    System.String dataPath

    Tesseract data path

    OCRLayoutResult hocrBounds

    When this method returns, contains the layout results of the OCR'ed image.

    Returns
    Type Description
    System.String

    Returns the OCRed texts

    PerformOCR(Stream)

    Perform OCR on the image stream and create a searchable PDF document.

    Declaration
    public PdfDocument PerformOCR(Stream imgStream)
    Parameters
    Type Name Description
    System.IO.Stream imgStream

    Source image to process OCR.

    Returns
    Type Description
    PdfDocument

    Returns a PdfDocument containing the OCRed text.

    Examples
    // Initialize the OCR processor.
    OCRProcessor processor = new OCRProcessor();
    //loading the input image.
    FileStream image = new FileStream(@"Input.jpeg ", FileMode.Open);
    // Set OCR language to process.
    processor.Settings.Language = Languages.English;
    FileStream fontStream = new FileStream(@"ARIALUNI.ttf", FileMode.Open);
    processor.UnicodeFont = new PdfTrueTypeFont(fontStream, true, PdfFontStyle.Regular, 10);
    processor.Settings.Conformance = PdfConformanceLevel.Pdf_A1B;
    processor.TesseractPath = TesseractBinariesPath;
    processor.TessDataPath = TessdataPath;
    PdfDocument document = processor.PerformOCR(image);
    document.Save("Output.pdf");
    document.Close(true);

    Implements

    System.IDisposable
    Back to top Generated by DocFX
    Copyright © 2001 - 2026 Syncfusion Inc. All Rights Reserved