Class PdfDocumentExtractor

The PdfDocumentExtractor class is a utility class for extracting images from PDF documents with reduced memory consumption and improved performance.

Inheritance

System.Object

PdfDocumentExtractor

Implements

System.IDisposable

Namespace: Syncfusion.Pdf.Parsing

Assembly: Syncfusion.Pdf.Imaging.NET.dll

Syntax

public class PdfDocumentExtractor : Object, IDisposable

Constructors

PdfDocumentExtractor()

Declaration

public PdfDocumentExtractor()

Properties

PageCount

Gets the number of pages present in the document.

Declaration

public int PageCount { get; }

Property Value

Type
System.Int32

Examples

//Initialize the PDF document extractor
 PdfDocumentExtractor documentExtractor = new PdfDocumentExtractor();
//Gets the number of pages present in the document
int pageCount = documentExtractor.PageCount;
//Loads a PDF document from a Stream.
documentExtractor.Load(fileStream);
// Extracts all the images from the PDF document
Stream[] streams = documentExtractor.ExtractImages();
//Extracts images from the specified range of pages in the PDF document
Stream[] stream = documentExtractor.ExtractImages(2, 8);
// Release all resources used by the PDF document extractor.
documentExtractor.Dispose();

'Initialize the PDF document extractor
Dim documentExtractor As PdfDocumentExtractor = New PdfDocumentExtractor
'Gets the number of pages present in the document
Dim pageCount As Integer = documentExtractor.PageCount
'Loads a PDF document from a Stream.
documentExtractor.Load(fileStream)
' Extracts all the images from the PDF document
Dim streams() As Stream = documentExtractor.ExtractImages
'Extracts images from the specified range of pages in the PDF document
Dim stream() As Stream = documentExtractor.ExtractImages(2, 8)
' Release all resources used by the PDF document extractor.
documentExtractor.Dispose

Methods

Dispose()

Releases all resources used by the PDF document extractor.

Declaration

public void Dispose()

Examples

//Initialize the PDF document extractor
 PdfDocumentExtractor documentExtractor = new PdfDocumentExtractor();
//Gets the number of pages present in the document
int pageCount = documentExtractor.PageCount;
//Loads a PDF document from a Stream.
documentExtractor.Load(fileStream);
// Extracts all the images from the PDF document
Stream[] streams = documentExtractor.ExtractImages();
//Extracts images from the specified range of pages in the PDF document
Stream[] stream = documentExtractor.ExtractImages(2, 8);
// Release all resources used by the PDF document extractor.
documentExtractor.Dispose();

'Initialize the PDF document extractor
Dim documentExtractor As PdfDocumentExtractor = New PdfDocumentExtractor
'Gets the number of pages present in the document
Dim pageCount As Integer = documentExtractor.PageCount
'Loads a PDF document from a Stream.
documentExtractor.Load(fileStream)
' Extracts all the images from the PDF document
Dim streams() As Stream = documentExtractor.ExtractImages
'Extracts images from the specified range of pages in the PDF document
Dim stream() As Stream = documentExtractor.ExtractImages(2, 8)
' Release all resources used by the PDF document extractor.
documentExtractor.Dispose

ExtractImages()

Extracts all the images from the PDF document and returns an array of Stream objects representing the image data.

Declaration

public Stream[] ExtractImages()

Returns

Type	Description
System.IO.Stream[]	An array of streams containing the image data extracted from the document.

Examples

//Initialize the PDF document extractor
 PdfDocumentExtractor documentExtractor = new PdfDocumentExtractor();
//Gets the number of pages present in the document
int pageCount = documentExtractor.PageCount;
//Loads a PDF document from a Stream.
documentExtractor.Load(fileStream);
// Extracts all the images from the PDF document
Stream[] streams = documentExtractor.ExtractImages();
//Extracts images from the specified range of pages in the PDF document
Stream[] stream = documentExtractor.ExtractImages(2, 8);
// Release all resources used by the PDF document extractor.
documentExtractor.Dispose();

'Initialize the PDF document extractor
Dim documentExtractor As PdfDocumentExtractor = New PdfDocumentExtractor
'Gets the number of pages present in the document
Dim pageCount As Integer = documentExtractor.PageCount
'Loads a PDF document from a Stream.
documentExtractor.Load(fileStream)
' Extracts all the images from the PDF document
Dim streams() As Stream = documentExtractor.ExtractImages
'Extracts images from the specified range of pages in the PDF document
Dim stream() As Stream = documentExtractor.ExtractImages(2, 8)
' Release all resources used by the PDF document extractor.
documentExtractor.Dispose

ExtractImages(Int32, Int32)

Extracts images from the specified range of pages in the PDF document and returns an array of Stream objects representing the image data.

Declaration

public Stream[] ExtractImages(int startPageIndex, int endPageIndex)

Parameters

Type	Name	Description
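System.Int32	startPageIndex	The index of the first page in the range to extract images from.
System.Int32	endPageIndex	The index of the last page in the range to extract images from.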

Returns

Type	Description
System.IO.Stream[]	An array of streams containing the image data extracted from the specified page range.

Examples

//Initialize the PDF document extractor
 PdfDocumentExtractor documentExtractor = new PdfDocumentExtractor();
//Gets the number of pages present in the document
int pageCount = documentExtractor.PageCount;
//Loads a PDF document from a Stream.
documentExtractor.Load(fileStream);
// Extracts all the images from the PDF document
Stream[] streams = documentExtractor.ExtractImages();
//Extracts images from the specified range of pages in the PDF document
Stream[] stream = documentExtractor.ExtractImages(2, 8);
// Release all resources used by the PDF document extractor.
documentExtractor.Dispose();

'Initialize the PDF document extractor
Dim documentExtractor As PdfDocumentExtractor = New PdfDocumentExtractor
'Gets the number of pages present in the document
Dim pageCount As Integer = documentExtractor.PageCount
'Loads a PDF document from a Stream.
documentExtractor.Load(fileStream)
' Extracts all the images from the PDF document
Dim streams() As Stream = documentExtractor.ExtractImages
'Extracts images from the specified range of pages in the PDF document
Dim stream() As Stream = documentExtractor.ExtractImages(2, 8)
' Release all resources used by the PDF document extractor.
documentExtractor.Dispose

Load(Stream, String)

Loads a PDF document from a Stream. If the document is password-protected, supply its password through the optional password parameter so that the document can be decrypted.

Declaration

public void Load(Stream stream, string password = null)

Parameters

Type	Name	Description
System.IO.Stream	stream	The stream containing the PDF document to load
System.String	password	The password (user or owner) of the encrypted document.

Examples

//Initialize the PDF document extractor
 PdfDocumentExtractor documentExtractor = new PdfDocumentExtractor();
//Gets the number of pages present in the document
int pageCount = documentExtractor.PageCount;
//Loads a PDF document from a Stream.
documentExtractor.Load(fileStream);
// Extracts all the images from the PDF document
Stream[] streams = documentExtractor.ExtractImages();
//Extracts images from the specified range of pages in the PDF document
Stream[] stream = documentExtractor.ExtractImages(2, 8);
// Release all resources used by the PDF document extractor.
documentExtractor.Dispose();

'Initialize the PDF document extractor
Dim documentExtractor As PdfDocumentExtractor = New PdfDocumentExtractor
'Gets the number of pages present in the document
Dim pageCount As Integer = documentExtractor.PageCount
'Loads a PDF document from a Stream.
documentExtractor.Load(fileStream)
' Extracts all the images from the PDF document
Dim streams() As Stream = documentExtractor.ExtractImages
'Extracts images from the specified range of pages in the PDF document
Dim stream() As Stream = documentExtractor.ExtractImages(2, 8)
' Release all resources used by the PDF document extractor.
documentExtractor.Dispose

Implements

System.IDisposable