Iterating Word document elements
29 Nov 2024 · 24 minutes to read
Keep the following points in mind while iterating the document elements:
- Document consists of one or more sections.
- Section contains the contents present in Headers, Footers and main document through the instances of WTextBody.
- WTextBody contains three types of elements: paragraph, table, or block content control.
Remove paragraph with style
The following code example shows how to iterate through the Word document and remove the paragraphs that use a particular style.
NOTE
Refer to the appropriate tabs in the code snippets section: C# [Cross-platform] for ASP.NET Core, Blazor, Xamarin, UWP, .NET MAUI, and WinUI; C# [Windows-specific] for WinForms and WPF; VB.NET [Windows-specific] for VB.NET applications.
//Opens the template for reading; the using statement releases the file handle when processing ends
using (FileStream fileStreamPath = new FileStream("Template.docx", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
//Opens an existing document from the stream through constructor of WordDocument class
using (WordDocument document = new WordDocument(fileStreamPath, FormatType.Automatic))
{
    //Processes the body contents for each section in the Word document
    foreach (WSection section in document.Sections)
    {
        //Accesses the Body of the section, which holds the main document contents
        WTextBody sectionBody = section.Body;
        IterateTextBody(sectionBody);
        WHeadersFooters headersFooters = section.HeadersFooters;
        //Consider that OddHeader and OddFooter are applied to this document
        //Iterates through the TextBody of OddHeader and OddFooter
        IterateTextBody(headersFooters.OddHeader);
        IterateTextBody(headersFooters.OddFooter);
    }
    //Saves the modified document to an in-memory stream
    MemoryStream stream = new MemoryStream();
    document.Save(stream, FormatType.Docx);
    //Closes the Word document
    document.Close();
}
//Opens an existing document from file system through constructor of WordDocument class
//The using statement releases the document even if iterating or saving throws
using (WordDocument document = new WordDocument("Template.docx"))
{
    //Processes the body contents for each section in the Word document
    foreach (WSection section in document.Sections)
    {
        //Accesses the Body of the section, which holds the main document contents
        WTextBody sectionBody = section.Body;
        IterateTextBody(sectionBody);
        WHeadersFooters headersFooters = section.HeadersFooters;
        //Consider that OddHeader and OddFooter are applied to this document
        //Iterates through the TextBody of OddHeader and OddFooter
        IterateTextBody(headersFooters.OddHeader);
        IterateTextBody(headersFooters.OddFooter);
    }
    //Saves and closes the document instance
    document.Save("Result.docx");
    document.Close();
}
'Opens an existing document from file system through constructor of WordDocument class
'The Using block releases the document even if iterating or saving throws
Using document As New WordDocument("Template.docx")
    'Processes the body contents for each section in the Word document
    For Each section As WSection In document.Sections
        'Accesses the Body of the section, which holds the main document contents
        Dim sectionBody As WTextBody = section.Body
        IterateTextBody(sectionBody)
        Dim headersFooters As WHeadersFooters = section.HeadersFooters
        'Consider that OddHeader and OddFooter are applied to this document
        'Iterates through the text body of OddHeader and OddFooter
        IterateTextBody(headersFooters.OddHeader)
        IterateTextBody(headersFooters.OddFooter)
    Next
    'Saves and closes the document instance
    document.Save("Result.docx")
    document.Close()
End Using
The following code example provides supporting methods for the above code.
/// <summary>
/// Walks the child items of a text body, removes every paragraph whose style name is "MyStyle",
/// and descends into tables and block content controls.
/// </summary>
/// <param name="textBody">The text body whose child entities are processed.</param>
private static void IterateTextBody(WTextBody textBody)
{
    //Iterates from the last child to the first. Removing the item at index i then only shifts
    //items that were already visited, so consecutive matching paragraphs are not skipped.
    for (int i = textBody.ChildEntities.Count - 1; i >= 0; i--)
    {
        //IEntity is the basic unit in DocIO DOM.
        //Accesses the body items (should be either paragraph, table or block content control) as IEntity
        IEntity bodyItemEntity = textBody.ChildEntities[i];
        //Decides the element type by using EntityType
        switch (bodyItemEntity.EntityType)
        {
            case EntityType.Paragraph:
                WParagraph paragraph = (WParagraph)bodyItemEntity;
                //Checks for particular style name and removes the paragraph from DOM
                if (paragraph.StyleName == "MyStyle")
                {
                    //The paragraph was read from index i, so no IndexOf lookup is needed
                    textBody.ChildEntities.RemoveAt(i);
                }
                break;
            case EntityType.Table:
                //Table is a collection of rows and cells
                //Iterates through table's DOM
                IterateTable((WTable)bodyItemEntity);
                break;
            case EntityType.BlockContentControl:
                BlockContentControl blockContentControl = (BlockContentControl)bodyItemEntity;
                //Iterates to the body items of Block Content Control.
                IterateTextBody(blockContentControl.TextBody);
                break;
        }
    }
}
/// <summary>
/// Walks the child items of a text body, removes every paragraph whose style name is "MyStyle",
/// and descends into tables and block content controls.
/// </summary>
/// <param name="textBody">The text body whose child entities are processed.</param>
private static void IterateTextBody(WTextBody textBody)
{
    //Iterates from the last child to the first. Removing the item at index i then only shifts
    //items that were already visited, so consecutive matching paragraphs are not skipped.
    for (int i = textBody.ChildEntities.Count - 1; i >= 0; i--)
    {
        //IEntity is the basic unit in DocIO DOM.
        //Accesses the body items (should be either paragraph, table or block content control) as IEntity
        IEntity bodyItemEntity = textBody.ChildEntities[i];
        //Decides the element type by using EntityType
        switch (bodyItemEntity.EntityType)
        {
            case EntityType.Paragraph:
                WParagraph paragraph = (WParagraph)bodyItemEntity;
                //Checks for particular style name and removes the paragraph from DOM
                if (paragraph.StyleName == "MyStyle")
                {
                    //The paragraph was read from index i, so no IndexOf lookup is needed
                    textBody.ChildEntities.RemoveAt(i);
                }
                break;
            case EntityType.Table:
                //Table is a collection of rows and cells
                //Iterates through table's DOM
                IterateTable((WTable)bodyItemEntity);
                break;
            case EntityType.BlockContentControl:
                BlockContentControl blockContentControl = (BlockContentControl)bodyItemEntity;
                //Iterates to the body items of Block Content Control.
                IterateTextBody(blockContentControl.TextBody);
                break;
        }
    }
}
''' <summary>
''' Walks the child items of a text body, removes every paragraph whose style name is "MyStyle",
''' and descends into tables and block content controls.
''' </summary>
''' <param name="textBody">The text body whose child entities are processed.</param>
Private Shared Sub IterateTextBody(textBody As WTextBody)
    'Iterates from the last child to the first. A For loop evaluates its bounds only once,
    'so counting upwards while removing items would run past the end of the collection
    'and would also skip the item that follows each removed paragraph.
    For i As Integer = textBody.ChildEntities.Count - 1 To 0 Step -1
        'IEntity is the basic unit in DocIO DOM.
        'Accesses the body items (should be either paragraph, table or block content control) as IEntity
        Dim bodyItemEntity As IEntity = textBody.ChildEntities(i)
        'Decides the element type by using EntityType
        Select Case bodyItemEntity.EntityType
            Case EntityType.Paragraph
                Dim paragraph As WParagraph = DirectCast(bodyItemEntity, WParagraph)
                'Checks for a particular style name and removes the paragraph from DOM
                If paragraph.StyleName = "MyStyle" Then
                    'The paragraph was read from index i, so no IndexOf lookup is needed
                    textBody.ChildEntities.RemoveAt(i)
                End If
            Case EntityType.Table
                'Table is a collection of rows and cells
                'Iterates through table's DOM
                IterateTable(DirectCast(bodyItemEntity, WTable))
            Case EntityType.BlockContentControl
                Dim contentControl As BlockContentControl = DirectCast(bodyItemEntity, BlockContentControl)
                'Iterates to the body items of Block Content Control.
                IterateTextBody(contentControl.TextBody)
        End Select
    Next
End Sub
The following code example provides supporting methods for the above code.
/// <summary>
/// Visits every cell of every row in the given table and processes each cell as a text body.
/// </summary>
/// <param name="table">The table whose rows and cells are traversed.</param>
private static void IterateTable(WTable table)
{
    foreach (WTableRow tableRow in table.Rows)
    {
        foreach (WTableCell tableCell in tableRow.Cells)
        {
            //A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell);
        }
    }
}
/// <summary>
/// Visits every cell of every row in the given table and processes each cell as a text body.
/// </summary>
/// <param name="table">The table whose rows and cells are traversed.</param>
private static void IterateTable(WTable table)
{
    foreach (WTableRow tableRow in table.Rows)
    {
        foreach (WTableCell tableCell in tableRow.Cells)
        {
            //A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell);
        }
    }
}
''' <summary>
''' Visits every cell of every row in the given table and processes each cell as a text body.
''' </summary>
''' <param name="table">The table whose rows and cells are traversed.</param>
Private Shared Sub IterateTable(table As WTable)
    For Each tableRow As WTableRow In table.Rows
        For Each tableCell As WTableCell In tableRow.Cells
            'A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell)
        Next
    Next
End Sub
You can download a complete working sample from GitHub.
Modify Hyperlink Uri
The following code example shows how to iterate through the paragraphs and replace the Uri of a hyperlink (Hyperlink) and a specific piece of text (WTextRange) with new values.
//Opens the template for reading; the using statement releases the file handle when processing ends
using (FileStream fileStreamPath = new FileStream("Template.docx", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
//Opens an existing document from the stream through constructor of WordDocument class
using (WordDocument document = new WordDocument(fileStreamPath, FormatType.Automatic))
{
    //Processes the body contents for each section in the Word document
    foreach (WSection section in document.Sections)
    {
        //Accesses the Body of the section, which holds the main document contents
        WTextBody sectionBody = section.Body;
        IterateTextBody(sectionBody);
        WHeadersFooters headersFooters = section.HeadersFooters;
        //Consider that OddHeader and OddFooter are applied to this document
        //Iterates through the TextBody of OddHeader and OddFooter
        IterateTextBody(headersFooters.OddHeader);
        IterateTextBody(headersFooters.OddFooter);
    }
    //Saves the modified document to an in-memory stream
    MemoryStream stream = new MemoryStream();
    document.Save(stream, FormatType.Docx);
    //Closes the Word document
    document.Close();
}
//Opens an existing document from file system through constructor of WordDocument class
//The using statement releases the document even if iterating or saving throws
using (WordDocument document = new WordDocument("Template.docx"))
{
    //Processes the body contents for each section in the Word document
    foreach (WSection section in document.Sections)
    {
        //Accesses the Body of the section, which holds the main document contents
        WTextBody sectionBody = section.Body;
        IterateTextBody(sectionBody);
        WHeadersFooters headersFooters = section.HeadersFooters;
        //Consider that OddHeader and OddFooter are applied to this document
        //Iterates through the TextBody of OddHeader and OddFooter
        IterateTextBody(headersFooters.OddHeader);
        IterateTextBody(headersFooters.OddFooter);
    }
    //Saves and closes the document instance
    document.Save("Result.docx");
    document.Close();
}
'Opens an existing document from file system through constructor of WordDocument class
'The Using block releases the document even if iterating or saving throws
Using document As New WordDocument("Template.docx")
    'Processes the body contents for each section in the Word document
    For Each section As WSection In document.Sections
        'Accesses the Body of the section, which holds the main document contents
        Dim sectionBody As WTextBody = section.Body
        IterateTextBody(sectionBody)
        Dim headersFooters As WHeadersFooters = section.HeadersFooters
        'Consider that OddHeader and OddFooter are applied to this document
        'Iterates through the TextBody of OddHeader and OddFooter
        'The OddHeader call was previously fused into the comment above and therefore never ran
        IterateTextBody(headersFooters.OddHeader)
        IterateTextBody(headersFooters.OddFooter)
    Next
    'Saves and closes the document instance
    document.Save("Result.docx")
    document.Close()
End Using
The following code example provides supporting methods for the above code.
/// <summary>
/// Walks the child items of a text body and dispatches each one by its entity type:
/// paragraphs to IterateParagraph, tables to IterateTable, and block content controls
/// back to this method for their inner text body.
/// </summary>
/// <param name="textBody">The text body whose child entities are processed.</param>
private static void IterateTextBody(WTextBody textBody)
{
    for (int index = 0; index < textBody.ChildEntities.Count; index++)
    {
        //Each body item is read through the IEntity interface and classified by EntityType
        IEntity child = textBody.ChildEntities[index];
        EntityType childType = child.EntityType;

        if (childType == EntityType.Paragraph)
        {
            //Processes the items held by the paragraph
            WParagraph paragraph = child as WParagraph;
            IterateParagraph(paragraph.Items);
        }
        else if (childType == EntityType.Table)
        {
            //Processes the rows and cells of the table
            IterateTable(child as WTable);
        }
        else if (childType == EntityType.BlockContentControl)
        {
            //Processes the body items nested inside the block content control
            BlockContentControl contentControl = child as BlockContentControl;
            IterateTextBody(contentControl.TextBody);
        }
    }
}
/// <summary>
/// Walks the child items of a text body and dispatches each one by its entity type:
/// paragraphs to IterateParagraph, tables to IterateTable, and block content controls
/// back to this method for their inner text body.
/// </summary>
/// <param name="textBody">The text body whose child entities are processed.</param>
private static void IterateTextBody(WTextBody textBody)
{
    for (int index = 0; index < textBody.ChildEntities.Count; index++)
    {
        //Each body item is read through the IEntity interface and classified by EntityType
        IEntity child = textBody.ChildEntities[index];
        EntityType childType = child.EntityType;

        if (childType == EntityType.Paragraph)
        {
            //Processes the items held by the paragraph
            WParagraph paragraph = child as WParagraph;
            IterateParagraph(paragraph.Items);
        }
        else if (childType == EntityType.Table)
        {
            //Processes the rows and cells of the table
            IterateTable(child as WTable);
        }
        else if (childType == EntityType.BlockContentControl)
        {
            //Processes the body items nested inside the block content control
            BlockContentControl contentControl = child as BlockContentControl;
            IterateTextBody(contentControl.TextBody);
        }
    }
}
''' <summary>
''' Walks the child items of a text body and dispatches each one by its entity type:
''' paragraphs to IterateParagraph, tables to IterateTable, and block content controls
''' back to this method for their inner text body.
''' </summary>
''' <param name="textBody">The text body whose child entities are processed.</param>
Private Shared Sub IterateTextBody(textBody As WTextBody)
    For index As Integer = 0 To textBody.ChildEntities.Count - 1
        'Each body item is read through the IEntity interface and classified by EntityType
        Dim child As IEntity = textBody.ChildEntities(index)
        Dim childType As EntityType = child.EntityType

        If childType = EntityType.Paragraph Then
            'Processes the items held by the paragraph
            Dim paragraph As WParagraph = TryCast(child, WParagraph)
            IterateParagraph(paragraph.Items)
        ElseIf childType = EntityType.Table Then
            'Processes the rows and cells of the table
            IterateTable(TryCast(child, WTable))
        ElseIf childType = EntityType.BlockContentControl Then
            'Processes the body items nested inside the block content control
            Dim contentControl As BlockContentControl = TryCast(child, BlockContentControl)
            IterateTextBody(contentControl.TextBody)
        End If
    Next
End Sub
The following code example provides supporting methods for the above code.
/// <summary>
/// Visits every cell of every row in the given table and processes each cell as a text body.
/// </summary>
/// <param name="table">The table whose rows and cells are traversed.</param>
private static void IterateTable(WTable table)
{
    foreach (WTableRow tableRow in table.Rows)
    {
        foreach (WTableCell tableCell in tableRow.Cells)
        {
            //A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell);
        }
    }
}
/// <summary>
/// Visits every cell of every row in the given table and processes each cell as a text body.
/// </summary>
/// <param name="table">The table whose rows and cells are traversed.</param>
private static void IterateTable(WTable table)
{
    foreach (WTableRow tableRow in table.Rows)
    {
        foreach (WTableCell tableCell in tableRow.Cells)
        {
            //A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell);
        }
    }
}
''' <summary>
''' Visits every cell of every row in the given table and processes each cell as a text body.
''' </summary>
''' <param name="table">The table whose rows and cells are traversed.</param>
Private Shared Sub IterateTable(table As WTable)
    For Each tableRow As WTableRow In table.Rows
        For Each tableCell As WTableCell In tableRow.Cells
            'A cell is accepted where a WTextBody is expected, so the text body traversal is reused for it
            IterateTextBody(tableCell)
        Next
    Next
End Sub
The following code example provides supporting methods for the above code.
/// <summary>
/// Walks the items of a paragraph. Text ranges whose text is exactly "Andrew" are changed to
/// "Fuller"; web hyperlinks displayed as "HTML" get a new Uri; text boxes, shapes and inline
/// content controls are traversed recursively.
/// </summary>
/// <param name="paraItems">The paragraph item collection to process.</param>
private static void IterateParagraph(ParagraphItemCollection paraItems)
{
    for (int index = 0; index < paraItems.Count; index++)
    {
        Entity item = paraItems[index];
        //Decides how to handle the item by using EntityType
        switch (item.EntityType)
        {
            case EntityType.TextRange:
            {
                //Replaces the text with another
                WTextRange range = item as WTextRange;
                if (range.Text == "Andrew")
                {
                    range.Text = "Fuller";
                }
                break;
            }
            case EntityType.Field:
            {
                WField field = item as WField;
                if (field.FieldType != FieldType.FieldHyperlink)
                {
                    break;
                }
                //Wraps the field in a Hyperlink instance to manipulate the hyperlink
                Hyperlink link = new Hyperlink(field);
                bool isTargetLink = link.Type == HyperlinkType.WebLink && link.TextToDisplay == "HTML";
                if (isTargetLink)
                {
                    //Modifies the Uri of the hyperlink
                    link.Uri = "http://www.w3schools.com/";
                }
                break;
            }
            case EntityType.TextBox:
            {
                //Iterates to the body items of textbox.
                WTextBox box = item as WTextBox;
                IterateTextBody(box.TextBoxBody);
                break;
            }
            case EntityType.Shape:
            {
                //Iterates to the body items of shape.
                Shape drawing = item as Shape;
                IterateTextBody(drawing.TextBody);
                break;
            }
            case EntityType.InlineContentControl:
            {
                //Iterates to the paragraph items of inline content control.
                InlineContentControl inlineControl = item as InlineContentControl;
                IterateParagraph(inlineControl.ParagraphItems);
                break;
            }
        }
    }
}
/// <summary>
/// Walks the items of a paragraph. Text ranges whose text is exactly "Andrew" are changed to
/// "Fuller"; web hyperlinks displayed as "HTML" get a new Uri; text boxes, shapes and inline
/// content controls are traversed recursively.
/// </summary>
/// <param name="paraItems">The paragraph item collection to process.</param>
private static void IterateParagraph(ParagraphItemCollection paraItems)
{
    for (int index = 0; index < paraItems.Count; index++)
    {
        Entity item = paraItems[index];
        //Decides how to handle the item by using EntityType
        switch (item.EntityType)
        {
            case EntityType.TextRange:
            {
                //Replaces the text with another
                WTextRange range = item as WTextRange;
                if (range.Text == "Andrew")
                {
                    range.Text = "Fuller";
                }
                break;
            }
            case EntityType.Field:
            {
                WField field = item as WField;
                if (field.FieldType != FieldType.FieldHyperlink)
                {
                    break;
                }
                //Wraps the field in a Hyperlink instance to manipulate the hyperlink
                Hyperlink link = new Hyperlink(field);
                bool isTargetLink = link.Type == HyperlinkType.WebLink && link.TextToDisplay == "HTML";
                if (isTargetLink)
                {
                    //Modifies the Uri of the hyperlink
                    link.Uri = "http://www.w3schools.com/";
                }
                break;
            }
            case EntityType.TextBox:
            {
                //Iterates to the body items of textbox.
                WTextBox box = item as WTextBox;
                IterateTextBody(box.TextBoxBody);
                break;
            }
            case EntityType.Shape:
            {
                //Iterates to the body items of shape.
                Shape drawing = item as Shape;
                IterateTextBody(drawing.TextBody);
                break;
            }
            case EntityType.InlineContentControl:
            {
                //Iterates to the paragraph items of inline content control.
                InlineContentControl inlineControl = item as InlineContentControl;
                IterateParagraph(inlineControl.ParagraphItems);
                break;
            }
        }
    }
}
''' <summary>
''' Walks the items of a paragraph. Text ranges whose text is exactly "Andrew" are changed to
''' "Fuller"; web hyperlinks displayed as "HTML" get a new Uri; text boxes, shapes and inline
''' content controls are traversed recursively.
''' </summary>
''' <param name="paraItems">The paragraph item collection to process.</param>
Private Shared Sub IterateParagraph(paraItems As ParagraphItemCollection)
    For index As Integer = 0 To paraItems.Count - 1
        Dim item As Entity = paraItems(index)
        'Decides how to handle the item by using EntityType
        Select Case item.EntityType
            Case EntityType.TextRange
                'Replaces the text with another
                Dim range As WTextRange = TryCast(item, WTextRange)
                If range.Text = "Andrew" Then
                    range.Text = "Fuller"
                End If
            Case EntityType.Field
                Dim field As WField = TryCast(item, WField)
                If field.FieldType = FieldType.FieldHyperlink Then
                    'Wraps the field in a Hyperlink instance to manipulate the hyperlink
                    Dim link As New Hyperlink(field)
                    Dim isTargetLink As Boolean = link.Type = HyperlinkType.WebLink AndAlso link.TextToDisplay = "HTML"
                    If isTargetLink Then
                        'Modifies the Uri of the hyperlink
                        link.Uri = "http://www.w3schools.com/"
                    End If
                End If
            Case EntityType.TextBox
                'Iterates to the body items of textbox.
                Dim box As WTextBox = TryCast(item, WTextBox)
                IterateTextBody(box.TextBoxBody)
            Case EntityType.Shape
                'Iterates to the body items of shape.
                Dim drawing As Shape = TryCast(item, Shape)
                IterateTextBody(drawing.TextBody)
            Case EntityType.InlineContentControl
                'Iterates to the paragraph items of inline content control.
                Dim inlineControl As InlineContentControl = TryCast(item, InlineContentControl)
                IterateParagraph(inlineControl.ParagraphItems)
        End Select
    Next
End Sub
You can download a complete working sample from GitHub.
TIPS
If you want to locate a specific item in a Word document without iterating through every element one by one, you can use the find-item functionality instead.