C#.NET Code to Normalize Nested Tables with the Help of HtmlAgilityPack
Description: A nested table is split into multiple separate tables without any nesting
after using the ProcessNestedTables() or ProcessIndividualNestedTable() function given in
this post.
1)
ProcessNestedTables()
: Using this method, all nested tables present in an HTML document can
be normalized. Pass the HTML document object created using
HtmlAgilityPack.
2)
ProcessIndividualNestedTable(): Using this method, an
individual nested table can be normalized. Pass the individual nested table's
HTML as a string.
Before Use (Output)
A
|
The
ABC
|
||||||
First
Table
|
|
||||||
/////text
|
/////text
|
After Use (Output)
A
|
The
ABC
|
First
Table
|
|
/////text
|
/////text
|
Second
Table
|
|
/////text
|
Third
Table
|
/////text
|
Methods to Use:
1) For Complete Html Document:
/// <summary>
/// Walks every table element found by the XPath query "//table" and, for each
/// one that still has a parent, collects its nested tables into a new
/// div that is inserted directly after that table.
/// </summary>
/// <param name="objHTMLdoc">
/// The HtmlAgilityPack document to process. It is modified in place; the
/// <c>ref</c> modifier is kept only for compatibility with existing callers.
/// </param>
/// <returns>The same document instance that was passed in.</returns>
/// <remarks>
/// This method is best-effort: any exception raised while processing is
/// written to the trace listeners and the document is returned in whatever
/// state it had reached, instead of the failure being silently discarded.
/// </remarks>
static HtmlAgilityPack.HtmlDocument ProcessNestedTables(ref HtmlAgilityPack.HtmlDocument objHTMLdoc)
{
    try
    {
        HtmlAgilityPack.HtmlNodeCollection tableList = objHTMLdoc.DocumentNode.SelectNodes("//table");
        if (tableList == null)
        {
            // No tables were selected, so there is nothing to normalize.
            return objHTMLdoc;
        }

        foreach (HtmlAgilityPack.HtmlNode table in tableList)
        {
            #region formatting of nested tables by Shrikant
            HtmlAgilityPack.HtmlNode ParentNode = table.ParentNode;
            if (ParentNode == null)
            {
                // Without a parent there is no place to insert the extracted tables.
                // NOTE(review): presumably this also skips nested tables that an
                // earlier iteration already replaced with a "T-n" marker - confirm.
                continue;
            }

            // Numbering of the "T-n" labels restarts at 1 for every table in the list.
            int NestedTableCount = 1;

            // Container for the extracted tables; it always ends with a <br>.
            HtmlAgilityPack.HtmlNode HTMLNode = HtmlAgilityPack.HtmlNode.CreateNode("<div></div>");
            HtmlAgilityPack.HtmlNode htmlbreak = HtmlAgilityPack.HtmlNode.CreateNode("<br>");
            HTMLNode.AppendChild(htmlbreak);

            GenerateTablesFromNestedTables(table, ref HTMLNode, ref NestedTableCount);

            // Only attach the container when at least one nested table was extracted.
            if (HTMLNode.InnerHtml.Contains("<table"))
            {
                ParentNode.InsertAfter(HTMLNode, table);
            }
            #endregion
        }
    }
    catch (Exception ex)
    {
        // Keep the original best-effort contract (never throw to the caller),
        // but leave a trace so failures are diagnosable.
        System.Diagnostics.Trace.TraceError("ProcessNestedTables failed: " + ex);
    }
    return objHTMLdoc;//OutPut
}
2) For Individual Html Table:
/// <summary>
/// Normalizes the nested tables contained in a single table's HTML and
/// returns the resulting markup.
/// </summary>
/// <param name="tableHtml">
/// HTML of the table to process. It is URL-decoded and stripped of tab,
/// line-feed and carriage-return characters before being parsed.
/// </param>
/// <returns>
/// The outer HTML of the processed document. If processing fails, the error
/// is traced and the outer HTML of the document in its current state is
/// returned instead of an exception being thrown.
/// </returns>
public string ProcessIndividualNestedTable(string tableHtml)
{
    HtmlAgilityPack.HtmlDocument objHTMLdoc = new HtmlAgilityPack.HtmlDocument();
    try
    {
        // The original listing assigned this value to an undeclared "divString"
        // and then parsed the raw input, so the clean-up never took effect.
        // It also contained a string literal broken across two lines (invalid
        // C#); as shown it could only match a line break, which the "\n" and
        // "\r" removals below already cover.
        // NOTE(review): if the published original stripped something else there
        // (e.g. "&nbsp;"), add that Replace back - confirm against the author's code.
        string normalizedHtml = HttpUtility.UrlDecode(tableHtml)
            .Replace("\t", "")
            .Replace("\n", "")
            .Replace("\r", "");

        objHTMLdoc.LoadHtml(normalizedHtml);
        objHTMLdoc = ProcessNestedTables(ref objHTMLdoc);
    }
    catch (Exception ex)
    {
        // Stay best-effort like the original, but do not hide the failure.
        System.Diagnostics.Trace.TraceError("ProcessIndividualNestedTable failed: " + ex);
    }
    return objHTMLdoc.DocumentNode.OuterHtml; //Output
}
Supporting methods used inside for processing:
1)
/// <summary>
/// Extracts the tables nested inside <paramref name="table"/>. Each nested
/// table found is copied into <paramref name="HTMLNode"/> wrapped in a div
/// that starts with a "T-n" label, and is replaced in its original position
/// by a span carrying the same "T-n" label.
/// </summary>
/// <param name="table">The table whose child nodes are scanned.</param>
/// <param name="HTMLNode">
/// Container that receives the extracted tables. New entries are inserted
/// before its last child when it has children, otherwise appended.
/// </param>
/// <param name="NestedLevel">
/// Running number used for the "T-n" labels; incremented once per extracted table.
/// </param>
/// <remarks>
/// Exceptions propagate unchanged. The original wrapped the body in a
/// catch block that did <c>throw ex;</c>, which only reset the stack trace.
/// NOTE(review): only the first table returned by GetTableObject is handled
/// for each second-level child node; further tables under the same node
/// appear to be left in place - confirm whether that is intended.
/// </remarks>
static void GenerateTablesFromNestedTables(HtmlNode table, ref HtmlNode HTMLNode, ref int NestedLevel)
{
    // If the first child of the table is itself a table, the outer table
    // takes over that inner table's content.
    if (table.ChildNodes != null && table.ChildNodes.Count > 0)
    {
        HtmlNode invalidInnerTable = table.ChildNodes[0];
        if (invalidInnerTable.Name == "table")
        {
            table.InnerHtml = invalidInnerTable.InnerHtml;
        }
    }

    for (int trchild = 0; trchild < table.ChildNodes.Count; trchild++)
    {
        HtmlNode trElement = table.ChildNodes[trchild];
        for (int tdchild = 0; tdchild < trElement.ChildNodes.Count; tdchild++)
        {
            HtmlNode tdElement = trElement.ChildNodes[tdchild];
            HtmlNode ChildTableNode = GetTableObject(ref tdElement);
            if (ChildTableNode == null)
            {
                continue;
            }

            // Recursive call: flatten the child table first, so that its own
            // nested tables are already replaced by markers when it is copied.
            GenerateTablesFromNestedTables(ChildTableNode, ref HTMLNode, ref NestedLevel);

            // Copy of the child table, prefixed with its "T-n" label and followed by a <br>.
            HtmlNode FormattedChildTableNode = HtmlNode.CreateNode("<div><span>T-"
                + NestedLevel.ToString() + "</span>"
                + ChildTableNode.OuterHtml + "</div>");
            HtmlNode htmlbreak = HtmlNode.CreateNode("<br>");
            FormattedChildTableNode.AppendChild(htmlbreak);

            if (HTMLNode.ChildNodes.Count > 0)
            {
                // Keep the container's current last child at the end.
                HTMLNode.InsertBefore(FormattedChildTableNode, HTMLNode.LastChild);
            }
            else
            {
                HTMLNode.AppendChild(FormattedChildTableNode);
            }

            // Marker left where the nested table used to be; it carries the
            // same number as the label on the extracted copy.
            HtmlNode htmlNestedMapping = HtmlNode.CreateNode("<span>T-"
                + NestedLevel.ToString() + "</span>");
            NestedLevel += 1;

            HtmlNode ParentChildTableNode = ChildTableNode.ParentNode;
            if (ParentChildTableNode != null)
            {
                ParentChildTableNode.ReplaceChild(htmlNestedMapping, ChildTableNode);
            }
        }
    }
}
2)
/// <summary>
/// Searches <paramref name="tdNode"/> for a table node, looking at its
/// children in order and descending into any child whose inner HTML
/// contains the text "&lt;table".
/// </summary>
/// <param name="tdNode">
/// Node to search. It is never reassigned; the <c>ref</c> modifier is kept
/// only for compatibility with existing callers.
/// </param>
/// <returns>
/// The first table node found; <paramref name="tdNode"/> itself when it has
/// no children and is named "table"; otherwise <c>null</c>.
/// </returns>
/// <remarks>
/// Exceptions propagate unchanged. The original wrapped the body in a
/// catch block that did <c>throw ex;</c>, which only reset the stack trace.
/// </remarks>
static HtmlNode GetTableObject(ref HtmlNode tdNode)
{
    if (tdNode.ChildNodes.Count == 0)
    {
        // A childless node can only match by being a table itself.
        return tdNode.Name == "table" ? tdNode : null;
    }

    for (int child = 0; child < tdNode.ChildNodes.Count; child++)
    {
        HtmlNode ChildNode = tdNode.ChildNodes[child];

        // The header of this "if" was missing from the published listing,
        // leaving a dangling "else if"; it is restored here to mirror the
        // Name == "table" test used for the node itself.
        if (ChildNode.Name == "table")
        {
            return ChildNode;
        }

        if (ChildNode.InnerHtml.Contains("<table"))
        {
            HtmlNode nestedTable = GetTableObject(ref ChildNode);
            if (nestedTable != null)
            {
                return nestedTable;
            }
            // The text "<table" was present but no table node was found
            // below this child; keep looking at the remaining siblings
            // instead of ending the search.
        }
    }

    return null;
}
This comment has been removed by a blog administrator.
ReplyDeleteThis comment has been removed by the author.
ReplyDelete