Monday, 17 February 2014

How to convert hard text in an HTML table header to soft text using Html Agility Pack (C# .NET)

Description:
·         Hard text means the text content is split across two separate paragraph tags, which are separated by a break tag (<BR>).
Eg.  <td>
     <p>
         I like to do RND
     </p>
     <br />
      <p>
         in DotNet
     </p>
     </td>

·         Soft text means the text content sits inside a single paragraph tag, with the lines separated by a break tag (<BR>).
Eg.  <td>
     <p>
        I like to do RND
         <br />
         in DotNet
     </p>    
     </td>


·         Advantage of a soft header: it merges the content of two or more paragraph tags into one, which makes it easier for the user to edit the content and to format it with HTML styles on that single paragraph tag.

Methods to be used:
1.     ProcessTableHeaders()
 It processes an HTML table by selecting the rows inside its <thead> tag or, if there is none, the table's rows starting from the first. By default it treats the first row as the header; this can be changed for tables with multiple header rows.
2.    ReformLineEndings()
 It merges the content of the paragraphs inside a table cell (<td>) into one paragraph, using <br> as the separator.
3.     GetCleanText()
Returns only the text inside an HTML tag, with non-breaking-space entities, spaces, line breaks and placeholder markers removed.



Method definitions

// 1) ProcessTableHeaders
        /// <summary>
        /// Converts "hard" header text (several &lt;p&gt; siblings inside one cell) into "soft"
        /// text (one &lt;p&gt; with &lt;br&gt; separators) for the leading rows of an HTML table.
        /// </summary>
        /// <remarks>
        /// The table markup is re-parsed into a scratch document. Rows are taken from
        /// <c>//thead//tr</c>, falling back to <c>//tr</c> when no such rows exist. Nothing is
        /// changed unless more than one row is selected. For every child node of a processed
        /// row the paragraphs are merged via <c>ReformLineEndings</c> and &lt;th&gt; is renamed
        /// to &lt;td&gt;. Finally the literal &lt;thead&gt;/&lt;tbody&gt; tags are stripped and
        /// the result is written back to <paramref name="table"/>.
        /// The method is best-effort: any exception is logged and the table is returned as it stands.
        /// </remarks>
        /// <param name="table">The table node to process; its InnerHtml is overwritten on success.</param>
        /// <returns>The InnerHtml of <paramref name="table"/> after processing.</returns>
        static string ProcessTableHeaders(HtmlNode table)
        {
            try
            {
                // Work on a copy parsed from the table's markup; the original node is only
                // touched at the very end.
                HtmlDocument tableDoc = new HtmlDocument();
                tableDoc.LoadHtml(table.OuterHtml);

                // SelectNodes returns null when nothing matches, hence the fallback to all rows.
                HtmlNodeCollection headerRows = tableDoc.DocumentNode.SelectNodes("//thead//tr")
                    ?? tableDoc.DocumentNode.SelectNodes("//tr");

                if (headerRows != null && headerRows.Count > 1)
                {
                    for (int rowIndex = 0; rowIndex < headerRows.Count; rowIndex++)
                    {
                        HtmlNode row = headerRows[rowIndex];

                        // Index loop on purpose: each child is replaced while we walk the list.
                        for (int cellIndex = 0; cellIndex < row.ChildNodes.Count; cellIndex++)
                        {
                            HtmlNode cell = row.ChildNodes[cellIndex];

                            if (cell.ChildNodes.Count == 1 && cell.ChildNodes[0].Name == "div")
                            {
                                // A single wrapper <div>: merge the paragraphs inside the wrapper.
                                cell.ChildNodes[0].InnerHtml = ReformLineEndings(cell.ChildNodes[0], "Header");
                            }
                            else if (cell.ChildNodes.Count > 1)
                            {
                                cell.InnerHtml = ReformLineEndings(cell, "Header");
                            }

                            // Header cells become ordinary cells.
                            HtmlNode replacement = HtmlNode.CreateNode(
                                cell.OuterHtml.Replace("<th", "<td").Replace("</th>", "</td>"));
                            row.ReplaceChild(replacement, cell);
                        }

                        // The loop stops after the row at index 2, so at most three rows are processed.
                        // NOTE(review): the article text says only the first row is processed by
                        // default - confirm the intended limit before changing this.
                        if (rowIndex > 1)
                        {
                            break;
                        }
                    }

                    tableDoc.DocumentNode.InnerHtml = tableDoc.DocumentNode.InnerHtml
                        .Replace("<thead>", "")
                        .Replace("</thead>", "")
                        .Replace("<tbody>", "")
                        .Replace("</tbody>", "");

                    HtmlNode rebuiltTable = HtmlNode.CreateNode(tableDoc.DocumentNode.OuterHtml);
                    table.InnerHtml = rebuiltTable.InnerHtml;
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (the caller gets the table back), but no longer silent.
                System.Diagnostics.Debug.WriteLine("ProcessTableHeaders failed: " + ex);
            }

            return table.InnerHtml;
        }


// 2) ReformLineEndings
        /// <summary>
        /// Merges every &lt;p&gt; child of <paramref name="tdnode"/> into the first &lt;p&gt; child,
        /// inserting a &lt;br&gt; before the content of each merged paragraph.
        /// </summary>
        /// <remarks>
        /// Only direct children named "p" are merged; other children are left where they are.
        /// While no paragraph with visible text has been seen, the "style" attribute value of the
        /// first merged paragraph that has visible text is copied onto the target paragraph,
        /// provided both paragraphs carry a "style" attribute.
        /// The method is best-effort: any exception is logged and the node is returned as it stands.
        /// </remarks>
        /// <param name="tdnode">Container node whose direct children are scanned; modified in place.</param>
        /// <param name="sType">Not read by this method; kept so existing callers keep compiling.</param>
        /// <returns>The InnerHtml of <paramref name="tdnode"/> after merging.</returns>
        static string ReformLineEndings(HtmlNode tdnode, string sType)
        {
            bool isTextFound = false;
            bool isStyleSet = false;

            try
            {
                // First <p> child; every later <p> is folded into it.
                HtmlNode mergeTarget = null;

                for (int child = 0; child < tdnode.ChildNodes.Count; child++)
                {
                    HtmlNode current = tdnode.ChildNodes[child];
                    if (current.Name != "p")
                    {
                        continue;
                    }

                    if (mergeTarget == null)
                    {
                        mergeTarget = current;
                    }
                    else
                    {
                        current.InnerHtml = current.InnerHtml + " ";

                        if (GetCleanText(current.InnerText) != "")
                        {
                            isTextFound = true;
                        }

                        if (!isStyleSet && isTextFound)
                        {
                            // Both attributes must exist. The original dereferenced the merged
                            // paragraph's style unconditionally, which threw (and aborted the
                            // whole merge) whenever that paragraph had no style attribute.
                            if (mergeTarget.Attributes["style"] != null && current.Attributes["style"] != null)
                            {
                                mergeTarget.Attributes["style"].Value = current.Attributes["style"].Value;
                                isStyleSet = true;
                            }
                        }

                        mergeTarget.AppendChild(HtmlNode.CreateNode("<br>"));

                        for (int subchild = 0; subchild < current.ChildNodes.Count; subchild++)
                        {
                            HtmlNode piece = current.ChildNodes[subchild];

                            // Copy nodes that carry text, plus explicit line breaks; drop empty nodes.
                            if (piece.InnerText != "" || piece.Name == "br")
                            {
                                mergeTarget.AppendChild(HtmlNode.CreateNode(piece.OuterHtml));
                            }

                            if (GetCleanText(piece.InnerText) != "")
                            {
                                isTextFound = true;
                            }
                        }

                        // The paragraph is now empty of purpose: remove it and step the index back
                        // so the node that slides into this position is not skipped.
                        current.Remove();
                        child--;
                    }

                    // After a merge this looks at the node just before the removed paragraph
                    // (index was decremented); for the first paragraph it looks at that paragraph.
                    if (GetCleanText(tdnode.ChildNodes[child].InnerText) != "")
                    {
                        isStyleSet = true;
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (the caller gets the markup back), but no longer silent.
                System.Diagnostics.Debug.WriteLine("ReformLineEndings failed: " + ex);
            }

            return tdnode.InnerHtml;
        }


// 3) GetCleanText
        /// <summary>
        /// Removes everything that does not count as visible text: non-breaking-space entities
        /// and characters, ordinary spaces, carriage returns, line feeds and the
        /// <c>@#...#@</c> placeholder tokens listed below.
        /// </summary>
        /// <param name="Text">Text to clean, typically a node's InnerText. May be null or empty.</param>
        /// <returns>
        /// The cleaned text; an empty string when nothing visible remains or when
        /// <paramref name="Text"/> is null or empty.
        /// </returns>
        static string GetCleanText(string Text)
        {
            if (string.IsNullOrEmpty(Text))
            {
                return string.Empty;
            }

            return Text
                // Non-breaking space in its three entity spellings.
                .Replace("&#160;", "")
                .Replace("&nbsp;", "")
                .Replace("&#xa0;", "")
                // Placeholder tokens (their meaning is defined outside this method).
                .Replace("@#double#@", "")
                .Replace("@#single#@", "")
                .Replace("@#doubletop#@", "")
                .Replace("@#singletop#@", "")
                .Replace("@#u#@", "")
                // Non-breaking space as a raw character, written as an escape so it cannot be
                // silently turned into an ordinary space by an editor or a copy/paste.
                .Replace("\u00A0", "")
                .Replace(" ", "")
                .Replace("\r", "")
                .Replace("\n", "");
        }


No comments:

Post a Comment