Description: In
This tutorial, we will learn Converting excel embedded word documents to HTML
Document with proper conversion of embedded excel documents to their respective
HTML content also with proper placement as that in Word Document using ASPOSE.WORD and HtmlAgilityPack
(Note: Aspose is third party tool used to work on
Microsoft documents like Word, Excel, and PDF etc. Also, this logic never works for linked Excel file)
Diagrammatic
representation:
Word Doc with Excel Objects
|
|
V
|
Conversion Process
|
|
V
|
HTML Doc with Excel contents
|
Steps in
Conversion Process
|
1. Mark Excel Object
Positioning using Aspose.Word
2. Convert Main word
document into html using Aspose.Word
conversion
3. Now convert each
excel object in word document into respective html doc using Aspose.cells and replace that
excel derived html content in main
word derived html content using marked excel position
|
Word
Doc as Input:
Word Content 1
Excel object 1
Word Content 2
Excel object 2
|
HTML
Doc as Output:
Word HTML Content 1
Excel HTML Content 1
Word HTML Content 2
Excel HTML Content 2
|
Method
execution Order:
1) objDocument = ExcelTableMarking(objDocument); //First Method
2) objDocument = GetHtmlDataOfAllExcelFiles(objHTMLdoc, objDocument); //Second Method
Both methods are used in ExampleRun method for example purpose as give below
string OutputHTML = ExampleRun(WordDocumentFilePath);
Method
definitions:
public static string ExampleRun(string
WordDocumentFilePath)
{
string
html = string.Empty;
HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
using
(MemoryStream htmlStream = new MemoryStream())
{
//Create
Aspose word document object
Aspose.Words.Document objDocument = new
Aspose.Words.Document(WordDocumentFilePath);
//provide
the setting for image objects in word document
Aspose.Words.Saving.HtmlSaveOptions lSaveOptions = new Aspose.Words.Saving.HtmlSaveOptions(Aspose.Words.SaveFormat.Html);
lSaveOptions.ExportImagesAsBase64 = true;//For Embedded image | false to download image to a folder
after conversion
lSaveOptions.ImagesFolder =
Server.MapPath("~/Images");
lSaveOptions.ImagesFolderAlias = "../../Images";
//Mark
Excel objects in word document
objDocument =
ExcelTableMarking(objDocument,null,null); //Main
objDocument.AcceptAllRevisions();
//Convert Word document to html
objDocument.Save(htmlStream,
lSaveOptions);// Get Html string
//Read
html string from HTML memoryStream
html = System.Text.Encoding.UTF8.GetString(htmlStream.GetBuffer(), 0,
(int)htmlStream.Length);
//Load
Html document using HtmlAgilityPack
htmlDocument.LoadHtml(html);
//Replace
all excel objects with their respective HTML contents
htmlDocument =
GetHtmlDataOfAllExcelFiles(htmlDocument, objDocument);
}
return
htmlDocument.DocumentNode.OuterHtml;
}
public static Document ExcelTableMarking(Document doc)
{
string
extension = string.Empty;
string
FileName = string.Empty;
string
FilePath = string.Empty;
DocumentBuilder
builder = new DocumentBuilder(doc);
NodeCollection
Shapes = doc.GetChildNodes(NodeType.Shape, true);
int
ExcelNo = 1;
try
{
foreach (Aspose.Words.Drawing.Shape shape in
Shapes)
{
if
(shape.OleFormat != null)
{
try
{
extension = shape.OleFormat.SuggestedExtension.ToString();
if ((extension.Contains(".xls")
|| extension.Contains(".xlsx"))
&& shape.OleFormat.IsLink == false)
{
shape.AlternativeText = "$EXCEL$"
+ ExcelNo;
ExcelNo += 1;
}
}
catch
{
}
}
}
}
catch
(Exception ex)
{
}
return
doc;
}
static HtmlDocument
GetHtmlDataOfAllExcelFiles(HtmlDocument doc,
Document Maindoc)
{
string
html = doc.DocumentNode.OuterHtml;
string
singleFileHtml = string.Empty;
try
{
HtmlNodeCollection
nc = doc.DocumentNode.SelectNodes(".//img");
if
(nc != null)
{
foreach (HtmlNode node in
nc)
{
string ImageSource = node.OuterHtml;
if (Regex.IsMatch(ImageSource, @"$.*$"))
{
string FileName = node.Attributes["alt"].Value;
if (FileName.Contains("$"))
{
FileName =
FileName.Replace("$", "");
singleFileHtml
= GetHtmlDataOfExcelFile(FileName, Maindoc);
html =
html.Replace(ImageSource, singleFileHtml + "<br/>");
}
}
}
}
}
catch
(Exception ex)
{
}
doc.LoadHtml(html);
return
doc;
}
static string GetHtmlDataOfExcelFile(string ExcelLinkName, Document
Maindoc)
{
string
html = string.Empty;
try
{
MemoryStream
FileStream = new MemoryStream();
string
extension = string.Empty;
NodeCollection
Shapes = Maindoc.GetChildNodes(NodeType.Shape,
true);
foreach
(Aspose.Words.Drawing.Shape shape in Shapes)
{
if
(shape.OleFormat != null)
{
extension =
shape.OleFormat.SuggestedExtension.ToString();
if ((extension.Contains(".xls")
|| extension.Contains(".xlsx"))
&& shape.OleFormat.IsLink == false)
{
string ExcelName = shape.AlternativeText.Replace("$", "");
if (ExcelName == ExcelLinkName)
{
shape.OleFormat.Save(FileStream); break;
}
}
}
}
if
(FileStream != null)
{
Aspose.Cells.Workbook
workbook = new Aspose.Cells.Workbook(FileStream);
using
(MemoryStream htmlStream = new MemoryStream())
{
workbook.Save(htmlStream, Aspose.Cells.SaveFormat.Html);
html = System.Text.Encoding.UTF8.GetString(htmlStream.GetBuffer(), 0,
(int)htmlStream.Length);
while (html[0] != '<')
html =
html.Substring(1);
}
}
if
(html != "")
{
HtmlDocument
doc = new HtmlDocument();
HtmlNodeCollection
nc = null;
doc.LoadHtml(html);
#region Body Capture
nc =
doc.DocumentNode.SelectNodes("//body");
{
if (nc != null)
{
foreach (HtmlNode
node in nc)
{
html =
node.InnerHtml;
break;
}
}
}
doc.LoadHtml(html);
#endregion
html =
doc.DocumentNode.OuterHtml;
}
}
catch
{
}
return
html;
}
No comments:
Post a Comment