1. 程式人生 > word文件轉html字符串(包含格式和圖片)

word文件轉html字符串(包含格式和圖片)

bst read default sum orm mis tex ssi int

首先引用 Microsoft.Office.Interop.Word.dll

轉換方法:

using System;
using System.Text;
using MSWord = Microsoft.Office.Interop.Word;
using System.IO;
using System.Reflection;


namespace ReadWord
{
    /// <summary>
    /// Converts a Word document to an HTML string by automating Microsoft Word:
    /// the document is saved as filtered HTML next to the source file and the
    /// resulting file is read back into memory.
    /// </summary>
    /// <remarks>Original author: Will.Wang.</remarks>
    public class GetHtmlString
    {
        /// <summary>
        /// Converts a Word document to an HTML string.
        /// </summary>
        /// <param name="wordPath">Absolute path of the Word document.</param>
        /// <returns>The content of the HTML file produced by Word.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="wordPath"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="wordPath"/> is empty.</exception>
        public static string GetProceHtmlString(String wordPath)
        {
            // Fail fast with a clear message instead of a late failure inside
            // the COM call or the path manipulation.
            if (wordPath == null)
            {
                throw new ArgumentNullException("wordPath");
            }
            if (wordPath.Length == 0)
            {
                throw new ArgumentException("The Word document path must not be empty.", "wordPath");
            }

            string htmlPath = GetHtml(wordPath);
            return ProceHtmlString(htmlPath);
        }

        /// <summary>
        /// Saves the Word document as filtered HTML and returns the path of the
        /// generated HTML file.
        /// </summary>
        /// <param name="path">Path of the Word document (boxed as Object because the interop API takes it by ref).</param>
        /// <returns>
        /// The path of the HTML file: the source path with its extension replaced by ".html".
        /// </returns>
        private static string GetHtml(Object path)
        {
            Object Nothing = Missing.Value;
            Object format = MSWord.WdSaveFormat.wdFormatFilteredHTML;
            // The document is already saved by SaveAs on the success path; on a
            // failure path we must not let a hidden Word instance prompt to save.
            Object doNotSave = MSWord.WdSaveOptions.wdDoNotSaveChanges;

            // Path.ChangeExtension copes with paths that have no extension and with
            // dots in directory names, where a manual LastIndexOf('.') cut would
            // throw or truncate the wrong part of the path.
            Object newPath = Path.ChangeExtension(path.ToString(), ".html");

            MSWord.Application wordApp = new MSWord.Application();
            try
            {
                // Documents.Add creates a new document based on the file at 'path'.
                MSWord.Document wordDoc = wordApp.Documents.Add(ref path, ref Nothing, ref Nothing, ref Nothing);
                try
                {
                    wordDoc.SaveAs(ref newPath, ref format, ref Nothing, ref Nothing, ref Nothing,
                        ref Nothing, ref Nothing, ref Nothing, ref Nothing, ref Nothing, ref Nothing,
                        ref Nothing, ref Nothing, ref Nothing, ref Nothing, ref Nothing);
                }
                finally
                {
                    wordDoc.Close(ref doNotSave, ref Nothing, ref Nothing);
                }
            }
            finally
            {
                // Always shut Word down, even when the conversion failed, so no
                // background Word process is left behind.
                wordApp.Quit(ref doNotSave, ref Nothing, ref Nothing);
            }

            return newPath.ToString();
        }

        /// <summary>
        /// Reads the whole HTML file into a string.
        /// </summary>
        /// <param name="htmlPath">Path of the HTML file to read.</param>
        /// <returns>The full text of the file.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        private static string ProceHtmlString(String htmlPath)
        {
            // FileMode.Open (not OpenOrCreate): a missing file is an error, it must
            // not be silently created as an empty file and returned as "".
            // NOTE(review): Encoding.Default is kept from the original code; confirm it
            // matches the encoding Word uses for the exported HTML on the target machine.
            using (FileStream fs = new FileStream(htmlPath, FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(fs, Encoding.Default))
            {
                return sr.ReadToEnd();
            }
        }
    }
}

word文件轉html字符串(包含格式和圖片)