在在線CHM閱讀器(1)一文中已提到,CHM其實就是一個結構化存儲文件(Structured Storage),如果要閱讀CHM文檔,就必須將文件,圖片等從CHM文件中提取出來,提取需要用到Structured Storage的StgOpenStorage函數以及IStorage和IStream接口,不過這些在.NET中都不能直接使用,需要先“包裝”一下。如何使用IStorage和IStream可以參考這篇文章:
不過這篇文章提供的源代碼是用于反編譯出CHM的所有文件的,開發在線CHM閱讀器并不需要先反編譯出所有的文件,只需要把瀏覽器當前請求的那個文件提取出來發送到客戶端即可。提取文件的代碼如下:
/// <summary>
/// Extracts a single file from a CHM archive through the structured-storage
/// interop wrappers (ITStorage / IStorage / IStream) declared elsewhere in the project.
/// </summary>
public class CHH
{
    // Mode value passed to every open call in this class.
    // NOTE(review): 0x20 looks like STGM_SHARE_DENY_WRITE combined with STGM_READ (0) —
    // confirm against the interop declarations.
    private const int OpenMode = 0x20;

    // STATSTG.type value that marks a stream element.
    // NOTE(review): 2 corresponds to STGTY_STREAM in the Windows STGTY enumeration — confirm.
    private const int StreamElementType = 2;

    /// <summary>
    /// Extracts one file from a CHM archive.
    /// </summary>
    /// <param name="chm">Path of the CHM file.</param>
    /// <param name="res">
    /// Full path of the file inside the archive, with '/' as separator. Empty segments are ignored.
    /// When the value is exactly "#SYSTEM" (case-insensitive) the binary content is converted
    /// to the JSON text produced by <c>ChmInfo.MakeStream</c>.
    /// </param>
    /// <returns>
    /// A stream over the requested file, or <c>null</c> when <paramref name="res"/> is null,
    /// contains no path segment, or the file cannot be opened.
    /// </returns>
    public static Stream Find(string chm, string res)
    {
        // Split first: a null, empty or slash-only path cannot name a file, so there is
        // no reason to open the archive (the recursive lookup used to index an empty array).
        string[] segments = res == null
            ? new string[0]
            : res.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
        if (segments.Length == 0)
        {
            return null;
        }

        IStorage storage = ((ITStorage)new ITStorageClass()).StgOpenStorage(chm, IntPtr.Zero, OpenMode, IntPtr.Zero, 0);
        try
        {
            IStream stream = Find(storage, segments, 0);
            if (stream == null)
            {
                return null;
            }

            if (string.Equals(res, "#SYSTEM", StringComparison.OrdinalIgnoreCase))
            {
                // #SYSTEM is a binary file that JavaScript cannot process, so it is
                // converted to JSON text; see ChmInfo.MakeStream for the emitted fields.
                using (Stream cs = new ComStream(stream))
                {
                    return new ChmInfo(cs).MakeStream();
                }
            }

            // The caller owns the returned stream and is responsible for closing it.
            return new ComStream(stream);
        }
        finally
        {
            Marshal.ReleaseComObject(storage);
        }
    }

    /// <summary>
    /// Walks <paramref name="res"/> from index <paramref name="first"/>: every segment but the
    /// last is opened as a sub-storage, the last one is opened as a stream.
    /// </summary>
    /// <returns>The opened stream, or <c>null</c> when any part of the path cannot be opened.</returns>
    static IStream Find(IStorage storage, string[] res, int first)
    {
        if (res == null || first < 0 || first >= res.Length)
        {
            return null;
        }

        if (first == res.Length - 1)
        {
            return OpenLeaf(storage, res[first]);
        }

        // Intermediate segment: descend into the folder and continue with the next segment.
        IStorage next;
        try
        {
            next = storage.OpenStorage(res[first], IntPtr.Zero, OpenMode, IntPtr.Zero, 0);
        }
        catch (Exception)
        {
            // Deliberate best effort, consistent with the missing-file case below:
            // a folder that cannot be opened is reported as "not found".
            return null;
        }

        if (next == null)
        {
            return null;
        }

        try
        {
            return Find(next, res, first + 1);
        }
        finally
        {
            Marshal.ReleaseComObject(next);
        }
    }

    /// <summary>
    /// Opens the stream called <paramref name="name"/> in <paramref name="storage"/>. When that
    /// fails and the name is exactly ".HHC", the first stream element whose name ends in ".hhc"
    /// is opened instead, because the real name of the contents file is usually not known.
    /// </summary>
    private static IStream OpenLeaf(IStorage storage, string name)
    {
        IStream stream = null;
        try
        {
            stream = storage.OpenStream(name, IntPtr.Zero, OpenMode, 0);
        }
        catch (Exception)
        {
            // Deliberate best effort: a stream that cannot be opened is treated as missing.
        }

        if (stream != null || !string.Equals(name, ".HHC", StringComparison.OrdinalIgnoreCase))
        {
            return stream;
        }

        System.Runtime.InteropServices.ComTypes.STATSTG stats;
        IEnumSTATSTG enumStats;
        int fetched = 0;
        storage.EnumElements(0, IntPtr.Zero, 0, out enumStats);
        try
        {
            enumStats.Reset();
            // Enumerate the elements of this storage until Next stops returning 0.
            while (enumStats.Next(1, out stats, out fetched) == 0)
            {
                if (stats.type == StreamElementType
                    && stats.pwcsName != null
                    && stats.pwcsName.EndsWith(".hhc", StringComparison.OrdinalIgnoreCase))
                {
                    return storage.OpenStream(stats.pwcsName, IntPtr.Zero, OpenMode, 0);
                }
            }
        }
        finally
        {
            Marshal.ReleaseComObject(enumStats);
        }

        return null;
    }
}
上文提取文件的代碼已提到,#SYSTEM是一個二進制文件,而javascript是不能處理二進制文件的,因此,必須在后臺處理#SYSTEM文件,轉換成json格式的文本發送到客戶端。#SYSTEM的格式如下圖所示:
根據上圖所示的規律,就可以處理#SYSTEM文件了,代碼如下:
/// <summary>
/// Parses the binary #SYSTEM file of a CHM archive and exposes a few of its entries
/// (start page, title, project name, version, contents file) as strings.
/// <see cref="MakeStream"/> serialises a subset of them as JSON.
/// </summary>
class ChmInfo
{
    const UInt16 ID_HHC = 0x0000;     // ID of the entry decoded into HHC
    const UInt16 ID_MAIN = 0x0002;    // ID of the start page
    const UInt16 ID_Version = 0x0009; // ID of the version
    const UInt16 ID_TITLE = 0x0003;   // ID of the title
    const UInt16 ID_PRJNAME = 0x0006; // ID of the project name
    const UInt16 ID_LAN = 0x0004;     // ID of the locale

    // An entry header is two UInt16 values: the entry ID and the payload length.
    const int EntryHeaderSize = 4;

    public String MainPage = String.Empty, PrjName = String.Empty, Title = String.Empty, HHA_Version = String.Empty, HHC = String.Empty;
    public Encoding Encoding = null;

    // Raw payload (byte[]) of every non-empty entry, keyed by its UInt16 ID.
    Hashtable _session = new Hashtable();

    /// <summary>
    /// Reads one (id, length, payload) entry from the reader into <c>_session</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when an entry was consumed; <c>false</c> when fewer bytes remain than a
    /// header needs, or when the declared payload length runs past the end of the stream.
    /// </returns>
    private bool ReadSession(BinaryReader reader)
    {
        Stream source = reader.BaseStream;

        // Require a complete header; a few stray trailing bytes used to raise EndOfStreamException.
        if (source.Length - source.Position < EntryHeaderSize)
        {
            return false;
        }

        UInt16 id = reader.ReadUInt16();
        UInt16 count = reader.ReadUInt16();
        if (count > source.Length - source.Position)
        {
            return false;
        }

        if (count > 0)
        {
            _session[id] = reader.ReadBytes(count);
        }
        return true;
    }

    /// <summary>
    /// Reads every entry of a #SYSTEM stream and decodes the known ones.
    /// </summary>
    /// <param name="stream">
    /// Stream positioned at the start of the #SYSTEM data. Its Length and Position are used,
    /// and it is left open for the caller to close.
    /// </param>
    public ChmInfo(Stream stream)
    {
        // The reader is intentionally not disposed: disposing it would close the caller's stream.
        BinaryReader reader = new BinaryReader(stream);

        // Read all payloads with their IDs into the hashtable.
        // NOTE(review): if the file begins with a 4-byte version field, this loop consumes it
        // as an entry of length zero — confirm against the format description.
        while (ReadSession(reader))
        {
        }

        // The locale entry, when usable, selects the code page of the remaining strings.
        Encoding = ResolveEncoding(_session[ID_LAN] as Byte[]);
        if (Encoding == null)
        {
            Encoding = GetFallbackEncoding();
        }

        MainPage = DecodeEntry(ID_MAIN);
        Title = DecodeEntry(ID_TITLE);
        PrjName = DecodeEntry(ID_PRJNAME);
        HHA_Version = DecodeEntry(ID_Version);
        HHC = DecodeEntry(ID_HHC);
    }

    // Maps the first two payload bytes (little-endian culture identifier) to that culture's
    // ANSI code page; returns null when the payload is too short or the lookup is unsupported.
    private static System.Text.Encoding ResolveEncoding(Byte[] data)
    {
        if (data == null || data.Length < 2)
        {
            return null;
        }

        try
        {
            CultureInfo info = new CultureInfo(data[1] * 0x100 + data[0]);
            return System.Text.Encoding.GetEncoding(info.TextInfo.ANSICodePage);
        }
        catch (ArgumentException)
        {
            // Unknown culture identifier or invalid code page: use the fallback.
        }
        catch (NotSupportedException)
        {
            // Code page not available on this runtime: use the fallback.
        }
        return null;
    }

    // GB2312 is the historical default; the runtime default encoding is used when GB2312
    // is not available, so construction no longer fails on such runtimes.
    private static System.Text.Encoding GetFallbackEncoding()
    {
        try
        {
            return System.Text.Encoding.GetEncoding("GB2312");
        }
        catch (ArgumentException)
        {
        }
        catch (NotSupportedException)
        {
        }
        return System.Text.Encoding.Default;
    }

    // Decodes the payload stored under the given ID up to (not including) its first NUL byte;
    // returns an empty string when the entry is absent.
    private String DecodeEntry(UInt16 id)
    {
        Byte[] data = _session[id] as Byte[];
        if (data == null)
        {
            return String.Empty;
        }

        int terminator = Array.IndexOf(data, (Byte)0);
        int length = terminator < 0 ? data.Length : terminator;
        return Encoding.GetString(data, 0, length);
    }

    /// <summary>
    /// Builds a JSON object with the members MainPage, Title, HHC and Encoding
    /// (the header name of the detected encoding).
    /// </summary>
    /// <returns>A MemoryStream holding the UTF-8 encoded JSON, positioned at its start.</returns>
    public Stream MakeStream()
    {
        String json = String.Format(
            "{{\"MainPage\":\"{0}\",\"Title\":\"{1}\",\"HHC\":\"{2}\",\"Encoding\":\"{3}\"}}",
            TransferCharJavascript(MainPage),
            TransferCharJavascript(Title),
            TransferCharJavascript(HHC),
            TransferCharJavascript(Encoding.HeaderName)
            );
        Byte[] buffer = Encoding.UTF8.GetBytes(json);
        Stream stream = new MemoryStream(buffer.Length);
        stream.Write(buffer, 0, buffer.Length);
        stream.Seek(0, SeekOrigin.Begin);
        return stream;
    }

    /// <summary>
    /// Escapes a string for embedding in a JavaScript/JSON string literal.
    /// </summary>
    /// <param name="s">Text to escape; <c>null</c> is treated as an empty string.</param>
    /// <returns>
    /// The text with every control character below U+0020, the quotes, backslash, angle
    /// brackets and U+2028/U+2029 replaced by a \uXXXX escape.
    /// </returns>
    public static string TransferCharJavascript(string s)
    {
        if (String.IsNullOrEmpty(s))
        {
            return String.Empty;
        }

        StringBuilder ret = new StringBuilder(s.Length);
        foreach (char c in s)
        {
            // JSON forbids raw control characters in strings; U+2028/U+2029 end a line in
            // JavaScript source; '<' and '>' stay escaped so the text is safe inside HTML.
            bool escape = c < ' '
                || c == '\"' || c == '\\' || c == '\''
                || c == '<' || c == '>'
                || c == '\u2028' || c == '\u2029';
            if (escape)
            {
                ret.AppendFormat(CultureInfo.InvariantCulture, "\\u{0:X4}", (int)c);
            }
            else
            {
                ret.Append(c);
            }
        }
        return ret.ToString();
    }
}
目錄文件保存著一個CHM文件的目錄結構,它是一個文本文件,為了減輕服務器的負擔,將目錄文件放到瀏覽器來處理。在在線CHM閱讀器(1)一文中已提到,目錄文件大概的規律是:每一個<LI><OBJECT>…</OBJECT>對應著目錄樹中的一個節點,<OBJECT>…</OBJECT>中的參數記錄著該節點的屬性(對應的頁面,名稱等)。如果這個節點有子節點的話,那么<LI>后面會緊跟著一個<UL></UL>,<UL>里面所有的節點都是其子節點。處理的代碼如下:
/**
 * Parser for the text of a CHM contents (.hhc) file.
 *
 * Render() scans the markup and builds a tree; GetNodes() returns the top-level nodes.
 * A node is either
 *   { NodeType: "UL", Nodes: [...] }                      for a <UL> ... </UL> list, or
 *   { NodeType: "LI", SubNodes: [...], type: ..., <param name>: <param value>, ... }
 * for an <LI><OBJECT> ... </OBJECT> entry. The entries of every <UL> that directly
 * follows an <LI> become that LI's SubNodes.
 *
 * @param {string} buffer Text of the .hhc file; null/undefined is treated as empty text.
 */
function ChmHHC(buffer) {
    buffer = buffer == null ? "" : String(buffer);

    var position = 0;
    // Captures: 1 = "<" or "</", 2 = tag name, 3 = everything between the name and ">".
    var RegxTagName = /(<|<\/)([a-zA-Z]+)(\s[\S\s]*|)>/i;
    // name="value" pairs; digits 0-9 are allowed in names and the value may be empty.
    var RegxAttrs = /([a-zA-Z0-9]+)\s*=\s*\x22([^\x22]*)\x22/ig;

    // Reads the next tag (<name ...> or </name>) at or after `position`.
    // Spans that do not look like a tag (comments, doctype, ...) are skipped.
    // Returns null when the input is exhausted.
    function ReadTag() {
        var match = null;
        while (match == null) {
            var start = buffer.indexOf('<', position);
            var end = start < 0 ? -1 : buffer.indexOf('>', start);
            if (end < 0) {
                position = buffer.length;
                return null;
            }
            position = end + 1;
            match = RegxTagName.exec(buffer.substring(start, end + 1));
        }

        var tag = {
            Name: match[2].toUpperCase(),
            Type: match[1] == '<' ? "Begin" : "End",
            Attrs: {}
        };
        if (tag.Type == "Begin" && match.length > 3 && match[3] != "") {
            // The regex is global, so restart it for every new attribute string.
            RegxAttrs.lastIndex = 0;
            var attr = null;
            while ((attr = RegxAttrs.exec(match[3])) != null) {
                tag.Attrs[attr[1].toLowerCase()] = attr[2];
            }
        }
        return tag;
    }

    // The tag currently being examined by the recursive-descent functions below.
    var current = null;

    function IsBeginTag(tag, name) {
        return tag.Type == "Begin" && tag.Name == name;
    }

    function IsEndTag(tag, name) {
        return tag.Type == "End" && tag.Name == name;
    }

    // Consumes one construct starting at `current` and returns its node, or null when the
    // construct is not an LI entry or a UL list. Every call advances by at least one tag.
    function RenderTag() {
        if (current != null && IsBeginTag(current, "LI")) {
            var item = { NodeType: "LI", SubNodes: [] };
            current = ReadTag();
            if (current != null && IsBeginTag(current, "OBJECT")) {
                item.type = current.Attrs["type"];
                current = ReadTag();
                while (current != null && !IsEndTag(current, "OBJECT")) {
                    // Each <param name=... value=...> becomes a property of the node;
                    // a param without a name is ignored instead of being stored as "undefined".
                    if (IsBeginTag(current, "PARAM") && current.Attrs["name"] !== undefined) {
                        item[current.Attrs["name"]] = current.Attrs["value"];
                    }
                    current = ReadTag();
                }
                if (current != null && IsEndTag(current, "OBJECT")) {
                    current = ReadTag();
                }
                if (current != null && IsEndTag(current, "LI")) {
                    current = ReadTag();
                }
                // Entries of all UL lists trailing this LI are its children.
                while (current != null && IsBeginTag(current, "UL")) {
                    var list = RenderTag();
                    if (list != null) {
                        for (var i = 0; i < list.Nodes.length; i++) {
                            item.SubNodes.push(list.Nodes[i]);
                        }
                    }
                }
                return item;
            }
            // An LI that is not followed by an OBJECT yields no node.
            return null;
        }

        if (current != null && IsBeginTag(current, "UL")) {
            var group = { NodeType: "UL", Nodes: [] };
            current = ReadTag();
            while (current != null && !IsEndTag(current, "UL")) {
                var child = RenderTag();
                if (child != null) {
                    group.Nodes.push(child);
                }
            }
            // Step over </UL>. When the input ends before the list is closed, the entries
            // collected so far are still returned instead of being thrown away.
            if (current != null) {
                current = ReadTag();
            }
            return group;
        }

        // Any other tag is skipped.
        current = ReadTag();
        return null;
    }

    var roots = [];

    // Parses the whole buffer from the start. The result replaces any earlier result,
    // so calling Render more than once does not duplicate nodes.
    this.Render = function() {
        roots = [];
        position = 0;
        current = ReadTag();
        while (current != null) {
            var node = RenderTag();
            if (node != null) {
                roots.push(node);
            }
        }
        current = null;
    };

    // Returns the top-level nodes produced by the last Render call.
    this.GetNodes = function() {
        return roots;
    };
}
調用ChmHHC的Render方法后,將HHC文件轉換成一個數組,保存著所有的節點,其結構與目錄的對應關系如下圖所示:
在上文中,已經介紹了如何提取出CHM文件中的文件(網頁,圖片等)以及如何解析目錄文件,下一篇文章,將介紹如何使用ISAPI篩選器和IHttpHandler來開發一個在線CHM閱讀器。
聯系客服