国产一级a片免费看高清,亚洲熟女中文字幕在线视频,黄三级高清在线播放,免费黄色视频在线看

打開APP
userphoto
未登錄

開通VIP,暢享免費(fèi)電子書等14項(xiàng)超值服

開通VIP
在線CHM閱讀器(2)——文件提取及關鍵文件解析

1、文件提取

《在線CHM閱讀器(1)》一文中已提到,CHM其實就是一個結構化存儲文件(Structured Storage)。如果要閱讀CHM文檔,就必須將文件、圖片等從CHM文件中提取出來。提取需要用到Structured Storage的StgOpenStorage函數以及IStorage和IStream接口,不過這些在.NET中都不能直接使用,需要先“包裝”一下。如何使用IStorage和IStream可以參考這篇文章:

CHM Help File Extractor

不過這篇文章提供的源代碼是用于反編譯出CHM的所有文件的,開發在線CHM閱讀器并不需要先反編譯出所有的文件,只需要把瀏覽器當前請求的那個文件提取出來發送到客戶端即可。提取文件的代碼如下:

/// <summary>
/// Extracts individual files from a CHM file by opening it as a structured
/// storage and walking its storages (folders) and streams (files).
/// </summary>
public class CHH
{
    /// <summary>
    /// Extracts one file from a CHM file.
    /// </summary>
    /// <param name="chm">Path of the CHM file.</param>
    /// <param name="res">Full path of the file to extract; '/' separates the storage names.</param>
    /// <returns>
    /// A stream over the requested file, or null when the path names no file or the
    /// file cannot be opened. When <paramref name="res"/> is "#SYSTEM" (any letter case)
    /// the binary content is replaced by the JSON text produced by
    /// <c>ChmInfo.MakeStream</c>, because JavaScript on the client cannot parse the
    /// binary form.
    /// </returns>
    public static Stream Find(string chm, string res)
    {
        IStorage storage = ((ITStorage)new ITStorageClass()).StgOpenStorage(chm, IntPtr.Zero, 0x20, IntPtr.Zero, 0);
        try
        {
            string[] segments = res.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);

            // A path such as "" or "/" names no file. Without this guard the recursive
            // lookup would index into an empty array.
            if (segments.Length == 0) return null;

            IStream stream = Find(storage, segments, 0);
            if (stream == null) return null;

            // NOTE(review): the comparison is made against the whole path, so "/#SYSTEM"
            // is returned as raw binary even though the lookup finds the same stream -
            // confirm this is what callers expect.
            if (string.Equals(res, "#SYSTEM", StringComparison.OrdinalIgnoreCase))
            {
                // The using block closes the wrapper once the JSON copy has been built
                // (presumably ComStream releases the underlying IStream - confirm).
                using (Stream cs = new ComStream(stream))
                {
                    return new ChmInfo(cs).MakeStream();
                }
            }

            return new ComStream(stream);
        }
        finally
        {
            Marshal.ReleaseComObject(storage);
        }
    }

    /// <summary>
    /// Resolves <paramref name="res"/> starting at index <paramref name="first"/>:
    /// every segment but the last is opened as a sub-storage, the last one as a stream.
    /// </summary>
    /// <param name="storage">Storage in which <c>res[first]</c> is looked up.</param>
    /// <param name="res">Path segments of the requested file.</param>
    /// <param name="first">Index of the segment to resolve in <paramref name="storage"/>.</param>
    /// <returns>The opened stream, or null when the final segment cannot be opened.</returns>
    static IStream Find(IStorage storage, string[] res, int first)
    {
        if (first != res.Length - 1)
        {
            // Intermediate segment: descend into the sub-storage and keep looking.
            IStorage next = storage.OpenStorage(res[first], IntPtr.Zero, 0x20, IntPtr.Zero, 0);
            try
            {
                return Find(next, res, first + 1);
            }
            finally
            {
                Marshal.ReleaseComObject(next);
            }
        }

        string name = res[first];
        IStream stream = null;
        try
        {
            stream = storage.OpenStream(name, IntPtr.Zero, 0x20, 0);
        }
        catch
        {
            // Deliberate best effort: a stream that cannot be opened is reported to
            // the caller as "not found" (null) instead of as an exception.
        }

        if (stream != null || !string.Equals(name, ".HHC", StringComparison.OrdinalIgnoreCase))
        {
            return stream;
        }

        // The contents file's name is usually not known in advance, so the pseudo-name
        // ".HHC" means "the first element in this storage whose extension is .hhc".
        System.Runtime.InteropServices.ComTypes.STATSTG stats;
        IEnumSTATSTG enumStats;
        int fetched = 0;
        storage.EnumElements(0, IntPtr.Zero, 0, out enumStats);
        try
        {
            enumStats.Reset();
            while (enumStats.Next(1, out stats, out fetched) == 0)
            {
                // type == 2 is presumably STGTY_STREAM, i.e. a file rather than a folder - confirm.
                if (stats.type == 2 &&
                    string.Equals(System.IO.Path.GetExtension(stats.pwcsName), ".HHC", StringComparison.OrdinalIgnoreCase))
                {
                    return storage.OpenStream(stats.pwcsName, IntPtr.Zero, 0x20, 0);
                }
            }
        }
        finally
        {
            Marshal.ReleaseComObject(enumStats);
        }

        return null;
    }
}

2、處理#SYSTEM文件

上文提取文件的代碼已提到,#SYSTEM是一個二進制文件,而JavaScript是不能處理二進制文件的,因此,必須在后臺處理#SYSTEM文件,轉換成JSON格式的文本發送到客戶端。#SYSTEM的格式如下圖所示:

根據上圖所示的規律,即可處理#SYSTEM文件了,代碼如下:

    /// <summary>
    /// Reads the entries of a CHM "#SYSTEM" stream (a sequence of records made of a
    /// UInt16 id, a UInt16 byte count and that many bytes of data) and exposes the
    /// decoded values, plus a JSON rendering of them for the browser.
    /// </summary>
    class ChmInfo
    {
        const UInt16 ID_HHC = 0x0000;       // entry decoded into the HHC field
        const UInt16 ID_MAIN = 0x0002;      // start page
        const UInt16 ID_Version = 0x0009;   // version
        const UInt16 ID_TITLE = 0x0003;     // title
        const UInt16 ID_PRJNAME = 0x0006;   // project name
        const UInt16 ID_LAN = 0x0004;       // locale

        public String MainPage = String.Empty, PrjName = String.Empty, Title = String.Empty, HHA_Version = String.Empty, HHC = String.Empty;
        public Encoding Encoding = null;

        // Raw entry data keyed by entry id (UInt16 -> Byte[]).
        Hashtable _session = new Hashtable();

        /// <summary>
        /// Reads the next entry from the stream and stores its data under its id.
        /// </summary>
        /// <returns>
        /// true when an entry header was consumed; false when the stream holds no
        /// further complete entry.
        /// </returns>
        private bool ReadSession(BinaryReader reader)
        {
            Stream source = reader.BaseStream;

            // An entry starts with two UInt16 values (4 bytes). A shorter tail is not
            // an entry; reading it would throw EndOfStreamException.
            if (source.Length - source.Position < 4) return false;

            UInt16 id = reader.ReadUInt16();
            UInt16 count = reader.ReadUInt16();

            // The declared data length must fit in what is left of the stream.
            if (count + source.Position > source.Length) return false;

            // Zero-length entries are skipped but do not end the scan.
            if (count > 0)
            {
                _session[id] = reader.ReadBytes(count);
            }
            return true;
        }

        /// <summary>
        /// Decodes the text entry stored under <paramref name="id"/> with the current
        /// <see cref="Encoding"/>.
        /// </summary>
        /// <returns>The decoded text, or an empty string when the entry is absent.</returns>
        private String ReadText(UInt16 id)
        {
            Byte[] data = _session[id] as Byte[];
            if (data == null) return String.Empty;

            // Text entries are expected to end with a NUL terminator. Strip trailing
            // NULs instead of dropping the last byte unconditionally, so an
            // unterminated entry does not lose its final character.
            int length = data.Length;
            while (length > 0 && data[length - 1] == 0) length--;
            return Encoding.GetString(data, 0, length);
        }

        /// <summary>
        /// Reads every entry from <paramref name="stream"/> and decodes the known ones.
        /// </summary>
        /// <param name="stream">The "#SYSTEM" content; must report Length and Position.</param>
        public ChmInfo(Stream stream)
        {
            BinaryReader reader = new BinaryReader(stream);

            // Load every entry and its id into the table first; decoding needs the
            // locale entry, which may appear anywhere in the stream.
            while (ReadSession(reader))
            {
            }

            try
            {
                Byte[] locale = _session[ID_LAN] as Byte[];
                if (locale != null && locale.Length >= 2)
                {
                    // The first two bytes hold the culture id, low byte first.
                    CultureInfo info = new CultureInfo(locale[1] * 0x100 + locale[0]);
                    Encoding = Encoding.GetEncoding(info.TextInfo.ANSICodePage);
                }
            }
            catch (Exception)
            {
                // Best effort: an unknown culture or unavailable code page falls
                // through to the default encoding below.
            }
            if (Encoding == null) Encoding = Encoding.GetEncoding("GB2312");

            MainPage = ReadText(ID_MAIN);
            Title = ReadText(ID_TITLE);
            PrjName = ReadText(ID_PRJNAME);
            HHA_Version = ReadText(ID_Version);
            HHC = ReadText(ID_HHC);
        }

        /// <summary>
        /// Builds a JSON object with the members MainPage, Title, HHC and Encoding
        /// (the encoding's header name).
        /// </summary>
        /// <returns>A MemoryStream holding the UTF-8 encoded JSON, positioned at the start.</returns>
        public Stream MakeStream()
        {
            String json = String.Format(
                "{{\"MainPage\":\"{0}\",\"Title\":\"{1}\",\"HHC\":\"{2}\",\"Encoding\":\"{3}\"}}",
                TransferCharJavascript(MainPage),
                TransferCharJavascript(Title),
                TransferCharJavascript(HHC),
                TransferCharJavascript(Encoding.HeaderName)
            );
            Byte[] buffer = Encoding.UTF8.GetBytes(json);
            Stream stream = new MemoryStream(buffer.Length);
            stream.Write(buffer, 0, buffer.Length);
            stream.Seek(0, SeekOrigin.Begin);
            return stream;
        }

        /// <summary>
        /// Escapes a string so it can be placed between double quotes in JSON or
        /// JavaScript source.
        /// </summary>
        /// <param name="s">Text to escape; null is treated as empty.</param>
        /// <returns>
        /// The text with every control character below U+0020, the quote characters,
        /// the backslash, '&lt;', '&gt;', U+2028 and U+2029 replaced by a \uXXXX escape.
        /// </returns>
        public static string TransferCharJavascript(string s)
        {
            if (String.IsNullOrEmpty(s)) return String.Empty;

            StringBuilder ret = new StringBuilder(s.Length);
            foreach (char c in s)
            {
                // JSON forbids raw control characters inside strings; U+2028/U+2029
                // terminate lines in JavaScript source; '<' and '>' are escaped so the
                // text stays inert if it is embedded in HTML.
                bool escape = c < 0x20
                    || c == '"' || c == '\\' || c == '\''
                    || c == '<' || c == '>'
                    || c == '\u2028' || c == '\u2029';

                if (escape)
                {
                    ret.Append("\\u").Append(((int)c).ToString("X4", CultureInfo.InvariantCulture));
                }
                else
                {
                    ret.Append(c);
                }
            }
            return ret.ToString();
        }
    }

3、處理目錄(*.hhc)文件

目錄文件保存著一個CHM文件的目錄結構,它是一個文本文件。為了減輕服務器的負擔,將目錄文件放到瀏覽器來處理。在《在線CHM閱讀器(1)》一文中已提到,目錄文件大概的規律是:每一個<LI><OBJECT>…</OBJECT>對應著目錄樹中的一個節點,<OBJECT>…</OBJECT>中的參數記錄著該節點的屬性(對應的頁面、名稱等)。如果這個節點有子節點的話,那么<LI>后面會緊跟著一個<UL></UL>,<UL>里面所有的節點都是其子節點。處理的代碼如下:

/**
 * Parser for the text of a CHM contents (.hhc) file.
 *
 * Usage: var hhc = new ChmHHC(text); hhc.Render(); var tree = hhc.GetNodes();
 *
 * Render() produces two kinds of nodes:
 *   { NodeType: "UL", Nodes: [...] }      - a list and the LI nodes found in it
 *   { NodeType: "LI", SubNodes: [...] }   - an entry; it also carries `type` (the
 *       OBJECT tag's type attribute) and one property per <param name=... value=...>
 *       inside the OBJECT, keyed by the param's name.
 *
 * @param {string} buffer The text of the .hhc file.
 */
function ChmHHC(buffer) {
    var position = 0;
    // Matches "<name ...>" or "</name ...>": [1] = "<" or "</", [2] = tag name, [3] = attribute text.
    var RegxTagName = /(<|<\/)([a-zA-Z]+)(\s[\S\s]*|)>/i;
    // Matches name="value" pairs with a non-empty, double-quoted value.
    var RegxAttrs = /([a-zA-Z0-9]+)\s*=\s*\x22([^\x22]+)\x22/ig;

    /**
     * Reads the next tag at or after `position`.
     * Text between '<' and '>' that is not a recognisable tag (for example a comment)
     * is skipped.
     * @returns {{Name: string, Type: string, Attrs: Object}|null} The tag, with Name
     *     upper-cased, Type "Begin" or "End" and attribute names lower-cased, or null
     *     at the end of the buffer.
     */
    function ReadTag() {
        var match = null;
        while (match == null) {
            while (position < buffer.length && buffer.charAt(position) != '<') position++;
            if (position >= buffer.length) return null;
            var start = position;

            while (position < buffer.length && buffer.charAt(position) != '>') position++;
            if (position >= buffer.length) return null;
            var end = position;
            position++;

            RegxTagName.lastIndex = 0;
            match = RegxTagName.exec(buffer.substr(start, end - start + 1));
        }

        var tag = {
            Name: match[2].toUpperCase(),
            Type: match[1] == '<' ? "Begin" : "End",
            Attrs: {}
        };

        if (tag.Type == "Begin" && match.length > 3 && match[3] != "") {
            RegxAttrs.lastIndex = 0;
            // Declared locally: the loop variable must not leak into the global scope.
            var attr = null;
            while ((attr = RegxAttrs.exec(match[3])) != null) {
                tag.Attrs[attr[1].toLowerCase()] = attr[2];
            }
        }
        return tag;
    }

    // The tag the parser is currently looking at; null once the buffer is exhausted.
    var current = null;

    function IsBeginTag(tag, name) {
        return tag.Type == "Begin" && tag.Name == name;
    }

    function IsEndTag(tag, name) {
        return tag.Type == "End" && tag.Name == name;
    }

    /**
     * Consumes the construct starting at `current` and advances `current` past it.
     * Every call advances by at least one tag, so callers can loop on it safely.
     * @returns {Object|null} An LI or UL node, or null when `current` started
     *     nothing usable (any other tag, an LI without an OBJECT, or a UL that is
     *     never closed).
     */
    function RenderTag() {
        var node;
        if (current != null && IsBeginTag(current, "LI")) {
            node = {
                NodeType: "LI",
                SubNodes: []
            };
            current = ReadTag();
            if (current != null && IsBeginTag(current, "OBJECT")) {
                node.type = current.Attrs["type"];
                current = ReadTag();
                while (current != null && !IsEndTag(current, "OBJECT")) {
                    // A <param> without a name would otherwise create a property
                    // literally called "undefined".
                    if (IsBeginTag(current, "PARAM") && current.Attrs["name"] !== undefined) {
                        node[current.Attrs["name"]] = current.Attrs["value"];
                    }
                    current = ReadTag();
                }
                if (current != null && IsEndTag(current, "OBJECT")) current = ReadTag();
                // The closing </LI> is optional.
                if (current != null && IsEndTag(current, "LI")) current = ReadTag();

                // Every UL that directly follows the LI contributes its nodes as
                // children of this LI.
                while (current != null && IsBeginTag(current, "UL")) {
                    var ul = RenderTag();
                    if (ul != null) {
                        for (var i = 0; i < ul.Nodes.length; i++) {
                            node.SubNodes.push(ul.Nodes[i]);
                        }
                    }
                }
                return node;
            }
        }
        else if (current != null && IsBeginTag(current, "UL")) {
            node = {
                NodeType: "UL",
                Nodes: []
            };
            current = ReadTag();
            while (current != null && !IsEndTag(current, "UL")) {
                var subNode = RenderTag();
                if (subNode != null) node.Nodes.push(subNode);
            }
            if (current != null) {
                // Step over the closing </UL>.
                current = ReadTag();
                return node;
            }
        }
        else {
            current = ReadTag();
        }
        return null;
    }

    var roots = [];

    /**
     * Parses the buffer from the beginning and rebuilds the node tree.
     * Each call starts with an empty result, so calling it again does not
     * duplicate nodes.
     */
    this.Render = function () {
        roots = [];
        position = 0;
        current = ReadTag();
        while (current != null) {
            var node = RenderTag();
            if (node != null) roots.push(node);
        }
        current = null;
    };

    /**
     * @returns {Array} The top-level nodes built by the last Render() call
     *     (an empty array before the first call).
     */
    this.GetNodes = function () {
        return roots;
    };
}

調用ChmHHC的Render方法后,HHC文件被轉換成一個數組(可通過GetNodes方法取得),保存著所有的節點,其結構與目錄的對應關系如下圖所示:

在上文中,已經介紹了如何提取出CHM文件中的文件(網頁、圖片等)以及如何解析目錄文件。下一篇文章將介紹如何使用ISAPI篩選器和IHttpHandler來開發一個在線CHM閱讀器。

本站僅提供存儲(chǔ)服務(wù),所有內(nèi)容均由用戶發(fā)布,如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請(qǐng)點(diǎn)擊舉報(bào)。
打開APP,閱讀全文并永久保存 查看更多類似文章
猜你喜歡
類似文章
代碼生成文檔工具
hhc.exe制作chm
C#生成CHM文件(外篇使用hha.dll)
電子書籍制作專集-應(yīng)用篇
CHM格式電子書制作詳解
C#:NET4.0中如何使用內(nèi)存映射操作大文件
更多類似文章 >>
生活服務(wù)
分享 收藏 導(dǎo)長圖 關(guān)注 下載文章
綁定賬號(hào)成功
后續(xù)可登錄賬號(hào)暢享VIP特權(quán)!
如果VIP功能使用有故障,
可點(diǎn)擊這里聯(lián)系客服!

聯(lián)系客服