XML压缩单元测试代码
/// <summary>
/// Console demo that prints the size of a sample XML document in four forms:
/// raw UTF-8, Deflate-compressed, dictionary-compressed with <c>XmlZip</c>,
/// and dictionary-compressed followed by Deflate. It also prints the document
/// restored from the dictionary-compressed bytes.
/// </summary>
class Program {
    /// <summary>
    /// Sample order document. It is used both to build the name dictionary
    /// and as the payload that gets compressed.
    /// </summary>
    public static string XML = @"<?xml version=""1.0"" encoding=""utf-16""?>
<Customer>
<CustomerID>ALFKI</CustomerID>
<PO>9572658</PO>
<Address AddressType=""work"">
<Street>One Main Street</Street>
<City>Anywhere</City>
<State>NJ</State>
<Zip>08080</Zip>
</Address>
<Order>
<OrderID>10966</OrderID >
<LineItem>
<ProductID>37</ProductID>
<UnitPrice>26.50 </UnitPrice>
<Quantity>8</Quantity>
<Description>Gravad lax </Description>
</LineItem>
<LineItem>
<ProductID>56 </ProductID>
<UnitPrice>38.00</UnitPrice>
<Quantity>12</Quantity>
<Description>Gnocchi di nonna Alice</Description>
</LineItem>
</Order>
</Customer>";

    /// <summary>
    /// Runs the size comparison and waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args) {
        XmlZip zip = new XmlZip();

        byte[] raw = Encoding.UTF8.GetBytes(XML);
        Console.WriteLine("原始文件长度:{0}", raw.Length);
        Console.WriteLine("Deflate压缩后长度: {0}", GetDeflatedLength(raw));

        // The dictionary is built from the very document that is compressed next.
        zip.Init(XML);
        byte[] packed = zip.XmlToBytes(XML);
        Console.WriteLine("XML压缩后长度:{0}", packed.Length);

        string restored = zip.BytesToXml(packed);
        Console.WriteLine("还原后长度:{0}", Encoding.UTF8.GetByteCount(restored));
        Console.WriteLine(restored);

        Console.WriteLine("先XML压缩,再Deflate压缩后的长度:{0}", GetDeflatedLength(packed));
        Console.ReadKey();
    }

    /// <summary>
    /// Deflate-compresses <paramref name="data"/> into memory and returns the
    /// number of compressed bytes.
    /// </summary>
    /// <param name="data">Bytes to compress.</param>
    /// <returns>Length of the compressed output in bytes.</returns>
    private static long GetDeflatedLength(byte[] data) {
        using (MemoryStream target = new MemoryStream()) {
            // leaveOpen = true: the DeflateStream has to be closed so that it
            // flushes its final block, but the target must stay readable.
            using (DeflateStream deflate = new DeflateStream(target, CompressionMode.Compress, true)) {
                deflate.Write(data, 0, data.Length);
            }
            return target.Length;
        }
    }
}
测试输出
原始文件长度:740
Deflate压缩后长度: 438
XML压缩后长度:295
还原后长度:727
- <?xml version="1.0" encoding="utf-16"?>
- <Customer>
- <CustomerID>ALFKI</CustomerID>
- <PO>9572658</PO>
- <Address AddressType="work">
- <Street>One Main Street</Street>
- <City>Anywhere</City>
- <State>NJ</State>
- <Zip>08080</Zip>
- </Address>
- <Order>
- <OrderID>10966</OrderID>
- <LineItem>
- <ProductID>37</ProductID>
- <UnitPrice>26.50 </UnitPrice>
- <Quantity>8</Quantity>
- <Description>Gravad lax </Description>
- </LineItem>
- <LineItem>
- <ProductID>56 </ProductID>
- <UnitPrice>38.00</UnitPrice>
- <Quantity>12</Quantity>
- <Description>Gnocchi di nonna Alice</Description>
- </LineItem>
- </Order>
- </Customer>
先XML压缩,再Deflate压缩后的长度:357
可以看到,XML压缩后的数据约为原始数据的40%(295/740字节,略多于三分之一)。压缩率可能不如其它专用的压缩算法,但效果还算令人满意。而且这个算法比较通用:只要通信双方知道XML的Schema,甚至只需要双方各持有同一份完整的示例XML文档,就可以进行压缩通信。目前只做了功能测试,没做性能测试,大家可以先借鉴一下思路。
完整代码
大致原理是:通信双方各持有一个由XML文档的元素名称和属性名称构成的字典;发送方传输时用ushort编号代替原有的XML标签和属性名,接收方再通过字典把ushort编号还原成原始的元素名和属性名,这样就省去了大量不必要的重复标签。
代码仅作为本文的示例,写得比较随意,没有做防御性处理,健壮性也不足。
/// <summary>
/// Kind of XML name that a dictionary entry stands for.
/// </summary>
internal enum ItemType {
    /// <summary>The entry names an XML element.</summary>
    Element = 0,

    /// <summary>
    /// The entry names an XML attribute. The spelling of this identifier is
    /// kept as it is because other code in this file refers to it by name.
    /// </summary>
    Attritube = 1
}
/// <summary>
/// One entry of the name dictionary: the node kind, the name to write back
/// into the XML, and the path that identifies where that name occurs.
/// </summary>
internal class XmlNodeItem {
    /// <summary>Whether this entry is an element or an attribute.</summary>
    public ItemType ItemType { get; set; }

    /// <summary>Element or attribute name as read from the XML.</summary>
    public string Text { get; set; }

    /// <summary>Path string that identifies this node in the dictionary.</summary>
    public string Xpath { get; set; }

    /// <summary>Returns the entry's <see cref="Xpath"/>.</summary>
    public override string ToString() {
        string path = this.Xpath;
        return path;
    }
}
/// <summary>
/// Keeps the path from the document root to the current node as an ordered
/// list of segments: "/name" for an element and "/@name" for an attribute.
/// </summary>
internal class MyXpath {
    private readonly LinkedList<string> _node = new LinkedList<string>();

    /// <summary>Appends an element segment ("/name") to the end of the path.</summary>
    /// <param name="name">Element name.</param>
    public void AddElement(string name) {
        _node.AddLast("/" + name);
    }

    /// <summary>Appends an attribute segment ("/@name") to the end of the path.</summary>
    /// <param name="name">Attribute name.</param>
    public void AddAttribute(string name) {
        _node.AddLast("/@" + name);
    }

    /// <summary>
    /// Removes the last segment of the path, whether it is an element or an
    /// attribute segment.
    /// </summary>
    /// <exception cref="InvalidOperationException">The path is empty.</exception>
    public void RemoveLastElement() {
        _node.RemoveLast();
    }

    /// <summary>
    /// Concatenates all segments in order, e.g. "/Customer/Address/@AddressType".
    /// </summary>
    /// <returns>The path string, or an empty string when the path has no segments.</returns>
    public override string ToString() {
        StringBuilder sb = new StringBuilder();
        // foreach copes with an empty list, unlike dereferencing _node.First.
        foreach (string segment in _node) {
            sb.Append(segment);
        }
        return sb.ToString();
    }
}
/// <summary>
/// Dictionary-based XML packer. <see cref="Init"/> learns the element and
/// attribute paths of a sample document and numbers them; <see cref="XmlToBytes"/>
/// replaces names with those numbers; <see cref="BytesToXml"/> reverses it.
/// Both sides must call <see cref="Init"/> with the same sample so that they
/// assign the same numbers.
/// </summary>
/// <remarks>
/// Byte layout written by <see cref="XmlToBytes"/> (all ushort values are
/// written with <see cref="BinaryWriter"/>):
/// element start = index; attribute = index, length, UTF-8 bytes;
/// text = 0, length, UTF-8 bytes; element end = 65535.
/// Only element, attribute and text nodes are encoded; every other node type
/// is skipped.
/// </remarks>
class XmlZip {
    /// <summary>Flag that precedes a text node's length-prefixed payload.</summary>
    private const ushort TextFlag = 0;

    /// <summary>Flag that closes the most recently opened element.</summary>
    private const ushort EndElementFlag = ushort.MaxValue;

    // index -> entry, used when decoding.
    private Dictionary<ushort, XmlNodeItem> _map = new Dictionary<ushort, XmlNodeItem>();
    // path -> index, used when encoding.
    private Dictionary<string, ushort> _map2 = new Dictionary<string, ushort>();

    /// <summary>
    /// Builds the dictionary from a sample document, replacing any dictionary
    /// built by an earlier call. Every element and attribute occurrence gets
    /// the next index, starting at 1; when a path occurs more than once, the
    /// index of its last occurrence is the one used for encoding.
    /// </summary>
    /// <param name="xmlInput">Sample XML document.</param>
    /// <exception cref="InvalidOperationException">
    /// The sample has so many nodes that an index would collide with the
    /// end-of-element flag.
    /// </exception>
    public void Init(string xmlInput) {
        // Build into locals and publish at the end, so a failure while
        // parsing leaves the previous dictionary intact.
        Dictionary<ushort, XmlNodeItem> byIndex = new Dictionary<ushort, XmlNodeItem>();
        Dictionary<string, ushort> byPath = new Dictionary<string, ushort>();
        MyXpath path = new MyXpath();
        ushort next = 1;

        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                switch (reader.NodeType) {
                    case XmlNodeType.Element:
                        path.AddElement(reader.Name);
                        next = Register(byIndex, byPath, next, path.ToString(), reader.Name, ItemType.Element);
                        if (reader.MoveToFirstAttribute()) {
                            do {
                                path.AddAttribute(reader.Name);
                                next = Register(byIndex, byPath, next, path.ToString(), reader.Name, ItemType.Attritube);
                                path.RemoveLastElement();
                            } while (reader.MoveToNextAttribute());
                            reader.MoveToElement();
                        }
                        // An empty element produces no EndElement node, so pop here.
                        if (reader.IsEmptyElement) {
                            path.RemoveLastElement();
                        }
                        break;
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        break;
                    default:
                        break;
                }
            }
        }

        _map = byIndex;
        _map2 = byPath;
    }

    /// <summary>
    /// Encodes a document using the dictionary built by <see cref="Init"/>.
    /// </summary>
    /// <remarks>
    /// An element whose path is not in the dictionary is left out together
    /// with its attributes and its end marker, so the output stays balanced;
    /// text inside it is still written. An attribute whose path is not in the
    /// dictionary is left out.
    /// </remarks>
    /// <param name="xmlInput">XML document to encode.</param>
    /// <returns>The encoded bytes.</returns>
    /// <exception cref="ArgumentException">
    /// A text or attribute value is longer than 65535 UTF-8 bytes and cannot
    /// be represented by the ushort length prefix.
    /// </exception>
    public byte[] XmlToBytes(string xmlInput) {
        MyXpath path = new MyXpath();
        // One entry per open element: true when its start index was written,
        // so that only those elements get an end marker.
        Stack<bool> written = new Stack<bool>();

        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter bw = new BinaryWriter(ms))
        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                switch (reader.NodeType) {
                    case XmlNodeType.Element: {
                        ushort index;
                        path.AddElement(reader.Name);
                        bool known = _map2.TryGetValue(path.ToString(), out index);
                        if (known) {
                            bw.Write(index);
                        }
                        if (reader.MoveToFirstAttribute()) {
                            do {
                                path.AddAttribute(reader.Name);
                                bool knownAttribute = _map2.TryGetValue(path.ToString(), out index);
                                // Pop unconditionally; a leftover attribute
                                // segment would corrupt every later lookup.
                                path.RemoveLastElement();
                                if (known && knownAttribute) {
                                    bw.Write(index);
                                    WriteLengthPrefixed(bw, reader.Value);
                                }
                            } while (reader.MoveToNextAttribute());
                            reader.MoveToElement();
                        }
                        if (reader.IsEmptyElement) {
                            // No EndElement node will follow for this element.
                            path.RemoveLastElement();
                            if (known) {
                                bw.Write(EndElementFlag);
                            }
                        }
                        else {
                            written.Push(known);
                        }
                        break;
                    }
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        if (written.Pop()) {
                            bw.Write(EndElementFlag);
                        }
                        break;
                    case XmlNodeType.Text:
                        bw.Write(TextFlag);
                        WriteLengthPrefixed(bw, reader.Value);
                        break;
                    default:
                        break;
                }
            }
            bw.Flush();
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Decodes bytes produced by <see cref="XmlToBytes"/> back into indented
    /// XML text, using the dictionary built by <see cref="Init"/>.
    /// </summary>
    /// <remarks>
    /// A flag that is neither a dictionary index, the text flag nor the
    /// end-of-element flag is ignored.
    /// </remarks>
    /// <param name="bytes">Encoded document.</param>
    /// <returns>The restored XML text.</returns>
    /// <exception cref="EndOfStreamException">
    /// The input ends in the middle of a flag, a length or a payload.
    /// </exception>
    public string BytesToXml(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;

        using (MemoryStream ms = new MemoryStream(bytes))
        using (BinaryReader br = new BinaryReader(ms))
        using (StringWriter sw = new StringWriter(sb))
        using (XmlWriter writer = XmlWriter.Create(sw, settings)) {
            // Compare positions instead of calling PeekChar(): PeekChar decodes
            // the next bytes as a character, which is meaningless for this
            // binary layout.
            while (ms.Position < ms.Length) {
                ushort readFlag = br.ReadUInt16();
                XmlNodeItem item;
                if (_map.TryGetValue(readFlag, out item)) {
                    if (item.ItemType == ItemType.Element) {
                        writer.WriteStartElement(item.Text);
                    }
                    else if (item.ItemType == ItemType.Attritube) {
                        writer.WriteAttributeString(item.Text, ReadLengthPrefixed(br));
                    }
                }
                else if (readFlag == TextFlag) {
                    writer.WriteString(ReadLengthPrefixed(br));
                }
                else if (readFlag == EndElementFlag) {
                    writer.WriteEndElement();
                }
            }
            writer.Flush();
        }
        return sb.ToString();
    }

    /// <summary>
    /// Stores one dictionary entry under <paramref name="index"/> and returns
    /// the index to use for the next entry.
    /// </summary>
    private static ushort Register(Dictionary<ushort, XmlNodeItem> byIndex, Dictionary<string, ushort> byPath,
                                   ushort index, string xpath, string name, ItemType type) {
        if (index == EndElementFlag) {
            throw new InvalidOperationException("The sample document has too many nodes for a ushort index.");
        }
        byIndex[index] = new XmlNodeItem() {
            Xpath = xpath,
            Text = name,
            ItemType = type
        };
        // A repeated path keeps the index of its latest occurrence.
        byPath[xpath] = index;
        return (ushort)(index + 1);
    }

    /// <summary>Writes a ushort byte count followed by the UTF-8 bytes of <paramref name="value"/>.</summary>
    private static void WriteLengthPrefixed(BinaryWriter bw, string value) {
        byte[] bs = Encoding.UTF8.GetBytes(value);
        if (bs.Length > ushort.MaxValue) {
            // A plain cast would wrap the length and desynchronise the decoder.
            throw new ArgumentException("A text or attribute value exceeds 65535 UTF-8 bytes.");
        }
        bw.Write((ushort)bs.Length);
        bw.Write(bs);
    }

    /// <summary>Reads a ushort byte count followed by that many UTF-8 bytes.</summary>
    private static string ReadLengthPrefixed(BinaryReader br) {
        int len = br.ReadUInt16();
        byte[] bs = br.ReadBytes(len);
        if (bs.Length != len) {
            // ReadBytes returns fewer bytes at end of stream instead of throwing.
            throw new EndOfStreamException("The encoded document is truncated.");
        }
        return Encoding.UTF8.GetString(bs);
    }
}
【编辑推荐】