XML压缩单元测试代码
/// <summary>
/// Console demo that compares three encodings of the same sample document:
/// plain Deflate, the dictionary-based <c>XmlZip</c> encoding, and
/// <c>XmlZip</c> followed by Deflate. It also prints the round-tripped XML.
/// </summary>
class Program {
    /// <summary>
    /// Sample document. It is used both to build the name dictionary and as the payload.
    /// </summary>
    public static string XML = @"<?xml version=""1.0"" encoding=""utf-16""?>
<Customer>
<CustomerID>ALFKI</CustomerID>
<PO>9572658</PO>
<Address AddressType=""work"">
<Street>One Main Street</Street>
<City>Anywhere</City>
<State>NJ</State>
<Zip>08080</Zip>
</Address>
<Order>
<OrderID>10966</OrderID >
<LineItem>
<ProductID>37</ProductID>
<UnitPrice>26.50 </UnitPrice>
<Quantity>8</Quantity>
<Description>Gravad lax </Description>
</LineItem>
<LineItem>
<ProductID>56 </ProductID>
<UnitPrice>38.00</UnitPrice>
<Quantity>12</Quantity>
<Description>Gnocchi di nonna Alice</Description>
</LineItem>
</Order>
</Customer>";

    /// <summary>
    /// Returns the number of bytes <paramref name="data"/> occupies after Deflate compression.
    /// </summary>
    /// <param name="data">Bytes to compress.</param>
    /// <returns>Length of the compressed output in bytes.</returns>
    static long DeflatedLength(byte[] data) {
        using (MemoryStream target = new MemoryStream()) {
            // leaveOpen = true: the DeflateStream must be closed (which flushes the
            // final block) before the length is read, while the target stays usable.
            using (DeflateStream deflate = new DeflateStream(target, CompressionMode.Compress, true)) {
                deflate.Write(data, 0, data.Length);
            }
            return target.Length;
        }
    }

    /// <summary>
    /// Runs the comparison and prints each size, then waits for a key press.
    /// </summary>
    /// <param name="args">Unused.</param>
    static void Main(string[] args) {
        XmlZip zip = new XmlZip();

        // Baseline: UTF-8 size of the raw document, and its size after Deflate alone.
        byte[] bs = Encoding.UTF8.GetBytes(XML);
        Console.WriteLine("原始文件长度:{0}", bs.Length);
        Console.WriteLine("Deflate压缩后长度: {0}", DeflatedLength(bs));

        // Dictionary encoding: the sample itself serves as the schema example.
        zip.Init(XML);
        bs = zip.XmlToBytes(XML);
        Console.WriteLine("XML压缩后长度:{0}", bs.Length);

        // Round trip back to text to show that the content survives.
        string str = zip.BytesToXml(bs);
        Console.WriteLine("还原后长度:{0}", Encoding.UTF8.GetByteCount(str));
        Console.WriteLine(str);

        // Both techniques stacked.
        Console.WriteLine("先XML压缩,再Deflate压缩后的长度:{0}", DeflatedLength(bs));
        Console.ReadKey();
    }
}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
测试输出
原始文件长度:740
Deflate压缩后长度: 438
XML压缩后长度:295
还原后长度:727
<?xml version="1.0" encoding="utf-16"?>
<Customer>
<CustomerID>ALFKI</CustomerID>
<PO>9572658</PO>
<Address AddressType="work">
<Street>One Main Street</Street>
<City>Anywhere</City>
<State>NJ</State>
<Zip>08080</Zip>
</Address>
<Order>
<OrderID>10966</OrderID>
<LineItem>
<ProductID>37</ProductID>
<UnitPrice>26.50 </UnitPrice>
<Quantity>8</Quantity>
<Description>Gravad lax </Description>
</LineItem>
<LineItem>
<ProductID>56 </ProductID>
<UnitPrice>38.00</UnitPrice>
<Quantity>12</Quantity>
<Description>Gnocchi di nonna Alice</Description>
</LineItem>
</Order>
</Customer>
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
先XML压缩,再Deflate压缩后的长度:357
可以看到,XML压缩后的数据约为原始数据的40%(740字节压缩到295字节)。压缩率可能不如其它专用的压缩算法,但效果还算令人满意。而且这个算法比较通用:只要通信双方知道XML的Schema,甚至只需要各自持有一份完整的示例文档,就可以进行压缩通信。目前只做了功能测试,没有做性能测试,大家可以先借鉴一下思路。
完整代码
大致原理是:通信双方各持有一份由XML文档的节点名称和属性名称构成的字典。发送方传输时用ushort编号代替原有的XML标签和属性名,接收方再通过字典把ushort编号还原成原始的元素名和属性名,这样就省去了大量重复出现的标签文本。
代码仅作为本文的示例,写得比较随意,没有考虑防御性和健壮性。
/// <summary>
/// Kind of name stored in a dictionary entry: an element name or an attribute name.
/// </summary>
internal enum ItemType {
    /// <summary>The entry names an XML element.</summary>
    Element = 0,

    /// <summary>
    /// The entry names an XML attribute. The identifier's spelling is kept as-is
    /// because other types in this file refer to it by this name.
    /// </summary>
    Attritube = 1
}
/// <summary>
/// One entry of the name dictionary: the path under which a name was seen,
/// the name itself, and whether it belongs to an element or an attribute.
/// </summary>
internal class XmlNodeItem {
    /// <summary>Path string that identifies where the name occurs in the document.</summary>
    public string Xpath { get; set; }

    /// <summary>The element or attribute name.</summary>
    public string Text { get; set; }

    /// <summary>Whether <see cref="Text"/> is an element name or an attribute name.</summary>
    public ItemType ItemType { get; set; }

    /// <summary>Returns the value of <see cref="Xpath"/>.</summary>
    public override string ToString() {
        string path = this.Xpath;
        return path;
    }
}
/// <summary>
/// Mutable path made of segments that are pushed and popped at the end.
/// Element segments are rendered as <c>/name</c>, attribute segments as <c>/@name</c>;
/// <see cref="ToString"/> concatenates all segments from first to last.
/// </summary>
internal class MyXpath {
    // Segments in insertion order; the last node is the most recently added one.
    private readonly LinkedList<string> _node = new LinkedList<string>();

    /// <summary>Appends an element segment (<c>/name</c>).</summary>
    /// <param name="name">Element name.</param>
    public void AddElement(string name) {
        _node.AddLast("/" + name);
    }

    /// <summary>Appends an attribute segment (<c>/@name</c>).</summary>
    /// <param name="name">Attribute name.</param>
    public void AddAttribute(string name) {
        _node.AddLast("/@" + name);
    }

    /// <summary>
    /// Removes the most recently added segment, whether it is an element or an attribute.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The path has no segments.</exception>
    public void RemoveLastElement() {
        _node.RemoveLast();
    }

    /// <summary>
    /// Returns all segments joined in order, or an empty string when the path has no segments.
    /// </summary>
    public override string ToString() {
        StringBuilder sb = new StringBuilder();
        // foreach handles the empty list, where reading _node.First.Value would fail.
        foreach (string segment in _node) {
            sb.Append(segment);
        }
        return sb.ToString();
    }
}
/// <summary>
/// Dictionary-based XML encoder/decoder. <see cref="Init"/> scans a sample document and
/// assigns a <c>ushort</c> index to every element and attribute it meets, keyed by its path.
/// <see cref="XmlToBytes"/> then replaces names with those indices, and
/// <see cref="BytesToXml"/> reverses the process. Both sides must call <see cref="Init"/>
/// with the same sample so that they share the same index assignment.
///
/// Record layout produced by <see cref="XmlToBytes"/>:
///   element start : index
///   attribute     : index, ushort byte length, UTF-8 bytes of the value
///   text / CDATA  : 0, ushort byte length, UTF-8 bytes
///   element end   : 65535
///
/// Names are taken from <c>XmlReader.Name</c> as-is; namespaces get no special handling.
/// Comments, processing instructions and whitespace-only nodes are not encoded.
/// </summary>
class XmlZip {
    // Reserved markers; dictionary indices start at 1 and never reach EndElementFlag.
    private const ushort TextFlag = 0;
    private const ushort EndElementFlag = ushort.MaxValue;

    // index -> entry (used when decoding)
    Dictionary<ushort, XmlNodeItem> _map = new Dictionary<ushort, XmlNodeItem>();
    // path -> index (used when encoding)
    Dictionary<string, ushort> _map2 = new Dictionary<string, ushort>();

    /// <summary>
    /// Builds the dictionaries from a sample document. Any dictionary built by a previous
    /// call is discarded.
    /// </summary>
    /// <param name="xmlInput">Sample XML containing every element/attribute path that will be encoded.</param>
    /// <exception cref="System.InvalidOperationException">The sample needs more indices than a ushort can hold.</exception>
    public void Init(string xmlInput) {
        // Indices restart at 1 on every call, so stale entries would map paths to wrong items.
        _map.Clear();
        _map2.Clear();

        MyXpath path = new MyXpath();
        ushort next = 1;
        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                switch (reader.NodeType) {
                    case XmlNodeType.Element:
                        path.AddElement(reader.Name);
                        next = Register(next, path.ToString(), reader.Name, ItemType.Element);
                        if (reader.MoveToFirstAttribute()) {
                            do {
                                path.AddAttribute(reader.Name);
                                next = Register(next, path.ToString(), reader.Name, ItemType.Attritube);
                                path.RemoveLastElement();
                            } while (reader.MoveToNextAttribute());
                            // IsEmptyElement is only meaningful on the element node itself.
                            reader.MoveToElement();
                        }
                        // <a/> produces no EndElement node, so pop here.
                        if (reader.IsEmptyElement) path.RemoveLastElement();
                        break;
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        break;
                    default:
                        break;
                }
            }
        }
    }

    /// <summary>
    /// Stores one entry under <paramref name="index"/> and returns the next free index.
    /// Every occurrence gets its own index; when a path repeats, the path lookup keeps
    /// the index of the last occurrence.
    /// </summary>
    private ushort Register(ushort index, string xpath, string name, ItemType type) {
        if (index == EndElementFlag) {
            throw new System.InvalidOperationException(
                "The sample document contains too many elements/attributes to index with a ushort.");
        }
        XmlNodeItem item = new XmlNodeItem();
        item.Xpath = xpath;
        item.Text = name;
        item.ItemType = type;
        _map[index] = item;
        _map2[xpath] = index;
        return (ushort)(index + 1);
    }

    /// <summary>
    /// Encodes a document using the dictionary built by <see cref="Init"/>.
    /// Attributes whose path is not in the dictionary are omitted from the output.
    /// </summary>
    /// <param name="xmlInput">XML text to encode.</param>
    /// <returns>The encoded records.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// An element path is not in the dictionary, or a text/attribute value is longer than 65535 UTF-8 bytes.
    /// </exception>
    public byte[] XmlToBytes(string xmlInput) {
        // Local path: a failed parse cannot leave state behind for the next call.
        MyXpath path = new MyXpath();
        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter bw = new BinaryWriter(ms))
        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                ushort index;
                switch (reader.NodeType) {
                    case XmlNodeType.Element:
                        path.AddElement(reader.Name);
                        if (!_map2.TryGetValue(path.ToString(), out index)) {
                            // Skipping the start while still emitting the end marker would
                            // produce a stream that cannot be decoded, so fail here instead.
                            throw new System.InvalidOperationException(
                                "Element path '" + path.ToString() + "' is not in the dictionary; call Init with a sample that contains it.");
                        }
                        bw.Write(index);
                        if (reader.MoveToFirstAttribute()) {
                            do {
                                path.AddAttribute(reader.Name);
                                bool known = _map2.TryGetValue(path.ToString(), out index);
                                // Pop unconditionally so an unknown attribute cannot corrupt later paths.
                                path.RemoveLastElement();
                                if (known) {
                                    bw.Write(index);
                                    WriteLengthPrefixed(bw, reader.Value);
                                }
                            } while (reader.MoveToNextAttribute());
                            reader.MoveToElement();
                        }
                        if (reader.IsEmptyElement) {
                            path.RemoveLastElement();
                            bw.Write(EndElementFlag);
                        }
                        break;
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        bw.Write(EndElementFlag);
                        break;
                    case XmlNodeType.Text:
                    case XmlNodeType.CDATA:
                        // CDATA is encoded as ordinary text; the decoder escapes it when writing.
                        bw.Write(TextFlag);
                        WriteLengthPrefixed(bw, reader.Value);
                        break;
                    default:
                        break;
                }
            }
            bw.Flush();
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Decodes records produced by <see cref="XmlToBytes"/> back into indented XML text.
    /// </summary>
    /// <param name="bytes">Encoded records.</param>
    /// <returns>The reconstructed XML document as a string.</returns>
    /// <exception cref="InvalidDataException">A record starts with an index that is not in the dictionary.</exception>
    /// <exception cref="EndOfStreamException">The data ends in the middle of a record.</exception>
    public string BytesToXml(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;
        using (MemoryStream ms = new MemoryStream(bytes))
        using (BinaryReader br = new BinaryReader(ms))
        using (StringWriter sw = new StringWriter(sb))
        using (XmlWriter writer = XmlWriter.Create(sw, settings)) {
            // Position check instead of PeekChar(): PeekChar decodes bytes as characters
            // and is not a reliable end-of-data test for binary content.
            while (ms.Position < ms.Length) {
                ushort readFlag = br.ReadUInt16();
                XmlNodeItem item;
                if (_map.TryGetValue(readFlag, out item)) {
                    if (item.ItemType == ItemType.Element) {
                        writer.WriteStartElement(item.Text);
                    }
                    else if (item.ItemType == ItemType.Attritube) {
                        writer.WriteAttributeString(item.Text, ReadLengthPrefixed(br));
                    }
                }
                else if (readFlag == TextFlag) {
                    writer.WriteString(ReadLengthPrefixed(br));
                }
                else if (readFlag == EndElementFlag) {
                    writer.WriteEndElement();
                }
                else {
                    // Without knowing the record type its length is unknown, so the rest
                    // of the stream cannot be interpreted.
                    throw new InvalidDataException(
                        "Unknown dictionary index " + readFlag + " in encoded data.");
                }
            }
            writer.Flush();
        }
        // The writer is closed at this point, so sb holds the complete document.
        return sb.ToString();
    }

    /// <summary>Writes a ushort byte count followed by the UTF-8 bytes of <paramref name="value"/>.</summary>
    private static void WriteLengthPrefixed(BinaryWriter bw, string value) {
        byte[] bs = Encoding.UTF8.GetBytes(value);
        if (bs.Length > ushort.MaxValue) {
            // A plain cast would wrap the length and desynchronise the decoder.
            throw new System.InvalidOperationException(
                "Value of " + bs.Length + " UTF-8 bytes exceeds the 65535-byte limit of the format.");
        }
        bw.Write((ushort)bs.Length);
        bw.Write(bs);
    }

    /// <summary>Reads a ushort byte count followed by that many UTF-8 bytes and returns the decoded string.</summary>
    private static string ReadLengthPrefixed(BinaryReader br) {
        int len = br.ReadUInt16();
        byte[] bs = br.ReadBytes(len);
        if (bs.Length != len) {
            throw new EndOfStreamException("Encoded data ends inside a length-prefixed value.");
        }
        return Encoding.UTF8.GetString(bs);
    }
}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
- 111.
- 112.
- 113.
- 114.
- 115.
- 116.
- 117.
- 118.
- 119.
- 120.
- 121.
- 122.
- 123.
- 124.
- 125.
- 126.
- 127.
- 128.
- 129.
- 130.
- 131.
- 132.
- 133.
- 134.
- 135.
- 136.
- 137.
- 138.
- 139.
- 140.
- 141.
- 142.
- 143.
- 144.
- 145.
- 146.
- 147.
- 148.
- 149.
- 150.
- 151.
- 152.
- 153.
- 154.
- 155.
- 156.
- 157.
- 158.
- 159.
- 160.
- 161.
- 162.
- 163.
- 164.
- 165.
- 166.
- 167.
- 168.
- 169.
- 170.
- 171.
- 172.
- 173.
- 174.
- 175.
- 176.
- 177.
- 178.
- 179.
- 180.
- 181.
- 182.
- 183.
- 184.
- 185.
- 186.
- 187.
- 188.
- 189.
【编辑推荐】