对于现在越来越轻量级,越来越讲究速度和接近用户的应用来说,xml确实有点复杂了。解析起来不仅耗内存,而且很复杂。这就好像花了几千块钱买了个MS Office,但是80%的feature都用不着,还白白的耗着CPU和内存。个人觉得,设置文件用XML其实挺好,因为设置文件一般并不太大,而且要求可读性强,还有很多乱七八糟的需求,可以利用XML的力量。昨天搞chrome的设置,发现chrome的设置文件也是使用的json,读起来也是轻松愉快。前阵子做了个程序,需要解析豆瓣API调用返回的XML。真想说一句,豆瓣你别用XML了。至少,提供个json版的API调用吧。(以上谨代表个人观点)
解析豆瓣返回的xml,实在是不想用DOM这个重量级的玩意。DOM这个玩意,说它强大好还是说它官僚好呢。我倾向于使用SAXP解析。但是现在面临的一个问题是,我需要根据xml节点的名字和属性值(一个或者多个)来决定当前的值是不是我想要的。这就麻烦一点点。第一反应是考虑xpath。后来觉得不如自己做一个得了,权当是按需定制一个轻量级的xpath。
首先定义XMLSearchUnit类,这个类的实例用来描述一个需要在XML中搜索的值,值可以是xml节点的值,或者是节点的属性。
- package com.deepnighttwo.resourceresolver.douban.resolver.utils;
- import java.util.HashMap;
- import java.util.Map;
- import org.xml.sax.Attributes;
/**
 * Describes one value to look for in an XML document: either the text of a
 * node or the value of one of the node's attributes.
 *
 * <p>A node qualifies when its path equals {@link #getXmlPath()} (case-sensitive
 * comparison) and every attribute constraint registered through
 * {@link #addAttributeValidation(String, String)} holds.
 *
 * <p>Instances are mutable, and {@link #hashCode()} / {@link #equals(Object)}
 * depend on the mutable fields. Finish configuring a unit before using it as a
 * key in a hash-based collection.
 *
 * @author mzang
 */
public class XMLSearchUnit {

    // Attribute name -> required value; all entries must hold for a node to match.
    // May be null if a caller passed null to setAttributeMatchValidation.
    private Map<String, String> attributeMatchValidation = new HashMap<String, String>();

    // Name of the attribute whose value is the search target. Null or empty
    // means the target is the node value itself.
    private String expectedAttr;

    // Path of the node, formatted as /node_name/node_name/...
    private String xmlPath;

    /**
     * Creates a search unit for the node at the given path.
     *
     * @param xmlPath path of the node, formatted as /node_name/node_name/...
     */
    public XMLSearchUnit(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    /**
     * Tells whether a node meets the search conditions, i.e. its path equals
     * the configured path and all attribute constraints are satisfied.
     * Attribute values are compared ignoring case. A constraint whose expected
     * value is {@code null} is satisfied only when the attribute is absent.
     *
     * @param path       path of the node being inspected
     * @param attributes attributes of that node; may be {@code null}, which is
     *                   treated as "no attributes"
     * @return {@code true} if the node matches, {@code false} otherwise
     */
    public boolean match(String path, Attributes attributes) {
        if (xmlPath == null || !xmlPath.equals(path)) {
            return false;
        }
        // equals/hashCode accept a null constraint map, so match must as well.
        if (attributeMatchValidation == null || attributeMatchValidation.isEmpty()) {
            return true;
        }
        if (attributes == null) {
            // Constraints exist but the node has nothing to check them against.
            return false;
        }
        for (Map.Entry<String, String> rule : attributeMatchValidation.entrySet()) {
            String expected = rule.getValue();
            String actual = attributes.getValue(rule.getKey());
            boolean satisfied = (expected == null)
                    ? actual == null
                    : expected.equalsIgnoreCase(actual);
            if (!satisfied) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return the live map of attribute constraints (not a copy); {@code null}
     *         if {@code null} was passed to
     *         {@link #setAttributeMatchValidation(Map)}
     */
    public Map<String, String> getAttributeMatchValidation() {
        return attributeMatchValidation;
    }

    /**
     * Adds (or replaces) an attribute constraint.
     *
     * @param key   attribute name
     * @param value value the attribute must have, compared ignoring case
     */
    public void addAttributeValidation(String key, String value) {
        if (attributeMatchValidation == null) {
            // The map may have been set to null through the setter.
            attributeMatchValidation = new HashMap<String, String>();
        }
        attributeMatchValidation.put(key, value);
    }

    /**
     * @return the node path this unit searches for
     */
    public String getXmlPath() {
        return xmlPath;
    }

    /**
     * Replaces the whole constraint map. The given map is stored as is, not
     * copied.
     *
     * @param attributeMatchValidation attribute name to required value; may be
     *                                 {@code null} for "no constraints"
     */
    public void setAttributeMatchValidation(
            Map<String, String> attributeMatchValidation) {
        this.attributeMatchValidation = attributeMatchValidation;
    }

    /**
     * @return name of the attribute whose value is wanted, or {@code null} /
     *         empty when the node value is wanted
     */
    public String getExpectedAttr() {
        return expectedAttr;
    }

    /**
     * If the target is the node value, set expectedAttr to {@code null}. If the
     * target is an attribute value, set it to the attribute name.
     *
     * @param expectedAttr attribute name, or {@code null} for the node value
     */
    public void setExpectedAttr(String expectedAttr) {
        this.expectedAttr = expectedAttr;
    }

    /**
     * Combines the hash codes of the constraint map, the expected attribute and
     * the path. The value changes whenever one of those fields changes.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + hashOrZero(attributeMatchValidation);
        result = prime * result + hashOrZero(expectedAttr);
        result = prime * result + hashOrZero(xmlPath);
        return result;
    }

    /**
     * Two units are equal when they are of the same class and have equal
     * constraint maps, expected attributes and paths.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        XMLSearchUnit other = (XMLSearchUnit) obj;
        return sameOrBothNull(attributeMatchValidation, other.attributeMatchValidation)
                && sameOrBothNull(expectedAttr, other.expectedAttr)
                && sameOrBothNull(xmlPath, other.xmlPath);
    }

    // Null-safe hash code: 0 for null.
    private static int hashOrZero(Object value) {
        return (value == null) ? 0 : value.hashCode();
    }

    // Null-safe equality: two nulls are equal, null never equals non-null.
    private static boolean sameOrBothNull(Object left, Object right) {
        return (left == null) ? right == null : left.equals(right);
    }
}
这个类比较简单。就是用一个hashmap保存待匹配的attribute键值对,用一个字符串表示期待的attribute name,用一个字符串表示期待的node path。
然后就是如何在SAXP里用到这个类的实例去搜索了。
- package com.deepnighttwo.resourceresolver.douban.resolver.utils;
- import java.io.InputStream;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import javax.xml.parsers.SAXParser;
- import javax.xml.parsers.SAXParserFactory;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- /**
- *
- * SAXP parser working with XMLSearchUnit.
- *
- * @author mzang
- */
- public class DoubanSearchParser extends DefaultHandler {
- // create and initial search units
- public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit(
- "/feed/entry/id");
- public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit(
- "/entry/summary");
- public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit(
- "/entry/title");
- public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit(
- "/entry/db:attribute");
- public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit(
- "/feed/entry/link");
- static {
- DETAILS_LINK_URL_PATH.addAttributeValidation("rel", "alternate");
- DETAILS_LINK_URL_PATH.setExpectedAttr("href");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang", "zh_CN");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name", "aka");
- DETAILS_RATINGE_PATH.setExpectedAttr("average");
- DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");
- }
- // a map to store the XMLSearchUnit and value
- private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>();
- // a counter of search unit. if it is 0, then all search unit finds a match
- // value and the result of the XML will be skipped.
- private int count = 0;
- private StringBuilder path = new StringBuilder();
- private static final String pathSeparater = "/";
- private XMLSearchUnit[] searchUnits;
- List<XMLSearchUnit> foundItems = new ArrayList<XMLSearchUnit>();
- /**
- * constructor, accept XML input stream, 0 or more search unit instances.
- *
- * @param input
- * @param expectedPath
- * @return
- */
- public Map<XMLSearchUnit, String> parseResults(InputStream input,
- XMLSearchUnit... expectedPath) {
- for (XMLSearchUnit search : expectedPath) {
- results.put(search, null);
- }
- searchUnits = expectedPath;
- count = expectedPath.length;
- XMLReader xmlReader = null;
- try {
- SAXParserFactory spfactory = SAXParserFactory.newInstance();
- spfactory.setValidating(false);
- SAXParser saxParser = spfactory.newSAXParser();
- xmlReader = saxParser.getXMLReader();
- xmlReader.setContentHandler(this);
- xmlReader.parse(new InputSource(input));
- } catch (Exception e) {
- System.err.println(e);
- System.exit(1);
- }
- return results;
- }
- private void addToPath(String addPath) {
- path.append(pathSeparater).append(addPath.toLowerCase());
- }
- private void popPath() {
- int index = path.lastIndexOf(pathSeparater);
- // String removedPath = path.substring(index);
- path.delete(index, path.length());
- }
- @Override
- public void startElement(String uri, String localName, String qName,
- Attributes attributes) throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- // update path
- addToPath(qName);
- List<XMLSearchUnit> foundAttrItems = null;
- // check if current node matches search units. if it is a node value
- // search, then store it in a member variable named foundItems because
- // the value of the node is known only when reaches the end of the
- // node.but for attribute search, it value is known here. So then are
- // put in a local variable list named foundAttrItems.
- for (XMLSearchUnit unit : searchUnits) {
- if (unit.match(path.toString(), attributes) == true) {
- if (unit.getExpectedAttr() == null) {
- foundItems.add(unit);
- } else {
- if (foundAttrItems == null) {
- foundAttrItems = new ArrayList<XMLSearchUnit>();
- }
- foundAttrItems.add(unit);
- }
- }
- }
- // if no attribute match, return.
- if (foundAttrItems == null) {
- return;
- }
- // fill search unit value using attribute value. update count.
- for (XMLSearchUnit attrUnit : foundAttrItems) {
- String attrValue = attributes.getValue(attrUnit.getExpectedAttr());
- if (results.get(attrUnit) == null) {
- count--;
- }
- results.put(attrUnit, attrValue);
- count--;
- }
- }
- /**
- * if current node matches, the the node value is useful, store it.
- */
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- if (count == 0) {
- return;
- }
- if (foundItems.size() == 0) {
- return;
- }
- for (XMLSearchUnit unit : foundItems) {
- String content = new String(ch, start, length);
- if (results.get(unit) == null) {
- count--;
- }
- results.put(unit, content);
- }
- }
- @Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- popPath();
- }
- }
原文链接:http://www.cnblogs.com/deepnighttwo/archive/2011/03/13/1982748.html
【编辑推荐】