728x90
출처 : http://www.supermind.org/blog/613/dom4j-xpath-tagsoup-namespaces-sweet
http://tomasblue.tistory.com/entry/Dom4j-XPath%EB%A5%BC-%EC%9D%B4%EC%9A%A9%ED%95%9C-%EC%BF%BC%EB%A6%AC-%EB%AC%B8%EC%A0%9C%EC%A0%90-%ED%95%B4%EA%B2%B0-Namespace%EA%B4%80%EB%A0%A8
// XPath xpath = XPathFactory.newInstance().newXPath(); // BufferedReader bis = new BufferedReader (new InputStreamReader ("index.html"), "UTF-8")); // XMLReader tagsoup = new org.ccil.cowan.tagsoup.Parser(); SAXReader reader = new SAXReader(tagsoup); Document doc = reader.read(bis); // 네임스페이스 제거 XPath를 이용하려면 네임스페이스를 제거해야 함 XMLUtils.fixNamespaces(doc); //XMLUtils.generateXmlFile(doc, "index.xml"); //<li class="bar">하나 DefaultText han_mean_Node = (DefaultText)ajax_pron_dlNode.selectSingleNode("//li[@class='bar']/text()[1]"); //<span class="count"> // <b>2</b>개 //</span> DefaultElement hanziCountNode = (DefaultElement)doc.selectSingleNode("//span[@class=\"count\"]/b"); //<div id="content"> // <div id="detail"> // <ul id="data1">1</ul> // <ul id="data2">2</ul> List contentList = doc.selectNodes("//div[@id='content']/div[@id='detail']/node()");XMLUtils.java
import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.Namespace; import org.dom4j.Node; import org.dom4j.QName; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; public class XMLUtils { /** * @param doc DOM4J XML Document * Generate XML File */ public static void generateXmlFile(Document doc, String xmlfile) { XMLWriter writer = null; try { OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(xmlfile)), "UTF-8"); OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("UTF-8"); writer = new XMLWriter(osw,format); writer.write(doc); } catch (IOException e) { e.printStackTrace(); } finally { if(writer != null) { try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } } /** * @param doc DOM4J XML Document * Generate XML File */ private static void generateXmlFile(Element doc, String xmlfile) { XMLWriter writer = null; try { OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(xmlfile)), "UTF-8"); OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("UTF-8"); writer = new XMLWriter(osw,format); writer.write(doc); } catch (IOException e) { e.printStackTrace(); } finally { if(writer != null) { try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } } /** * 네임스페이스 제거 XPath를 이용하려면 네임스페이스를 제거해야 함 */ public static void fixNamespaces(Document doc) { Element root = doc.getRootElement(); if (root.getNamespace() != Namespace.NO_NAMESPACE) removeNamespaces(root.content()); } /** * 네임스페이스 원복 */ public static void unfixNamespaces(Document doc, Namespace original) { Element root = doc.getRootElement(); if (original != null) setNamespaces(root.content(), original); } /** * 목록내 모든 element의 네임스페이스 제거 */ @SuppressWarnings("unchecked") private static void removeNamespaces(List l) { setNamespaces(l, Namespace.NO_NAMESPACE); } /** * Element의 네임스페이스 설정(child포함) */ private static void setNamespaces(Element elem, Namespace ns) { setNamespace(elem, ns); setNamespaces(elem.content(), ns); } /** * 목록내 node(child포함)의 네임스페이스 설정 */ @SuppressWarnings("unchecked") private static void setNamespaces(List l, Namespace ns) { Node n = null; for (int i = 0; i < l.size(); i++) { n = (Node) l.get(i); if (n.getNodeType() == Node.ATTRIBUTE_NODE) ((Attribute) n).setNamespace(ns); if (n.getNodeType() == Node.ELEMENT_NODE) setNamespaces((Element) n, ns); } } /** * Elemnet의 네임스페이스 설정 */ private static void setNamespace(Element elem, Namespace ns) { elem.setQName(QName.get(elem.getName(), ns, elem.getQualifiedName())); } }
728x90