출처 : http://www.supermind.org/blog/613/dom4j-xpath-tagsoup-namespaces-sweet
http://tomasblue.tistory.com/entry/Dom4j-XPath%EB%A5%BC-%EC%9D%B4%EC%9A%A9%ED%95%9C-%EC%BF%BC%EB%A6%AC-%EB%AC%B8%EC%A0%9C%EC%A0%90-%ED%95%B4%EA%B2%B0-Namespace%EA%B4%80%EB%A0%A8
// Usage example: parse an HTML page with TagSoup into a DOM4J document and query it with XPath.
//
// NOTE: this javax.xml.xpath instance is not referenced by any statement in this example;
// the queries below go through DOM4J's own selectSingleNode/selectNodes.
XPath xpath = XPathFactory.newInstance().newXPath();
//
// Read the page as UTF-8. InputStreamReader wraps a byte stream, so the file is opened first.
BufferedReader bis = new BufferedReader(new InputStreamReader(new java.io.FileInputStream("index.html"), "UTF-8"));
//
// TagSoup acts as the SAX parser so that non-well-formed HTML can still be read by DOM4J.
XMLReader tagsoup = new org.ccil.cowan.tagsoup.Parser();
SAXReader reader = new SAXReader(tagsoup);
Document doc;
try {
    doc = reader.read(bis);
} finally {
    // Release the file handle whether or not parsing succeeded.
    bis.close();
}
// Strip namespaces: the un-prefixed XPath expressions below only match elements without a namespace.
XMLUtils.fixNamespaces(doc);
// Optional debugging aid: dump the parsed document to a file.
//XMLUtils.generateXmlFile(doc, "index.xml");
// Target markup: <li class="bar">one
// The expression is absolute ("//"), so it is evaluated against the whole document.
DefaultText han_mean_Node = (DefaultText) doc.selectSingleNode("//li[@class='bar']/text()[1]");
// Target markup:
// <span class="count">
//   <b>2</b> items
// </span>
DefaultElement hanziCountNode = (DefaultElement) doc.selectSingleNode("//span[@class=\"count\"]/b");
// Target markup:
// <div id="content">
//   <div id="detail">
//     <ul id="data1">1</ul>
//     <ul id="data2">2</ul>
List contentList = doc.selectNodes("//div[@id='content']/div[@id='detail']/node()");

XMLUtils.java

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.QName;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
public class XMLUtils {
/**
* @param doc DOM4J XML Document
* Generate XML File
*/
public static void generateXmlFile(Document doc, String xmlfile) {
XMLWriter writer = null;
try {
OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(xmlfile)), "UTF-8");
OutputFormat format = OutputFormat.createPrettyPrint();
format.setEncoding("UTF-8");
writer = new XMLWriter(osw,format);
writer.write(doc);
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer != null) {
try {
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
/**
* @param doc DOM4J XML Document
* Generate XML File
*/
private static void generateXmlFile(Element doc, String xmlfile) {
XMLWriter writer = null;
try {
OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(xmlfile)), "UTF-8");
OutputFormat format = OutputFormat.createPrettyPrint();
format.setEncoding("UTF-8");
writer = new XMLWriter(osw,format);
writer.write(doc);
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer != null) {
try {
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
/**
* 네임스페이스 제거 XPath를 이용하려면 네임스페이스를 제거해야 함
*/
public static void fixNamespaces(Document doc) {
Element root = doc.getRootElement();
if (root.getNamespace() != Namespace.NO_NAMESPACE)
removeNamespaces(root.content());
}
/**
* 네임스페이스 원복
*/
public static void unfixNamespaces(Document doc, Namespace original) {
Element root = doc.getRootElement();
if (original != null)
setNamespaces(root.content(), original);
}
/**
* 목록내 모든 element의 네임스페이스 제거
*/
@SuppressWarnings("unchecked")
private static void removeNamespaces(List l) {
setNamespaces(l, Namespace.NO_NAMESPACE);
}
/**
* Element의 네임스페이스 설정(child포함)
*/
private static void setNamespaces(Element elem, Namespace ns) {
setNamespace(elem, ns);
setNamespaces(elem.content(), ns);
}
/**
* 목록내 node(child포함)의 네임스페이스 설정
*/
@SuppressWarnings("unchecked")
private static void setNamespaces(List l, Namespace ns) {
Node n = null;
for (int i = 0; i < l.size(); i++) {
n = (Node) l.get(i);
if (n.getNodeType() == Node.ATTRIBUTE_NODE)
((Attribute) n).setNamespace(ns);
if (n.getNodeType() == Node.ELEMENT_NODE)
setNamespaces((Element) n, ns);
}
}
/**
* Elemnet의 네임스페이스 설정
*/
private static void setNamespace(Element elem, Namespace ns) {
elem.setQName(QName.get(elem.getName(), ns, elem.getQualifiedName()));
}
}
ScrollViewTest.zip