名前空間ありの XML で XPath
名前空間付きの XML に対して XPath 式を評価するときは、式の中で使う接頭辞と名前空間 URI を対応付けるために javax.xml.namespace.NamespaceContext を実装し、XPath#setNamespaceContext に渡す必要がある。めんどっ。
package tmp;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Sample program: evaluates an XPath expression that uses a namespace prefix
 * against a namespace-aware XML document downloaded over HTTP.
 *
 * <p>The XPath API only understands prefixes that are resolved through a
 * {@link NamespaceContext}, so this class supplies one that binds the
 * {@code hatena} prefix.
 */
public class NSTest {

    /** Prefix used in the XPath expression for the Hatena namespace. */
    private static final String HATENA_PREFIX = "hatena";

    /** Namespace URI bound to {@link #HATENA_PREFIX}. */
    private static final String HATENA_NS_URI = "http://www.hatena.ne.jp/info/xmlns#";

    /** Feed that {@link #main(String[])} downloads. */
    private static final String FEED_URL = "http://f.hatena.ne.jp/twitter/rss";

    /** Selects every {@code imageurl} element in the Hatena namespace. */
    private static final String IMAGE_URL_XPATH = "//hatena:imageurl";

    /**
     * Downloads the resource at {@code url} with an HTTP GET and parses it as
     * a namespace-aware DOM document.
     *
     * @param url address of the XML resource; must yield an
     *            {@link HttpURLConnection} (the connection is cast to that type)
     * @return the parsed document
     * @throws IOException if the URL is malformed or the transfer fails
     * @throws SAXException if the response is not well-formed XML
     * @throws ParserConfigurationException if the parser cannot be configured
     */
    public static Document getDocument(String url)
            throws IOException, SAXException, ParserConfigurationException {
        HttpURLConnection connection = null;
        BufferedInputStream in = null;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("GET");
            in = new BufferedInputStream(connection.getInputStream());

            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // Without this the parser ignores namespaces and the prefixed
            // XPath expression cannot match anything.
            factory.setNamespaceAware(true);
            // The XML comes from a remote server over plain HTTP, so turn on
            // JAXP secure processing. What exactly gets restricted (entity
            // expansion limits, external DTD access, ...) is decided by the
            // parser implementation.
            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            return factory.newDocumentBuilder().parse(in);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Best effort: a failure while closing must not mask the
                    // parse result or the exception already in flight.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Downloads the feed and prints the text content of every
     * {@code hatena:imageurl} element, one per line, to standard output.
     *
     * @param args ignored
     * @throws Exception if the download, the parse or the XPath evaluation fails
     */
    public static void main(String[] args) throws Exception {
        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new HatenaNamespaceContext());

        Document document = NSTest.getDocument(FEED_URL);
        NodeList nodes = (NodeList) xpath.evaluate(IMAGE_URL_XPATH, document, XPathConstants.NODESET);
        for (int i = 0, m = nodes.getLength(); i < m; ++i) {
            System.out.println(nodes.item(i).getTextContent());
        }
    }

    /**
     * Namespace context that binds {@code hatena} to the Hatena namespace URI
     * and also answers for the fixed {@code xml} and {@code xmlns} bindings
     * required by the {@link NamespaceContext} contract.
     */
    private static final class HatenaNamespaceContext implements NamespaceContext {

        /**
         * Resolves a prefix to its namespace URI.
         *
         * @param prefix prefix to look up
         * @return the bound URI, or {@link XMLConstants#NULL_NS_URI} when the
         *         prefix is unbound (the contract forbids returning {@code null})
         * @throws IllegalArgumentException if {@code prefix} is {@code null}
         */
        @Override
        public String getNamespaceURI(String prefix) {
            if (prefix == null) {
                throw new IllegalArgumentException("prefix must not be null");
            }
            if (HATENA_PREFIX.equals(prefix)) {
                return HATENA_NS_URI;
            }
            if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
                return XMLConstants.XML_NS_URI;
            }
            if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
            }
            return XMLConstants.NULL_NS_URI;
        }

        /**
         * Returns the prefix bound to a namespace URI.
         *
         * @param namespaceURI URI to look up
         * @return the bound prefix, or {@code null} when the URI is unbound
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public String getPrefix(String namespaceURI) {
            // Every URI known here has exactly one prefix, so the first
            // element of getPrefixes() is the answer.
            Iterator<String> prefixes = getPrefixes(namespaceURI);
            return prefixes.hasNext() ? prefixes.next() : null;
        }

        /**
         * Returns all prefixes bound to a namespace URI.
         *
         * @param namespaceURI URI to look up
         * @return an iterator over the bound prefixes; empty (never
         *         {@code null}) when the URI is unbound
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            if (namespaceURI == null) {
                throw new IllegalArgumentException("namespaceURI must not be null");
            }
            if (HATENA_NS_URI.equals(namespaceURI)) {
                return Arrays.asList(HATENA_PREFIX).iterator();
            }
            if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
                return Arrays.asList(XMLConstants.XML_NS_PREFIX).iterator();
            }
            if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
                return Arrays.asList(XMLConstants.XMLNS_ATTRIBUTE).iterator();
            }
            return Collections.<String>emptyList().iterator();
        }
    }
}
d:\tmp> java tmp.NSTest
http://f.hatena.ne.jp/images/fotolife/t/twitter/20080413/20080413235100.jpg
http://f.hatena.ne.jp/images/fotolife/t/twitter/20080413/20080413235054.jpg
http://f.hatena.ne.jp/images/fotolife/t/twitter/20080413/20080413234330.jpg
...