=Start=
缘由:
趁着用Java解析XML时学习一下XPath的语法,方便后面有需要的时候参考。
正文:
参考解答:
XPath常用语法对照表
表达式 | 说明 |
// | 表示返回XML文档中的所有符合查找条件的元素,而忽略文档中元素的位置级别 |
/ | 表示绝对路径 |
News/Links/name | 表示相对路径 |
* | 表示所有元素 |
or | 或 |
and | 与 |
其它表达式 | =,!=,<,>,>=,<= |
text() | 文本节点(XPath区分大小写,必须小写) |
node() | 任意类型的节点(XPath区分大小写,必须小写) |
number last() | last 函数返回一个数字,该数字等于从表达式求值上下文中的上下文大小(即返回节点个数) |
number position() | position函数返回一个数字,该数字等于从表达式求值上下文中的上下文位置(即当前位置) |
number count(node-set) | count 函数返回在参数node-set中节点的个数。 |
boolean not(boolean) | 如果参数为假 not 函数返回真,否则返回假。 |
number number(object?) | number 函数参数依下列各项转换成数字 |
//* | 获得所有节点 |
../* | 获得当前节点的父节点的所有子节点 |
//Links | 获得所有的Links节点 |
//Links[name="网易"] | 获得子节点name的文本为“网易”的所有Links节点 |
//Links[@id="1"] | 获得属性ID=1的所有Links节点 |
//*[name="新浪"] | 获得子节点name的文本为“新浪”的所有节点 |
//*[@id="1"] | 获得属性ID=1的所有节点 |
//*[@id] | 获得存在属性ID的所有节点 |
//*[name] | 获得存在子节点name的所有节点 |
//Links[序号] | 获得返回的N个Links节点中的第序号个节点 |
//Links[position()=1 or position()=2] | 获得返回的N个Links节点中的第一个和第二个节点(注意不能写成[1 or 2],那是一个恒为真的布尔表达式,会匹配所有节点) |
//*[name="网易" and @id="1"] | 获得所有子节点name的文本为“网易” 且自己的属性id=“1”的节点 |
//text() | 选择所有的正文节点(即叶子节点) |
//Links[position()=last()] | 获得返回的N个Links节点中的最后一个节点等同于//Links[last()] |
//*[contains(name,"新")] | 获得子节点name的文本中包含“新”的所有节点 |
//Links[1] | 获得返回的N个Links节点中的第一个节点 |
//Links[1]/name[1] | 获得第一个Links的第一个name节点 |
//Links/name | 获得所有Links节点下的所有name节点 |
//*[@id>1] | 获得属性ID>1的所有节点 |
//*[number(@id)+1>1] | 获得属性ID的值加1>1的所有节点 |
//*[number(text())>1] | 获得文本节点的文本值大于1的所有节点 |
//*[(number(text()) mod 2)=1] | 获得文本节点的文本值为奇数的所有节点 |
当节点/元素名称中包含冒号(:)时该怎么处理?
可以考虑用name()按带前缀的完整名称匹配:
//*[name()='media:thumbnail']
或者考虑用local-name()忽略namespace前缀,先做个粗略的过滤:
/item/*[local-name()='thumbnail']
Java中使用XPath的样例
package com.example;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.File;
import java.io.StringReader;
/**
 * Demonstrates DOM traversal and XPath evaluation against an in-memory sample XML document.
 *
 * <p>The sample document uses prefixed element names such as {@code p:sp} without declaring
 * the corresponding namespaces. Namespace awareness is therefore deliberately NOT enabled on
 * the parser factory; prefixed names are treated as plain element names.
 *
 * <p>Both loader methods build their parser through a single hardened factory that rejects
 * DOCTYPE declarations, which blocks XML External Entity (XXE) and entity-expansion attacks.
 *
 * @author ixyzero
 * Created on 2022-05-08
 */
public class opXml2 {

    /** Parser feature that makes any DOCTYPE declaration a fatal error. */
    private static final String DISALLOW_DOCTYPE_FEATURE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Parses the sample XML and prints, in order: a summary of the root element, its direct
     * children, every {@code p:sp} element with its children, and the matches of an XPath
     * expression.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document document = loadXMLFromString(getXMLString());
            Element root = document.getDocumentElement();

            printRootSummary(root);
            printNameAndType(root.getChildNodes());
            printElementsWithChildren(document.getElementsByTagName("p:sp"));

            // XPath cheat sheet (node counts are for the sample document, evaluated
            // with the document node as context):
            //   //*                    -> all element nodes
            //   //*[@cx]               -> all nodes that have a cx attribute
            //   //*[@cx="1"]           -> all nodes whose cx attribute equals 1
            //   //*[name()='p:sp']     -> all p:sp nodes
            //   //*[name()='p:sp'][1]  -> the first p:sp node
            //   //*[local-name()='sp'] -> sp nodes regardless of prefix
            //   //element_name         -> all element_name nodes; works directly when the
            //                             name contains no colon
            //   //book/title           -> all title nodes under any book node (4 nodes)
            //   book/title             -> 0 nodes
            //   /book/title            -> 0 nodes
            //   inventory/book/title   -> 3 nodes
            //   /inventory/book/title  -> 3 nodes
            printXPathMatches(document, "/inventory/book/title");
        } catch (Exception e) {
            // Top-level boundary of a demo program: report the failure and exit normally.
            e.printStackTrace();
        }
    }

    /**
     * Parses the XML file at the given path into a DOM document.
     *
     * @param filePath path of the XML file to read
     * @return the parsed document
     * @throws Exception if the parser cannot be configured, the file cannot be read, or the
     *     content is not well-formed XML or contains a DOCTYPE declaration
     */
    public static Document loadXMLFromFile(String filePath) throws Exception {
        return newHardenedBuilder().parse(new File(filePath));
    }

    /**
     * Parses XML text into a DOM document.
     *
     * @param xml the XML content
     * @return the parsed document
     * @throws Exception if the parser cannot be configured, or the content is not well-formed
     *     XML or contains a DOCTYPE declaration
     */
    public static Document loadXMLFromString(String xml) throws Exception {
        InputSource source = new InputSource(new StringReader(xml));
        return newHardenedBuilder().parse(source);
    }

    /**
     * Returns the sample XML: an {@code inventory} root holding three {@code book} elements and
     * a {@code p:cSld} subtree that contains a fourth {@code book} plus prefixed elements.
     *
     * @return the sample XML text
     */
    public static String getXMLString() {
        return "<inventory id=\"123\" name=\"test\" year=\"2022\">\n" +
                " <book year=\"2020\">\n" +
                " <title>Snow Crash</title>\n" +
                " <author>Neal Stephenson</author>\n" +
                " <publisher>Spectra</publisher>\n" +
                " <isbn>0553380958</isbn>\n" +
                " <price>14.95</price>\n" +
                " </book>\n" +
                " <book year=\"2015\">\n" +
                " <title>Burning Tower</title>\n" +
                " <author>Larry Niven</author>\n" +
                " <publisher>Pocket</publisher>\n" +
                " <isbn>0743416910</isbn>\n" +
                " <price>5.99</price>\n" +
                " </book>\n" +
                " <book year=\"1995\">\n" +
                " <title>Zodiac</title>\n" +
                " <author>Neal Stephenson</author>\n" +
                " <publisher>Spectra</publisher>\n" +
                " <isbn>0553573862</isbn>\n" +
                " <price>7.50</price>\n" +
                " </book>\n" +
                "\n" +
                " <p:cSld>\n" +
                " <book year=\"1995\">\n" +
                " <title>Zodiac</title>\n" +
                " <author>Neal Stephenson</author>\n" +
                " <publisher>Spectra</publisher>\n" +
                " <isbn>0553573862</isbn>\n" +
                " <price>7.50</price>\n" +
                " </book>\n" +
                " <p:spTree>\n" +
                " <p:nvGrpSpPr>\n" +
                " <p:cNvPr id=\"1\" name=\"\"/>\n" +
                " <p:cNvGrpSpPr/>\n" +
                " <p:nvPr/>\n" +
                " </p:nvGrpSpPr>\n" +
                " <p:grpSpPr>\n" +
                " <a:xfrm>\n" +
                " <a:off x=\"0\" y=\"0\"/>\n" +
                " <a:ext cx=\"0\" cy=\"0\"/>\n" +
                " <a:chOff x=\"0\" y=\"0\"/>\n" +
                " <a:chExt cx=\"0\" cy=\"0\"/>\n" +
                " </a:xfrm>\n" +
                " </p:grpSpPr>\n" +
                " <p:sp>\n" +
                " <p:nvSpPr>\n" +
                " <p:cNvSpPr>\n" +
                " <a:spLocks noGrp=\"true\"/>\n" +
                " </p:cNvSpPr>\n" +
                " <p:nvPr>\n" +
                " <p:ph type=\"ctrTitle\"/>\n" +
                " </p:nvPr>\n" +
                " </p:nvSpPr>\n" +
                " <p:spPr/>\n" +
                " <p:txBody>\n" +
                " <a:bodyPr/>\n" +
                " <a:lstStyle/>\n" +
                " <a:p>\n" +
                " <a:r>\n" +
                " <a:rPr kumimoji=\"true\" lang=\"en-US\" altLang=\"zh-CN\" dirty=\"false\"/>\n" +
                " <a:t>This is title</a:t>\n" +
                " </a:r>\n" +
                " <a:endParaRPr kumimoji=\"true\" lang=\"zh-CN\" altLang=\"en-US\" dirty=\"false\"/>\n" +
                " </a:p>\n" +
                " </p:txBody>\n" +
                " </p:sp>\n" +
                " <p:sp>\n" +
                " <p:txBody>\n" +
                " <a:bodyPr/>\n" +
                " <a:lstStyle/>\n" +
                " <a:p>\n" +
                " <a:r>\n" +
                " <a:rPr kumimoji=\"true\" lang=\"en-US\" altLang=\"zh-CN\" dirty=\"false\"/>\n" +
                " <a:t>Subtitle here</a:t>\n" +
                " </a:r>\n" +
                " <a:endParaRPr kumimoji=\"true\" lang=\"zh-CN\" altLang=\"en-US\" dirty=\"false\"/>\n" +
                " </a:p>\n" +
                " </p:txBody>\n" +
                " </p:sp>\n" +
                " <p:pic>\n" +
                " <p:nvPicPr>\n" +
                " <p:cNvPr id=\"4136\" name=\"Picture 1\" descr=\"1.png\"/>\n" +
                " <p:nvPr/>\n" +
                " </p:nvPicPr>\n" +
                " <p:blipFill>\n" +
                " <a:blip cstate=\"print\" r:link=\"rId3\"/>\n" +
                " </p:blipFill>\n" +
                " </p:pic>\n" +
                " </p:spTree>\n" +
                " <p:extLst>\n" +
                " <p:ext uri=\"{BB962C8B-B14F-4D97-AF65-F5344CB8AC3E}\">\n" +
                " <p14:creationId val=\"3160098309\"/>\n" +
                " </p:ext>\n" +
                " </p:extLst>\n" +
                " </p:cSld>\n" +
                "</inventory>";
    }

    /** Creates a DOM parser that rejects DOCTYPE declarations (XXE hardening). */
    private static DocumentBuilder newHardenedBuilder()
            throws javax.xml.parsers.ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Without a DOCTYPE there can be no external or recursively expanding entities.
        factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        return factory.newDocumentBuilder();
    }

    /** Prints the root element, its name, its attribute count, ELEMENT_NODE and its child count. */
    private static void printRootSummary(Element root) {
        System.out.println(root);
        System.out.println(root.getNodeName());
        System.out.println(root.getAttributes().getLength());
        System.out.println(Node.ELEMENT_NODE); // numeric node type of element nodes is 1
        System.out.println(root.getChildNodes().getLength()); // number of first-level nodes
    }

    /** Prints "name TAB type" for every node in the list (text nodes included). */
    private static void printNameAndType(NodeList nodes) {
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            System.out.println(node.getNodeName() + "\t" + node.getNodeType());
        }
    }

    /** Prints a header for each node and, for element nodes, "type TAB name" of each child. */
    private static void printElementsWithChildren(NodeList elements) {
        for (int i = 0; i < elements.getLength(); i++) {
            Node current = elements.item(i);
            NodeList children = current.getChildNodes();
            System.out.println(String.format("\nCurrent Element: %s, childNode count: %d",
                    current.getNodeName(), children.getLength()));
            if (current.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                System.out.println(child.getNodeType() + "\t" + child.getNodeName());
            }
        }
    }

    /** Prints the expression, the number of matching nodes, then each match's name and text. */
    private static void printXPathMatches(Document document, String expression) throws Exception {
        XPath xPath = XPathFactory.newInstance().newXPath();
        System.out.println(expression);
        NodeList matches =
                (NodeList) xPath.compile(expression).evaluate(document, XPathConstants.NODESET);
        System.out.println(matches.getLength());
        for (int i = 0; i < matches.getLength(); i++) {
            Node match = matches.item(i);
            System.out.println(String.format("Current Element: %s, node content: %s",
                    match.getNodeName(), match.getTextContent()));
        }
    }
}
参考链接:
XML 元素
https://www.runoob.com/xml/xml-elements.html
Intro to XPath with Java
https://www.baeldung.com/java-xpath
Java XPath API语法教程 # XPath常用语法
http://www.51gjie.com/java/747.html
XPath query for XML node with colon in node name
https://newbedev.com/xpath-query-for-xml-node-with-colon-in-node-name
Use XPath to parse element name containing a colon
https://stackoverflow.com/questions/4282147/use-xpath-to-parse-element-name-containing-a-colon
XML Namespaces and How They Affect XPath and XSLT
https://docs.microsoft.com/en-us/previous-versions/dotnet/articles/ms950779(v=msdn.10)?redirectedfrom=MSDN
=END=