=Start=
缘由:
简单记录一下用 docx4j 对 pptx 文件进行读写的方法,方便有需要的人参考。
正文:
参考解答:
直接看代码吧,简单直接,如果对OOXML的格式规范有一定了解的话理解起来会更容易。
package com.example;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.dml.CTTextBody;
import org.docx4j.dml.CTTextParagraph;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.PresentationMLPackage;
import org.docx4j.openpackaging.parts.PresentationML.SlidePart;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.pptx4j.jaxb.Context;
import org.pptx4j.pml.Shape;
import java.io.File;
import java.util.List;
/**
* @author ixyzero
* Created on 2022-05-04
*/
public class opOfficePptx3 {
/**
 * Entry point: loads a pptx file, prints the {@code a:t} elements of every slide, adds a
 * hidden text marker, prints the elements again for comparison, and saves the result.
 *
 * <p>All arguments are optional and fall back to the values that used to be hard-coded:
 * <ol>
 *   <li>{@code args[0]} - input pptx path (default {@code new-test.pptx})</li>
 *   <li>{@code args[1]} - output pptx path (default {@code new-test-pptx3.pptx})</li>
 *   <li>{@code args[2]} - marker text to embed (default {@code test 98308})</li>
 * </ol>
 *
 * <p>Any failure is reported by printing the stack trace; the method then returns normally.
 *
 * @param args optional command-line arguments as described above; may be {@code null}
 */
public static void main(String[] args) {
    int argCount = (args == null) ? 0 : args.length;
    String inputPath = argCount > 0 ? args[0] : "new-test.pptx";
    String outputPath = argCount > 1 ? args[1] : "new-test-pptx3.pptx";
    String markText = argCount > 2 ? args[2] : "test 98308";
    try {
        PresentationMLPackage presentationMLPackage = PresentationMLPackage.load(new File(inputPath));
        // Print the text elements before marking.
        // (printPptxContent(presentationMLPackage) can be used here instead for a verbose dump.)
        printPptxContentStr(presentationMLPackage);
        // Add the hidden text.
        textMark(presentationMLPackage, markText);
        // Print again so the output can be compared with the first pass.
        printPptxContentStr(presentationMLPackage);
        // Mark first, then save to a separate file so the input is left untouched.
        presentationMLPackage.save(new File(outputPath));
    } catch (Exception e) {
        // Docx4JException is an Exception as well, so a single handler covers both former branches.
        e.printStackTrace();
    }
}
/**
 * Prints the {@code a:t} elements of every slide in the given package to standard output.
 *
 * <p>In an unzipped pptx file each {@code ppt/slides/slideN.xml} is one page and a standalone
 * XML document, so collecting the content of the whole presentation means visiting every
 * slide and extracting the wanted nodes from its XML.
 *
 * @param pptxPackage the loaded presentation to read from
 * @throws Exception if the slide count, a slide, or its XML cannot be obtained
 */
private static void printPptxContentStr(PresentationMLPackage pptxPackage) throws Exception {
    final String textRunTag = "a:t";
    int slideIndex = 0;
    while (slideIndex < pptxPackage.getMainPresentationPart().getSlideCount()) {
        SlidePart currentSlide = pptxPackage.getMainPresentationPart().getSlide(slideIndex);
        String header = String.format("\nthis is slide %d:", slideIndex);
        System.out.println(header);
        // Parse the slide XML with jsoup's XML parser
        // (printing currentSlide.getXML() directly would show the whole document).
        Document parsedSlide = Jsoup.parse(currentSlide.getXML(), "", Parser.xmlParser());
        // <a:t>xxx</a:t> is looked up by tag name, not by id or CSS query selector.
        System.out.println(parsedSlide.getElementsByTag(textRunTag));
        slideIndex++;
    }
}
// TraverseSlide.java (https://github.com/plutext/docx4j/tree/VERSION_11_4_6/docx4j-samples-pptx4j)
/**
 * Prints a verbose dump of every slide: each object reachable from the slide's shape tree is
 * printed with its class name and marshalled XML, and every text paragraph of a
 * {@link Shape} is additionally printed on its own.
 *
 * <p>Roughly the same information can be obtained by simply printing {@code slide.getXML()}.
 *
 * @param pptxPackage the loaded presentation to read from
 * @throws Exception if the slide count or a slide cannot be obtained
 */
private static void printPptxContent(PresentationMLPackage pptxPackage) throws Exception {
    for (int i = 0; i < pptxPackage.getMainPresentationPart().getSlideCount(); i++) {
        SlidePart slide = pptxPackage.getMainPresentationPart().getSlide(i);
        System.out.println(String.format("\nthis is slide %d:", i));
        // NOTE(review): no walk is started explicitly here; presumably the TraversalUtil
        // constructor drives the callback over the given list - confirm against docx4j.
        new TraversalUtil(slide.getJaxbElement().getCSld().getSpTree().getSpOrGrpSpOrGraphicFrame(),
            new TraversalUtil.Callback() {
                // One nesting level of indentation; added on entry to a level and removed
                // on exit, so both operations must use the same width.
                private final String indentStep = "    ";
                private String indent = "";

                @Override
                public List<Object> apply(Object o) {
                    try {
                        System.out.println(indent + o.getClass().getName() + "\n\n"
                            + XmlUtils.marshaltoString(o, true, Context.jcPML));
                    } catch (RuntimeException notMarshallable) {
                        // Fall back to the class name alone when the object cannot be marshalled.
                        System.out.println(indent + o.getClass().getName());
                    }
                    if (o instanceof Shape) {
                        CTTextBody txBody = ((Shape) o).getTxBody();
                        if (txBody != null) {
                            for (CTTextParagraph tp : txBody.getP()) {
                                System.out.println(indent + tp.getClass().getName() + "\n\n"
                                    + XmlUtils.marshaltoString(tp, true, true, Context.jcPML,
                                        "http://schemas.openxmlformats.org/presentationml/2006/main",
                                        "txBody", CTTextParagraph.class));
                            }
                        }
                    }
                    return null;
                }

                @Override
                public boolean shouldTraverse(Object o) {
                    return true;
                }

                // Depth first
                @Override
                public void walkJAXBElements(Object parent) {
                    indent += indentStep;
                    List<Object> children = getChildren(parent);
                    if (children != null) {
                        for (Object child : children) {
                            // If it is wrapped in a JAXBElement, get its value.
                            Object unwrapped = XmlUtils.unwrap(child);
                            this.apply(unwrapped);
                            if (this.shouldTraverse(unwrapped)) {
                                walkJAXBElements(unwrapped);
                            }
                        }
                    }
                    // Remove exactly what was added above. Stripping a different width than
                    // was appended makes substring() throw at shallow depth.
                    indent = indent.substring(0, indent.length() - indentStep.length());
                }

                @Override
                public List<Object> getChildren(Object o) {
                    return TraversalUtil.getChildrenImpl(o);
                }
            }
        );
    }
}
/**
 * Adds a shape carrying {@code text} to every slide with an even zero-based index
 * (0, 2, 4, ...). The shape markup comes from {@code getHiddenShape(text)}.
 *
 * <p>A separate {@link Shape} object is unmarshalled for each slide, so that no single JAXB
 * object ends up referenced from the shape trees of several slides at once.
 *
 * <p>NOTE(review): the generated markup uses a fixed shape id/name; whether that can clash
 * with a shape already present on a slide has not been checked here.
 *
 * @param pptxPackage the presentation to modify in place
 * @param text the text to embed in the added shapes
 * @throws Exception if the shape markup cannot be unmarshalled or a slide cannot be obtained
 */
private static void textMark(PresentationMLPackage pptxPackage, String text) throws Exception {
    String shapeXml = getHiddenShape(text);
    int slideCount = pptxPackage.getMainPresentationPart().getSlideCount();
    for (int i = 0; i < slideCount; i += 2) {
        // Fresh instance per slide: the same markup, but never the same object twice.
        Shape shape = (Shape) XmlUtils.unmarshalString(shapeXml, Context.jcPML);
        SlidePart slidePart = pptxPackage.getMainPresentationPart().getSlide(i);
        slidePart.getJaxbElement().getCSld().getSpTree().getSpOrGrpSpOrGraphicFrame().add(shape);
    }
}
/**
 * Builds the {@code p:sp} markup of a small text-box shape whose single text run contains
 * {@code hiddenStr}.
 *
 * <p>The text is XML-escaped before it is embedded, so input containing {@code <}, {@code >}
 * or {@code &} yields well-formed markup and cannot inject elements of its own.
 *
 * @param hiddenStr the text to place in the shape's {@code a:t} element; {@code null} is
 *                  treated as empty text
 * @return the complete {@code p:sp} element as a string, including namespace declarations
 */
private static String getHiddenShape(String hiddenStr) {
    return "<p:sp xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">" +
        "<p:nvSpPr>\n" +
        "<p:cNvPr id=\"2001\" name=\"Shape 2001\"/>\n" + // p:cNvPr -> id/name
        "<p:cNvSpPr txBox=\"true\"/>\n" +
        "<p:nvPr/>\n" +
        "</p:nvSpPr>\n" +
        "<p:spPr>\n" +
        "<a:xfrm>\n" +
        "<a:off x=\"-100\" y=\"0\"/>\n" + // a:off -> x/y offset
        "<a:ext cx=\"16999\" cy=\"16999\"/>\n" + // a:ext -> cx/cy extent
        "</a:xfrm>\n" +
        "<a:prstGeom prst=\"rect\">\n" +
        "<a:avLst/>\n" +
        "</a:prstGeom>\n" +
        "</p:spPr>\n" +
        "<p:txBody>\n" +
        "<a:bodyPr lIns=\"16158\" tIns=\"16158\" rIns=\"16158\" bIns=\"16158\" anchor=\"t\" anchorCtr=\"false\">\n" +
        "<a:noAutofit/>\n" +
        "</a:bodyPr>\n" +
        "<a:lstStyle/>\n" +
        "<a:p>\n" +
        "<a:pPr>\n" +
        "<a:spcBef>\n" +
        "<a:spcPts val=\"0\"/>\n" +
        "</a:spcBef>\n" +
        "<a:buNone/>\n" +
        "</a:pPr>\n" +
        "<a:r>\n" +
        "<a:rPr lang=\"en\" sz=\"100\" dirty=\"false\">\n" +
        "<a:solidFill>\n" +
        "<a:schemeClr val=\"lt1\">\n" +
        "<a:alpha val=\"1000\"/>\n" +
        "</a:schemeClr>\n" +
        "</a:solidFill>\n" +
        "</a:rPr>\n" +
        "<a:t>" + escapeXmlText(hiddenStr) + "</a:t>\n" +
        "</a:r>\n" +
        "</a:p>\n" +
        "</p:txBody>\n" +
        "</p:sp>";
}

/** Escapes {@code &}, {@code <} and {@code >} for use as XML character data; null becomes "". */
private static String escapeXmlText(String raw) {
    if (raw == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(raw.length() + 16);
    for (int i = 0; i < raw.length(); i++) {
        char c = raw.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            default:
                escaped.append(c);
                break;
        }
    }
    return escaped.toString();
}
}
参考链接:
p:cNvPr -> Non-Visual Drawing Properties
http://www.datypic.com/sc/ooxml/e-p_cNvPr-1.html
p:nvSpPr -> Non-Visual Properties for a Shape
http://www.datypic.com/sc/ooxml/e-p_nvSpPr-1.html
p:nvPicPr -> Non-Visual Properties for a Picture
http://www.datypic.com/sc/ooxml/e-p_nvPicPr-1.html
a:xfrm -> 2D Transform for Individual Objects
http://www.datypic.com/sc/ooxml/e-a_xfrm-4.html
a:off -> Offset (X-Axis/Y-Axis Coordinate)
http://www.datypic.com/sc/ooxml/e-a_off-1.html
a:ext -> Extent Length/Width
http://www.datypic.com/sc/ooxml/e-a_ext-2.html
=END=
《“用 docx4j 对pptx文档进行简单读写”》 有 1 条评论
https://blog.51cto.com/topic/docx4j.html
使用Docx4j操作PPT指南系列(一)
https://blog.51cto.com/williamx/758955
https://blog.51cto.com/williamx/760870
https://blog.51cto.com/williamx/769277
https://blog.51cto.com/williamx/771179
https://blog.51cto.com/williamx/771202