下面列出了怎么用org.w3c.dom.DocumentFragment的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Uses the supplied RunFontSelector to determine the font for the list item label
 * and copies it onto the label body element.
 *
 * <p>The selector result is cast to a DocumentFragment. When its first child is an
 * element carrying a {@code font-family} attribute, that value is set as
 * {@code font-family} on {@code foListItemLabelBody}; otherwise the element is left
 * untouched.
 *
 * @param runFontSelector selector whose {@code fontSelector(pPr, rPr, text)} result is inspected
 * @param foListItemLabelBody element that receives the {@code font-family} attribute
 * @param pPr passed through to the selector
 * @param rPr passed through to the selector
 * @param text passed through to the selector
 */
protected static void setFont(RunFontSelector runFontSelector, Element foListItemLabelBody, PPr pPr, RPr rPr, String text) {
    DocumentFragment result = (DocumentFragment) runFontSelector.fontSelector(pPr, rPr, text);
    if (result == null) {
        // Nothing was produced, so there is nothing to log or to copy.
        return;
    }
    log.debug(XmlUtils.w3CDomNodeToString(result));
    // eg <fo:inline xmlns:fo="http://www.w3.org/1999/XSL/Format" font-family="Times New Roman">1)</fo:inline>
    // Only an element can carry the attribute; any other node type is skipped instead of
    // failing with a ClassCastException.
    if (result.getFirstChild() instanceof Element) {
        Attr attr = ((Element) result.getFirstChild()).getAttributeNode("font-family");
        if (attr != null) {
            foListItemLabelBody.setAttribute("font-family", attr.getValue());
        }
    }
}
/**
 * Parses the given stream and hands back its root element wrapped in a
 * DocumentFragment that belongs to {@code owningDocument}.
 *
 * @param input the InputStream to parse
 * @param owningDocument the Document that creates the fragment and adopts the parsed root element
 * @return a DocumentFragment whose only appended child is the parsed document element
 * @throws DecryptionException if the parser pool reports an XMLParserException
 */
private DocumentFragment parseInputStream(InputStream input, Document owningDocument) throws DecryptionException {
    // The parser is not asked to build a DocumentFragment directly; instead a complete
    // document is parsed and its root element is moved into a fragment. As a result this
    // only works for streams that hold a full XML instance, not a bare fragment.
    final Document parsed;
    try {
        parsed = parserPool.parse(input);
    } catch (XMLParserException e) {
        log.error("Error parsing decrypted input stream", e);
        throw new DecryptionException("Error parsing input stream", e);
    }

    Element root = parsed.getDocumentElement();
    owningDocument.adoptNode(root);

    DocumentFragment fragment = owningDocument.createDocumentFragment();
    fragment.appendChild(root);
    return fragment;
}
/**
 * Collects additional outlinks by walking {@code doc} and, when any are found,
 * replaces the parse stored under the content URL with one whose outlinks are the
 * newly found links followed by the previously recorded ones.
 *
 * @param content fetched content; supplies the result key (URL) and the base URL for the walk
 * @param parseResult result holder that is read and, if links were found, updated in place
 * @param metaTags passed through to {@code walk}
 * @param doc DOM fragment that is walked
 * @return the same {@code parseResult} instance
 */
public ParseResult filter(Content content, ParseResult parseResult,
        HTMLMetaTags metaTags, DocumentFragment doc) {
    Parse parse = parseResult.get(content.getUrl());
    String url = content.getBaseUrl();
    // Typed list instead of a raw ArrayList, so no unchecked array cast is needed below.
    ArrayList<Outlink> outlinks = new ArrayList<Outlink>();
    walk(doc, parse, metaTags, url, outlinks);
    if (!outlinks.isEmpty()) {
        ParseData oldData = parse.getData();
        // Existing outlinks are kept, appended after the newly discovered ones.
        outlinks.addAll(Arrays.asList(oldData.getOutlinks()));
        Outlink[] newlinks = outlinks.toArray(new Outlink[outlinks.size()]);
        ParseData parseData = new ParseData(oldData.getStatus(), oldData.getTitle(), newlinks,
                oldData.getContentMeta(),
                oldData.getParseMeta());
        // replace original parse obj with new one
        parseResult.put(content.getUrl(), new ParseText(parse.getText()), parseData);
    }
    return parseResult;
}
/**
 * Stores an MD5 hex digest under {@code key_name} in the metadata of the parse
 * registered for {@code URL}.
 *
 * <p>If {@code copyKeyName} is set, the value currently stored under {@code key_name}
 * (when present) is first copied to {@code copyKeyName}. The digest input is the parsed
 * text when {@code useText} is set and the text is not blank, otherwise the raw content
 * when {@code useText} is not set; if neither yields bytes, the URL itself is hashed.
 *
 * @param URL key of the parse entry; also the fallback digest input
 * @param content raw bytes, used when {@code useText} is false
 * @param doc not used by this method
 * @param parse holder from which the parse data for {@code URL} is read
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    ParseData parseData = parse.get(URL);
    Metadata metadata = parseData.getMetadata();

    // Preserve the previous signature before it gets overwritten.
    if (copyKeyName != null) {
        String previous = metadata.getFirstValue(key_name);
        if (previous != null) {
            metadata.setValue(copyKeyName, previous);
        }
    }

    byte[] data = content;
    if (useText) {
        String text = parseData.getText();
        data = StringUtils.isNotBlank(text) ? text.getBytes(StandardCharsets.UTF_8) : null;
    }
    if (data == null) {
        data = URL.getBytes(StandardCharsets.UTF_8);
    }

    metadata.setValue(key_name, DigestUtils.md5Hex(data));
}
/**
 * Copies {@code reader} to {@code writer} and then returns what is found under
 * {@code element}: the single child node directly, or a DocumentFragment holding all
 * children when there are none or several.
 *
 * <p>NOTE(review): presumably {@code writer} writes into {@code element}; confirm against the caller.
 *
 * @param reader source that is copied to {@code writer}
 * @param wrapped when true, the children of the element's first child are used instead
 *        of the element's own children
 * @param contextDocument document used to create the fragment when one is needed
 * @param writer destination of the copy
 * @param element element whose children are collected after the copy
 * @return the only child, or a fragment containing every child (empty if there are none)
 * @throws XMLStreamException if the copy fails
 */
private Node appendNewChild(XMLStreamReader reader, boolean wrapped, Document contextDocument,
        XMLStreamWriter writer, Element element) throws XMLStreamException {
    StaxUtils.copy(reader, writer);
    Node child = element.getFirstChild();
    // Guard against an element without children: unwrapping would otherwise dereference null.
    if (wrapped && child != null) {
        child = child.getFirstChild();
    }
    if (child != null && child.getNextSibling() == null) {
        return child;
    }
    // The fragment is only needed for the zero-or-many case, so it is created here.
    DocumentFragment result = contextDocument.createDocumentFragment();
    while (child != null) {
        // Remember the sibling first: appendChild detaches the node from its current parent.
        Node nextChild = child.getNextSibling();
        result.appendChild(child);
        child = nextChild;
    }
    return result;
}
/**
 * Strips the tags from HTML-formatted text.
 *
 * <p>The markup is parsed into a DocumentFragment, {@code textBuffer} is replaced with a
 * fresh buffer, and {@code processNode} is run over the fragment; the buffer content is
 * the result.
 *
 * @param html HTML-formatted string
 * @return the buffer content after processing; the empty string {@code ""} if
 *         {@code html} is null or parsing fails
 */
private String htmlToText(String html) {
    if (html == null) {
        return "";
    }
    DOMFragmentParser parser = new DOMFragmentParser();
    CoreDocumentImpl codeDoc = new CoreDocumentImpl();
    DocumentFragment doc = codeDoc.createDocumentFragment();
    try {
        // Encode with the same charset that is declared on the InputSource; using the
        // platform default here would garble non-ASCII text whenever the two differ.
        byte[] bytes = (textCharset == null) ? html.getBytes() : html.getBytes(textCharset);
        InputSource inSource = new InputSource(new ByteArrayInputStream(bytes));
        inSource.setEncoding(textCharset);
        parser.parse(inSource, doc);
    } catch (Exception e) {
        // Best effort: unparseable input (or an unsupported charset name) yields no text.
        return "";
    }
    textBuffer = new StringBuffer();
    processNode(doc);
    return textBuffer.toString();
}
/**
 * Strips the tags from HTML-formatted text.
 *
 * <p>The markup is parsed into a DocumentFragment, {@code textBuffer} is replaced with a
 * fresh buffer, and {@code processNode} is run over the fragment; the buffer content is
 * the result.
 *
 * @param html HTML-formatted string
 * @return the buffer content after processing; the empty string {@code ""} if
 *         {@code html} is null or parsing fails
 */
private String htmlToText(String html) {
    if (html == null) {
        return "";
    }
    DOMFragmentParser parser = new DOMFragmentParser();
    CoreDocumentImpl codeDoc = new CoreDocumentImpl();
    DocumentFragment doc = codeDoc.createDocumentFragment();
    try {
        // Encode with the same charset that is declared on the InputSource; using the
        // platform default here would garble non-ASCII text whenever the two differ.
        byte[] bytes = (textCharset == null) ? html.getBytes() : html.getBytes(textCharset);
        InputSource inSource = new InputSource(new ByteArrayInputStream(bytes));
        inSource.setEncoding(textCharset);
        parser.parse(inSource, doc);
    } catch (Exception e) {
        // Best effort: unparseable input (or an unsupported charset name) yields no text.
        return "";
    }
    textBuffer = new StringBuffer();
    processNode(doc);
    return textBuffer.toString();
}
/**
 * Looks up the page language in three places, in order: the parse metadata, a
 * LanguageParser run over the DOM fragment, and finally the Content-Language entry of
 * the content metadata.
 *
 * @param page parse whose data supplies both metadata sets
 * @param doc DOM fragment handed to the LanguageParser
 * @return the first non-null value found, or whatever the content metadata holds (possibly null)
 */
private String detectLanguage(Parse page, DocumentFragment doc) {
    String lang = getLanguageFromMetadata(page.getData().getParseMeta());
    if (lang == null) {
        lang = new LanguageParser(doc).getLanguage();
    }
    return (lang != null)
            ? lang
            : page.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
}
/**
 * For each of three doctype declarations, builds a document with that doctype, adopts a
 * small HTML fragment into it and checks that the rendered output starts with the
 * declaration.
 */
public final void testDocumentType() throws ParseException {
    final String html401Transitional = "<!DOCTYPE html PUBLIC "
            + "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
            + "\"http://www.w3.org/TR/html4/loose.dtd\">";
    final String xhtml10Transitional = "<!DOCTYPE html PUBLIC "
            + "\"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
            + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
    final String html5 = "<!DOCTYPE html>";

    String[] docTypes = { html401Transitional, xhtml10Transitional, html5 };
    for (int i = 0; i < docTypes.length; i++) {
        String docType = docTypes[i];
        Document doc = DomParser.makeDocument(DoctypeMaker.parse(docType), null);
        DocumentFragment html = html(fromString("<html><b>my text</b></html>"));
        doc.appendChild(doc.adoptNode(html));
        String rendered = Nodes.render(doc.getDoctype(), html, null);
        MoreAsserts.assertStartsWith(docType, rendered);
    }
}
/**
 * Decides what to do with a pending node when the next node arrives.
 *
 * <p>With no pending node, the current node simply becomes the new pending one. Two
 * text nodes are merged into a single new text node, which is returned without being
 * appended. In every other case the pending node is appended to {@code parent} and the
 * current node is returned.
 *
 * @param pending node held back from the previous step, or null
 * @param current node that has just been produced
 * @param parent fragment that receives nodes which cannot be merged
 * @return the node the caller should hold as pending next
 */
private Node appendNormalized(
        Node pending, Node current, DocumentFragment parent) {
    if (pending == null) {
        return current;
    }

    boolean bothText = pending.getNodeType() == Node.TEXT_NODE
            && current.getNodeType() == Node.TEXT_NODE;
    if (!bothText) {
        parent.appendChild(pending);
        return current;
    }

    Text first = (Text) pending;
    Text second = (Text) current;
    Text merged = doc.createTextNode(first.getTextContent() + second.getTextContent());
    if (needsDebugData) {
        // Carry position and raw-text bookkeeping over to the merged node.
        Nodes.setFilePositionFor(
                merged,
                FilePosition.span(
                        Nodes.getFilePositionFor(first),
                        Nodes.getFilePositionFor(second)));
        Nodes.setRawText(merged, Nodes.getRawText(first) + Nodes.getRawText(second));
    }
    return merged;
}
/**
 * Extracts the element held by a DOMResult.
 *
 * <ul>
 *   <li>Document: its document element.</li>
 *   <li>Element: the element itself.</li>
 *   <li>DocumentFragment: its first child that is an element, or null if it has none.</li>
 * </ul>
 *
 * @param r result whose node is inspected
 * @return the element as described above; may be null
 * @throws IllegalStateException if the result holds no node or any other kind of node
 */
public Element getElement(DOMResult r) {
    // JAXP spec is ambiguous about what really happens in this case,
    // so work defensively
    Node n = r.getNode();
    if (n instanceof Document) {
        return ((Document) n).getDocumentElement();
    }
    if (n instanceof Element) {
        return (Element) n;
    }
    if (n instanceof DocumentFragment) {
        // The first child may be text, a comment, etc.; scan for the first element
        // rather than blindly casting child 0.
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child instanceof Element) {
                return (Element) child;
            }
        }
        return null;
    }
    // if the result object contains something strange,
    // it is not a user problem, but it is a JAXB provider's problem.
    // That's why we throw a runtime exception.
    // String.valueOf keeps this an IllegalStateException even when n is null.
    throw new IllegalStateException(String.valueOf(n));
}
/**
 * Parses a div that declares the {@code os} namespace prefix and contains a
 * self-closing prefixed element, then checks the rendered form of the fragment.
 */
public final void testIssue1211XmlnsOnDiv() throws Exception {
    final String input = ""
            + "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
            + " <os:ViewerRequest key=\"viewer\"/>\n"
            + "</div>";
    final String expected = ""
            + "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
            + " <os:ViewerRequest key=\"viewer\">\n"
            + "</os:ViewerRequest></div>";

    DocumentFragment parsed = htmlFragment(fromString(input));
    assertEquals(expected, Nodes.render(parsed));
}
/**
 * Convenience overload that routes events to java.util.logging and delegates to the
 * overload accepting an event handler.
 *
 * @deprecated Replaced by {@link #fromInputStream(java.io.InputStream, com.sandflow.smpte.regxml.dict.DefinitionResolver,
 * com.sandflow.smpte.regxml.FragmentBuilder.AUIDNameResolver, com.sandflow.util.events.EventHandler,
 * com.sandflow.smpte.util.UL, org.w3c.dom.Document)}
 * This method does not allow the
 * caller to provide an event handler, and instead uses java.util.logging to
 * output events.
 *
 * @param mxfpartition MXF partition, including the Partition Pack. Must not be null.
 * @param defresolver MetaDictionary definitions. Must not be null.
 * @param enumnameresolver Allows the local name of extendible enumeration
 * values to be inserted as comments. May be null.
 * @param rootclasskey Root class of Fragment. The Preface class is used if null.
 * @param document DOM for which the Document Fragment is created. Must not be null.
 *
 * @return Document Fragment containing a single RegXML Fragment
 *
 * @throws IOException
 * @throws KLVException
 * @throws com.sandflow.smpte.regxml.MXFFragmentBuilder.MXFException
 * @throws com.sandflow.smpte.regxml.FragmentBuilder.RuleException
 */
@Deprecated
public static DocumentFragment fromInputStream(InputStream mxfpartition,
        DefinitionResolver defresolver,
        FragmentBuilder.AUIDNameResolver enumnameresolver,
        UL rootclasskey,
        Document document) throws IOException, KLVException, MXFException, FragmentBuilder.RuleException {
    // Handler that maps each event severity to a logger level and always reports the
    // event as handled.
    EventHandler handler = new EventHandler() {
        @Override
        public boolean handle(Event evt) {
            switch (evt.getSeverity()) {
                case ERROR:
                case FATAL:
                    LOG.severe(evt.getMessage());
                    break;
                case INFO:
                    LOG.info(evt.getMessage());
                    break;
                case WARN:
                    LOG.warning(evt.getMessage());
                    break;
                default:
                    // Any other severity is not logged.
                    break;
            }
            return true;
        }
    };
    return fromInputStream(mxfpartition,
            defresolver,
            enumnameresolver,
            handler,
            rootclasskey,
            document);
}
/**
 * Parses {@code source} into an HTML fragment, rethrowing a ParseException as an
 * unchecked RuntimeException.
 *
 * @param source markup to parse
 * @return the parsed fragment
 */
private DocumentFragment html(String source) {
    final DocumentFragment parsed;
    try {
        parsed = htmlFragment(fromString(source));
    } catch (ParseException e) {
        throw new RuntimeException(e);
    }
    return parsed;
}
/**
 * Serializes a DOM document fragment: resets and prepares the serializer, serializes
 * the fragment node, flushes the printer and finally rethrows any exception the
 * printer has recorded.
 *
 * @param frag the document fragment to serialize
 * @throws IOException the exception reported by the printer, if any
 */
public void serialize(DocumentFragment frag)
        throws IOException {
    reset();
    prepare();
    serializeNode(frag);
    _printer.flush();
    if (_printer.getException() == null) {
        return;
    }
    throw _printer.getException();
}
/**
 * Not implemented here; see org.w3c.dom.Document.
 *
 * <p>Reports {@code ER_FUNCTION_NOT_SUPPORTED} through {@code error(...)} first.
 *
 * @return null, if {@code error(...)} returns normally
 */
public DocumentFragment createDocumentFragment() {
    error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);

    return null;
}
/**
 * Builds the document via {@code asDocument()} and moves its root element into a new
 * DocumentFragment created by that same document.
 *
 * @return a fragment to which the document element has been appended
 */
public DocumentFragment asDocumentFragment() throws OpenRDFException,
        TransformerException, IOException, ParserConfigurationException {
    Document document = asDocument();
    DocumentFragment fragment = document.createDocumentFragment();
    fragment.appendChild(document.getDocumentElement());
    return fragment;
}
/**
 * Handles an incoming JMS text message carrying a Notify payload.
 *
 * <p>The payload is unmarshalled, every notification message whose content does not
 * pass {@code doFilter} is removed, the remaining ones get the topic and subscription
 * reference set, and the Notify is forwarded through {@code doNotify} unless it ended
 * up empty. Any exception is logged as a warning and not propagated.
 *
 * @param jmsMessage the received message; it is cast to TextMessage
 */
public void onMessage(Message jmsMessage) {
    try {
        TextMessage text = (TextMessage) jmsMessage;
        XMLStreamReader reader = StaxUtils.createXMLStreamReader(new StringReader(text.getText()));
        Notify notify;
        try {
            notify = (Notify) jaxbContext.createUnmarshaller()
                    .unmarshal(reader);
        } finally {
            // Close the reader even when unmarshalling fails.
            reader.close();
        }
        for (Iterator<NotificationMessageHolderType> ith = notify.getNotificationMessage().iterator();
                ith.hasNext();) {
            NotificationMessageHolderType h = ith.next();
            Object content = h.getMessage().getAny();
            if (!(content instanceof Element)) {
                // Content that is not already an Element is marshalled into a scratch
                // fragment so that an Element can be handed to the filter.
                DocumentFragment doc = DOMUtils.getEmptyDocument().createDocumentFragment();
                jaxbContext.createMarshaller().marshal(content, doc);
                content = DOMUtils.getFirstElement(doc);
            }
            if (!doFilter((Element) content)) {
                ith.remove();
            } else {
                h.setTopic(topic);
                h.setSubscriptionReference(getEpr());
            }
        }
        if (!notify.getNotificationMessage().isEmpty()) {
            doNotify(notify);
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Error notifying consumer", e);
    }
}
/**
 * Stores a namespaced XML element on an entity as a DocumentFragment and checks that
 * the first statement found for {@code pred} has the original XML text as its object.
 */
public void testAddNamespaceElement() throws Exception {
    final String xml = "<a:Box xmlns:a=\"http://example.org/a#\" required=\"true\"><a:widget size=\"10\"> </a:widget><a:grommit id=\"23\"> text </a:grommit></a:Box>";

    Document parsed = parse(xml);
    DocumentFragment fragment = parsed.createDocumentFragment();
    fragment.appendChild(parsed.getDocumentElement());

    ObjectFactory factory = con.getObjectFactory();
    Entity entity = con.addDesignation(factory.createObject(), Entity.class);
    entity.setXML(fragment);

    RepositoryResult<Statement> statements = con.getStatements(entity.getResource(), pred, null);
    String stored = statements.next().getObject().stringValue();
    statements.close();

    assertEquals(xml, stored);
}
/**
 * Parses a script element that declares the {@code os} namespace prefix and checks the
 * rendered form of the fragment; the expected output differs from the input only in
 * that the line break between the two attributes is gone.
 */
public final void testIssue1211XmlnsOnScript() throws Exception {
    final String input = ""
            + "<script type=\"text/os-data\"\n"
            + " xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
            + " <os:ViewerRequest key=\"viewer\"/>\n"
            + "</script>";
    final String expected = ""
            + "<script type=\"text/os-data\""
            + " xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
            + " <os:ViewerRequest key=\"viewer\"/>\n"
            + "</script>";

    DocumentFragment parsed = htmlFragment(fromString(input));
    assertEquals(expected, Nodes.render(parsed));
}
/**
 * Appends one EssenceDescriptorBaseType to the CPL's essence descriptor list for every
 * essence descriptor reported by the track file reader.
 *
 * <p>Each entry gets a freshly generated UUID (also added to {@code uuidList}) and, as
 * its content, the first child of the DocumentFragment built for that descriptor.
 *
 * @param uuidList receives the generated id of every entry, in creation order
 * @param imfErrorLogger passed to the reader calls; also records a fatal error if the
 *        DOM builder cannot be configured
 * @throws IOException declared by this method; not thrown directly by the code shown here
 * @throws IMFException wrapping a ParserConfigurationException
 */
private void buildEssenceDescriptorList(List<String> uuidList, IMFErrorLogger imfErrorLogger) throws IOException{
    try {
        List<EssenceDescriptorBaseType> essenceDescriptorList = this.cplRoot.getEssenceDescriptorList().getEssenceDescriptor();
        List<InterchangeObject.InterchangeObjectBO> essenceDescriptors = this.imfTrackFileReader.getEssenceDescriptors(imfErrorLogger);
        /* The factory and builder do not depend on the descriptor, so they are created
           once; every descriptor still gets its own fresh Document below. */
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        for(InterchangeObject.InterchangeObjectBO essenceDescriptor : essenceDescriptors) {
            KLVPacket.Header essenceDescriptorHeader = essenceDescriptor.getHeader();
            List<KLVPacket.Header> subDescriptorHeaders = this.imfTrackFileReader.getSubDescriptorKLVHeader(essenceDescriptor, imfErrorLogger);
            /*Create a dom*/
            Document document = docBuilder.newDocument();
            EssenceDescriptorBaseType essenceDescriptorBaseType = new EssenceDescriptorBaseType();
            String uuid = IMFUUIDGenerator.getInstance().getUrnUUID();
            essenceDescriptorBaseType.setId(uuid);
            uuidList.add(uuid);
            DocumentFragment documentFragment = this.getEssenceDescriptorAsDocumentFragment(document, essenceDescriptorHeader, subDescriptorHeaders, imfErrorLogger);
            Node node = documentFragment.getFirstChild();
            essenceDescriptorBaseType.getAny().add(node);
            essenceDescriptorList.add(essenceDescriptorBaseType);
        }
    }
    catch(ParserConfigurationException e){
        imfErrorLogger.addError(IMFErrorLogger.IMFErrors.ErrorCodes.IMF_CPL_ERROR, IMFErrorLogger.IMFErrors
                .ErrorLevels.FATAL, e.getMessage());
        throw new IMFException(e);
    }
}
/**
 * Dispatches a DOM node to the visit method matching its node type.
 *
 * <p>Document type nodes are accepted and skipped. Attribute nodes, entity nodes and
 * any node type not listed are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for an unsupported node type, or as declared for the visit methods
 */
public void print(Node node) throws XMLStreamException {
    final short nodeType = node.getNodeType();
    switch (nodeType) {
        case Node.DOCUMENT_NODE:
            visitDocument((Document) node);
            return;
        case Node.DOCUMENT_FRAGMENT_NODE:
            visitDocumentFragment((DocumentFragment) node);
            return;
        case Node.ELEMENT_NODE:
            visitElement((Element) node);
            return;
        case Node.TEXT_NODE:
            visitText((Text) node);
            return;
        case Node.CDATA_SECTION_NODE:
            visitCDATASection((CDATASection) node);
            return;
        case Node.PROCESSING_INSTRUCTION_NODE:
            visitProcessingInstruction((ProcessingInstruction) node);
            return;
        case Node.ENTITY_REFERENCE_NODE:
            visitReference((EntityReference) node);
            return;
        case Node.COMMENT_NODE:
            visitComment((Comment) node);
            return;
        case Node.DOCUMENT_TYPE_NODE:
            // Nothing is emitted for a doctype node.
            return;
        default:
            // Includes Node.ATTRIBUTE_NODE and Node.ENTITY_NODE.
            throw new XMLStreamException("Unexpected DOM Node Type "
                    + nodeType);
    }
}
/**
 * Determines the content language and, when one is found, records it under
 * {@code Metadata.LANGUAGE} in the parse metadata.
 *
 * <p>Two strategies are available: {@code detectLanguage} (uses the parse and the DOM
 * fragment) and {@code identifyLanguage} (uses the parse only). The {@code detect} and
 * {@code identify} settings select them: a negative value disables a strategy; when
 * both are enabled the one with the smaller value runs first and the other serves as
 * fallback. If neither ordering applies, a warning is logged and nothing is recorded.
 *
 * @param content supplies the URL used to look up the parse
 * @param parseResult holder of the parse to annotate
 * @param metaTags not used by this method
 * @param doc DOM fragment passed to {@code detectLanguage}
 * @return the same {@code parseResult} instance
 */
public ParseResult filter(Content content, ParseResult parseResult,
        HTMLMetaTags metaTags, DocumentFragment doc) {
    Parse parse = parseResult.get(content.getUrl());
    final boolean detectEnabled = detect >= 0;
    final boolean identifyEnabled = identify >= 0;

    String lang;
    if (detectEnabled && !identifyEnabled) {
        lang = detectLanguage(parse, doc);
    } else if (!detectEnabled && identifyEnabled) {
        lang = identifyLanguage(parse);
    } else if (detect < identify) {
        lang = detectLanguage(parse, doc);
        if (lang == null) {
            lang = identifyLanguage(parse);
        }
    } else if (identify < detect) {
        lang = identifyLanguage(parse);
        if (lang == null) {
            lang = detectLanguage(parse, doc);
        }
    } else {
        LOG.warn("No configuration for language extraction policy is provided");
        return parseResult;
    }

    if (lang != null) {
        parse.getData().getParseMeta().set(Metadata.LANGUAGE, lang);
    }
    return parseResult;
}
/**
 * Parses {@code input} with the TagSoup parser into a DocumentFragment of a new
 * HTMLDocumentImpl. A DOMBuilder is installed both as content handler and as lexical
 * handler.
 *
 * @param input source to parse
 * @return the fragment handed to the DOMBuilder
 * @throws Exception whatever the parser setup or the parse itself throws
 */
private DocumentFragment parseTagSoup(InputSource input) throws Exception {
    HTMLDocumentImpl document = new HTMLDocumentImpl();
    DocumentFragment fragment = document.createDocumentFragment();
    DOMBuilder domBuilder = new DOMBuilder(document, fragment);

    org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
    tagSoup.setContentHandler(domBuilder);
    tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
    tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);
    tagSoup.setProperty("http://xml.org/sax/properties/lexical-handler", domBuilder);

    tagSoup.parse(input);
    return fragment;
}
/**
 * Asks the partitioner for the partition of {@code URL} and stores it under
 * {@code mdKey} in the metadata of the parse registered for that URL.
 *
 * @param URL key of the parse entry and input to the partitioner
 * @param content not used by this method
 * @param doc not used by this method
 * @param parse holder from which the metadata for {@code URL} is taken
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    Metadata md = parse.get(URL).getMetadata();
    String partition = partitioner.getPartition(URL, md);
    md.setValue(mdKey, partition);
}
/**
 * Not implemented here; see org.w3c.dom.Document.
 *
 * <p>Reports {@code ER_FUNCTION_NOT_SUPPORTED} through {@code error(...)} first.
 *
 * @return null, if {@code error(...)} returns normally
 */
public DocumentFragment createDocumentFragment() {
    error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);

    return null;
}
/**
 * Converts a jsoup document into a W3C DocumentFragment by traversing it, starting at
 * the document's first child, with a W3CBuilder that targets a new fragment.
 *
 * @param jsoupDocument the jsoup document to convert
 * @return the fragment handed to the W3CBuilder
 */
public static DocumentFragment fromJsoup(
        org.jsoup.nodes.Document jsoupDocument) {
    HTMLDocumentImpl w3cDocument = new HTMLDocumentImpl();
    w3cDocument.setErrorChecking(false);
    DocumentFragment target = w3cDocument.createDocumentFragment();

    // Start below the #root node rather than at it.
    org.jsoup.nodes.Element start = jsoupDocument.child(0);
    W3CBuilder visitor = new W3CBuilder(w3cDocument, target);
    NodeTraversor.traverse(visitor, start);
    return target;
}
/**
 * Writes a DOM node to the given stream writer.
 *
 * <p>When the writer is a W3CDOMStreamWriter, the node is attached directly to the
 * writer's current node (or, failing that, its current fragment) where the branches
 * below allow it; a DocumentFragment owned by a different document is imported first.
 * Every case that does not return early ends in the generic StaxUtils write at the
 * bottom.
 *
 * @param nd the node to write
 * @param writer the destination writer
 * @throws XMLStreamException as declared; presumably raised by the StaxUtils calls
 */
private void writeNode(Node nd, XMLStreamWriter writer) throws XMLStreamException {
if (writer instanceof W3CDOMStreamWriter) {
W3CDOMStreamWriter dw = (W3CDOMStreamWriter)writer;
if (dw.getCurrentNode() != null) {
if (nd instanceof DocumentFragment
&& nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
// Same-document fragment: move its children one by one under the current node.
Node ch = nd.getFirstChild();
while (ch != null) {
nd.removeChild(ch);
dw.getCurrentNode().appendChild(org.apache.cxf.helpers.DOMUtils.getDomElement(ch));
ch = nd.getFirstChild();
}
// NOTE(review): unlike the sibling branches there is no return here, so control
// falls through to the StaxUtils.writeNode call below with the now-empty
// fragment - confirm this is intended.
} else if (nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
// Same-document non-fragment node: attach it as is.
dw.getCurrentNode().appendChild(nd);
return;
} else if (nd instanceof DocumentFragment) {
// Fragment owned by another document: import a deep copy, then attach the copy.
nd = dw.getDocument().importNode(nd, true);
dw.getCurrentNode().appendChild(nd);
return;
}
} else if (dw.getCurrentFragment() != null) {
// No current node, but a current fragment: same rules, applied to the fragment.
if (nd.getOwnerDocument() == dw.getCurrentFragment().getOwnerDocument()) {
dw.getCurrentFragment().appendChild(nd);
return;
} else if (nd instanceof DocumentFragment) {
nd = dw.getDocument().importNode(nd, true);
dw.getCurrentFragment().appendChild(nd);
return;
}
}
}
// Generic path: reached for non-DOM writers and for every case above that did not return.
if (nd instanceof Document) {
StaxUtils.writeDocument((Document)nd,
writer, false, true);
} else {
StaxUtils.writeNode(nd, writer, true);
}
}
/**
 * Serializes a DOM document fragment: resets and prepares the serializer, serializes
 * the fragment node, flushes the printer and finally rethrows any exception the
 * printer has recorded.
 *
 * @param frag the document fragment to serialize
 * @throws IOException the exception reported by the printer, if any
 */
public void serialize(DocumentFragment frag)
        throws IOException {
    reset();
    prepare();
    serializeNode(frag);
    _printer.flush();
    if (_printer.getException() == null) {
        return;
    }
    throw _printer.getException();
}
/**
 * Dispatches a DOM node to the visit method matching its node type.
 *
 * <p>Document type nodes are accepted and skipped. Attribute nodes, entity nodes and
 * any node type not listed are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for an unsupported node type, or as declared for the visit methods
 */
public void print(Node node) throws XMLStreamException {
    final short nodeType = node.getNodeType();
    switch (nodeType) {
        case Node.DOCUMENT_NODE:
            visitDocument((Document) node);
            return;
        case Node.DOCUMENT_FRAGMENT_NODE:
            visitDocumentFragment((DocumentFragment) node);
            return;
        case Node.ELEMENT_NODE:
            visitElement((Element) node);
            return;
        case Node.TEXT_NODE:
            visitText((Text) node);
            return;
        case Node.CDATA_SECTION_NODE:
            visitCDATASection((CDATASection) node);
            return;
        case Node.PROCESSING_INSTRUCTION_NODE:
            visitProcessingInstruction((ProcessingInstruction) node);
            return;
        case Node.ENTITY_REFERENCE_NODE:
            visitReference((EntityReference) node);
            return;
        case Node.COMMENT_NODE:
            visitComment((Comment) node);
            return;
        case Node.DOCUMENT_TYPE_NODE:
            // Nothing is emitted for a doctype node.
            return;
        default:
            // Includes Node.ATTRIBUTE_NODE and Node.ENTITY_NODE.
            throw new XMLStreamException("Unexpected DOM Node Type "
                    + nodeType);
    }
}