Kurz notiert: Einen DOM-Node in einen String (HTML-Schnipsel) verwandeln.
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
// ...
/**
 * Serializes a DOM node (including its subtree) to markup text.
 *
 * <p>The node is run through a freshly created default {@link Transformer} and
 * written into an in-memory buffer. The XML declaration is suppressed so the
 * result can be used as a fragment.
 *
 * @param node the DOM node to serialize
 * @return the serialized markup of {@code node}
 * @throws TransformerFactoryConfigurationError if no transformer factory can be instantiated
 * @throws TransformerException if the transformer cannot be created or the transformation fails
 */
public static String nodeToString(final Node node) throws TransformerFactoryConfigurationError, TransformerException {
    final Transformer serializer = TransformerFactory.newInstance().newTransformer();
    // Fragment output: no leading <?xml ...?> line.
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

    final StringWriter buffer = new StringWriter();
    serializer.transform(new DOMSource(node), new StreamResult(buffer));
    return buffer.toString();
}
Im Folgenden wird die Methode benutzt, um ein XML-(HTML-)Element mit einer bestimmten ID auszugeben:
// Usage example: extract the element whose "id" attribute equals elementId and
// serialize it with nodeToString. result stays null if nothing matches or if
// parsing/serialization fails (the failure is logged, not rethrown).
String inputHtml = "<html><body><div id=\"id1\">Test 1</div><div id=\"id2\">Test 2</div></body></html>";
String elementId = "id1";
String result = null;
if (StringUtils.isNotBlank(inputHtml) && StringUtils.isNotBlank(elementId)) {
    try {
        // Parse the markup. DocumentBuilder is an XML parser, so the input must be well-formed.
        final DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        final Document doc = docBuilder.parse(new ByteArrayInputStream(inputHtml.getBytes(StandardCharsets.UTF_8)));

        // Document.getElementById only finds attributes that are declared as type ID
        // (via DTD or schema). This document is parsed without one, so a plain
        // id="..." attribute would never match; compare the attribute values directly.
        final NodeList candidates = doc.getElementsByTagName("*");
        Element element = null;
        for (int i = 0; i < candidates.getLength() && element == null; i++) {
            final Element candidate = (Element) candidates.item(i);
            if (elementId.equals(candidate.getAttribute("id"))) {
                element = candidate;
            }
        }

        if (element != null) {
            result = nodeToString(element);
        }
    } catch (final ParserConfigurationException | SAXException | IOException | TransformerFactoryConfigurationError | TransformerException e) {
        LOG.warn("Fehler beim Versuch, HTML zu verarbeiten", e);
    }
}