Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,11 @@
<artifactId>unxml</artifactId>
<version>${unxml.version}</version>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
<version>12.4</version>
</dependency>
<dependency>
<groupId>org.python</groupId>
<artifactId>jython-standalone</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,24 @@
*/
package io.cdap.plugin.http.common.pagination.page;

import com.fasterxml.jackson.databind.node.ArrayNode;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.nerdforge.unxml.Parsing;
import com.nerdforge.unxml.factory.ParsingFactory;
import com.nerdforge.unxml.parsers.Parser;
import com.nerdforge.unxml.parsers.builders.ObjectNodeParserBuilder;
import com.google.gson.JsonObject;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.format.StructuredRecordStringConverter;
import io.cdap.plugin.http.common.http.HttpResponse;
import io.cdap.plugin.http.source.common.BaseHttpSourceConfig;
import org.w3c.dom.Document;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XPathCompiler;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trans.XPathException;

import java.util.Iterator;
import java.util.Map;
import javax.xml.xpath.XPathConstants;

/**
* Returns sub elements which are specified by XPath, one by one.
Expand All @@ -41,15 +42,17 @@
class XmlPage extends BasePage {
private final Map<String, String> fieldsMapping;
private final Iterator<JsonElement> iterator;
private final Document document;
private final XdmNode document;
private final Schema schema;
private final BaseHttpSourceConfig config;

private final Processor processor = new Processor(false);

XmlPage(BaseHttpSourceConfig config, HttpResponse httpResponse) {
super(httpResponse);
this.config = config;
this.fieldsMapping = config.getFullFieldsMapping();
this.document = XmlUtil.createXmlDocument(httpResponse.getBody());
this.document = XmlUtil.createXmlDocument(processor, httpResponse.getBody());
this.iterator = getDocumentElementsIterator();
this.schema = config.getSchema();
}
Expand Down Expand Up @@ -79,33 +82,48 @@ public PageEntry next() {
*/
@Override
public String getPrimitiveByPath(String path) {
return (String) XmlUtil.getByXPath(document, path, XPathConstants.STRING);
return XmlUtil.getByXPath(processor, document, path);
}

/**
* 1. Converts xml to a structure which is defined by "Fields Mapping" configuration. This is done using unxml.
* 1. Converts xml to a structure which is defined by "Fields Mapping" configuration. This is done using Saxon.
* 2. The result entity is a json array.
* 3. An iterator for elements of json array is returned.
*
* @return an iterator for elements of result json array.
*/
private Iterator<JsonElement> getDocumentElementsIterator() {
Parsing parsing = ParsingFactory.getInstance().create();
ObjectNodeParserBuilder obj = parsing.obj();

for (Map.Entry<String, String> entry : fieldsMapping.entrySet()) {
String schemaFieldName = entry.getKey();
String fieldPath = entry.getValue();

obj = obj.attribute(schemaFieldName, fieldPath, XmlUtil.xmlTextNodeParser());
XPathCompiler xPathCompiler = processor.newXPathCompiler();
JsonArray jsonArray = new JsonArray();
try {
for (XdmItem entry : xPathCompiler.evaluate(config.getResultPath(), document)) {
JsonObject jsonObject = new JsonObject();
for (String schemaFieldName : fieldsMapping.keySet()) {
XdmValue xdmItems = xPathCompiler.evaluate(fieldsMapping.get(schemaFieldName), entry);
String value = getValueFromXdmItem(xdmItems);
jsonObject.addProperty(schemaFieldName, value);
}
jsonArray.add(jsonObject);
}
} catch (SaxonApiException | XPathException e) {
throw new RuntimeException(e);
}

Parser<ArrayNode> parser = parsing.arr(config.getResultPath(), obj).build();
ArrayNode node = parser.apply(document);
JsonArray jsonArray = JSONUtil.toJsonArray(node.toString());
return jsonArray.iterator();
}

/**
 * Converts the result of a field XPath evaluation into the string stored in the output record.
 *
 * <p>When the result is a single node with more than one child node, the node itself is rendered
 * via {@code toString()}; otherwise the string value of the result is returned.
 *
 * <p>Results that are not a single node (an empty sequence when the path matches nothing, an atomic
 * value, or a sequence of several items) are not {@link XdmNode} instances, so they are converted
 * through their underlying string value instead of being cast.
 * NOTE(review): for an empty sequence this is expected to yield an empty string - confirm that this
 * is the desired representation of a missing field.
 *
 * @param xdmItems value produced by evaluating a field XPath against a result element
 * @return the rendered node when it has more than one child, otherwise the string value of the result
 * @throws XPathException if the string value can not be obtained from the underlying value
 */
private String getValueFromXdmItem(XdmValue xdmItems) throws XPathException {
  // Guard the cast: an unconditional (XdmNode) cast throws ClassCastException for empty sequences,
  // atomic values and multi-item sequences.
  if (!(xdmItems instanceof XdmNode)) {
    return xdmItems.getUnderlyingValue().getStringValue();
  }

  int childCount = 0;
  for (XdmNode ignored : ((XdmNode) xdmItems).children()) {
    childCount++;
    // More than one child: return the full node rather than only its text content.
    // Stop counting as soon as the threshold is crossed.
    if (childCount > 1) {
      return xdmItems.toString();
    }
  }
  // Zero or one child: return the plain string value of the node.
  return xdmItems.getUnderlyingValue().getStringValue();
}

@Override
public void close() {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,24 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.common.base.Charsets;
import com.nerdforge.unxml.parsers.Parser;
import org.w3c.dom.Document;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XPathCompiler;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.trans.XPathException;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPathFactory;

/**
Expand All @@ -50,19 +46,19 @@ public class XmlUtil {
/**
* Create xml document instance out of a String.
*
* @param processor Saxon processor with xml document configuration
* @param xmlString xml in string format
* @return a Document instance representing input xml
* @return an XdmNode document instance representing the input xml
*/
public static Document createXmlDocument(String xmlString) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setIgnoringComments(true);

public static XdmNode createXmlDocument(Processor processor, String xmlString) {
DocumentBuilder documentBuilder = processor.newDocumentBuilder();
XdmNode document = null;
try {
InputStream input = new ByteArrayInputStream(xmlString.getBytes(Charsets.UTF_8));
return factory.newDocumentBuilder().parse(input);
} catch (ParserConfigurationException | SAXException | IOException e) {
throw new IllegalStateException("Failed to parse xml document", e);
document = documentBuilder.build(new StreamSource(new StringReader(xmlString)));
} catch (SaxonApiException e) {
throw new RuntimeException(e);
}
return document;
}

/**
Expand Down Expand Up @@ -106,17 +102,17 @@ public static String nodeToString(Node node) {
* Throws an exception if element is not of given path.
* Returns null if element not found
*
* @param document document instance
* @param processor Saxon processor with xml document configuration
* @param document XdmNode document instance
* @param path xpath string representation
* @param returnType a type of element expected to be returned
* @return element found by XPath or null if not found.
*/
public static Object getByXPath(Document document, String path, QName returnType) {
XPath xpath = xPathfactory.newXPath();
public static String getByXPath(Processor processor, XdmNode document, String path) {
XPathCompiler xPathCompiler = processor.newXPathCompiler();
try {
XPathExpression expr = xpath.compile(path);
return expr.evaluate(document, returnType);
} catch (XPathExpressionException e) {
return xPathCompiler.evaluate(path, document).getUnderlyingValue()
.getStringValue();
} catch (XPathException | SaxonApiException e) {
return null;
}
}
Expand Down