Search in sources :

Example 26 with XObject

use of org.apache.xpath.objects.XObject in project fess-crawler by codelibs.

the class XpathTransformer method storeData.

/**
 * Parses the response body into a DOM document, evaluates every XPath rule in
 * {@code fieldRuleMap} against it and stores the assembled text in {@code resultData}.
 *
 * <p>Each rule's result is rendered according to the XPath result type (boolean,
 * number, string, node set, result tree fragment, or anything else via
 * {@code object()}). A rule that fails to evaluate is logged, together with the
 * causing exception, and skipped; the remaining rules are still processed.
 *
 * <p>The text is encoded with {@code charsetName}. If that charset is not supported,
 * {@code charsetName} is switched to UTF-8 and the data is encoded as UTF-8 instead.
 *
 * @param responseData the crawled response whose body is parsed
 * @param resultData the holder that receives the encoded data and its encoding name
 * @throws CrawlingAccessException if the response body cannot be read or parsed
 */
@Override
protected void storeData(final ResponseData responseData, final ResultData resultData) {
    final DOMParser parser = getDomParser();
    try (final InputStream in = responseData.getResponseBody()) {
        final InputSource is = new InputSource(in);
        if (responseData.getCharSet() != null) {
            is.setEncoding(responseData.getCharSet());
        }
        parser.parse(is);
    } catch (final Exception e) {
        throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
    }
    final Document document = parser.getDocument();
    final StringBuilder buf = new StringBuilder(1000);
    buf.append(getResultDataHeader());
    for (final Map.Entry<String, String> entry : fieldRuleMap.entrySet()) {
        final String name = entry.getKey();
        final String path = entry.getValue();
        try {
            final XObject xObj = getXPathAPI().eval(document, path);
            final int type = xObj.getType();
            switch (type) {
                case XObject.CLASS_BOOLEAN:
                    final boolean b = xObj.bool();
                    buf.append(getResultDataBody(name, Boolean.toString(b)));
                    break;
                case XObject.CLASS_NUMBER:
                    final double d = xObj.num();
                    buf.append(getResultDataBody(name, Double.toString(d)));
                    break;
                case XObject.CLASS_STRING:
                    final String str = xObj.str();
                    buf.append(getResultDataBody(name, str.trim()));
                    break;
                case XObject.CLASS_NODESET:
                    // A node set yields one value per matched node.
                    final NodeList nodeList = xObj.nodelist();
                    final List<String> strList = new ArrayList<>();
                    for (int i = 0; i < nodeList.getLength(); i++) {
                        final Node node = nodeList.item(i);
                        strList.add(node.getTextContent());
                    }
                    buf.append(getResultDataBody(name, strList));
                    break;
                case XObject.CLASS_RTREEFRAG:
                    final int rtf = xObj.rtf();
                    buf.append(getResultDataBody(name, Integer.toString(rtf)));
                    break;
                case XObject.CLASS_NULL:
                case XObject.CLASS_UNKNOWN:
                case XObject.CLASS_UNRESOLVEDVARIABLE:
                default:
                    // Anything else is rendered through toString(); a missing object becomes "".
                    Object obj = xObj.object();
                    if (obj == null) {
                        obj = "";
                    }
                    buf.append(getResultDataBody(name, obj.toString()));
                    break;
            }
        } catch (final TransformerException e) {
            // Best effort: skip the failing rule, but keep the cause in the log so it can be diagnosed.
            logger.warn("Could not parse a value of " + name + ":" + path, e);
        }
    }
    buf.append(getAdditionalData(responseData, document));
    buf.append(getResultDataFooter());
    final String data = buf.toString().trim();
    try {
        resultData.setData(data.getBytes(charsetName));
    } catch (final UnsupportedEncodingException e) {
        if (logger.isInfoEnabled()) {
            logger.info("Invalid charsetName: " + charsetName + ". Changed to " + Constants.UTF_8, e);
        }
        // Fall back to UTF-8 for this and subsequent calls.
        charsetName = Constants.UTF_8_CHARSET.name();
        resultData.setData(data.getBytes(Constants.UTF_8_CHARSET));
    }
    resultData.setEncoding(charsetName);
}
Also used : InputSource(org.xml.sax.InputSource) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) InputStream(java.io.InputStream) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Document(org.w3c.dom.Document) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) TransformerException(javax.xml.transform.TransformerException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) XObject(org.apache.xpath.objects.XObject) DOMParser(org.cyberneko.html.parsers.DOMParser) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) XObject(org.apache.xpath.objects.XObject) TransformerException(javax.xml.transform.TransformerException)

Example 27 with XObject

use of org.apache.xpath.objects.XObject in project fess by codelibs.

the class FessXpathTransformer method storeData.

/**
 * Parses the response body into a DOM document, extracts one value per rule in
 * {@code fieldRuleMap}, and stores the serialized field map in {@code resultData}.
 *
 * <p>Leading bytes recognised by {@code isUtf8BomBytes} are consumed before parsing;
 * otherwise the stream is rewound to its start. Boolean, number and string XPath
 * results are stored as their string form. Any other result type is resolved to a
 * single node, optionally pruned when {@code fieldPrunedRuleMap} flags the field,
 * and its text content (or {@code null} when no node matches) is stored.
 *
 * @param responseData the crawled response whose body is parsed
 * @param resultData the holder that receives the serialized map and the encoding name
 * @throws CrawlingAccessException if the body cannot be parsed or the map cannot be serialized
 */
@Override
protected void storeData(final ResponseData responseData, final ResultData resultData) {
    final DOMParser domParser = getDomParser();
    try (final BufferedInputStream stream = new BufferedInputStream(responseData.getResponseBody())) {
        // Peek at the head of the stream; rewind unless it is a UTF-8 BOM.
        final byte[] head = new byte[UTF8_BOM_SIZE];
        stream.mark(UTF8_BOM_SIZE);
        final int readCount = stream.read(head);
        final boolean startsWithBom = readCount >= 3 && isUtf8BomBytes(head);
        if (!startsWithBom) {
            stream.reset();
        }
        final InputSource source = new InputSource(stream);
        if (responseData.getCharSet() != null) {
            source.setEncoding(responseData.getCharSet());
        }
        domParser.parse(source);
    } catch (final Exception e) {
        throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
    }

    final Document document = domParser.getDocument();
    processMetaRobots(responseData, resultData, document);
    processXRobotsTag(responseData, resultData);

    final Map<String, Object> dataMap = new LinkedHashMap<>();
    for (final Map.Entry<String, String> rule : fieldRuleMap.entrySet()) {
        final String xpath = rule.getValue();
        try {
            final XObject evaluated = getXPathAPI().eval(document, xpath);
            final int resultType = evaluated.getType();
            if (resultType == XObject.CLASS_BOOLEAN) {
                putResultDataBody(dataMap, rule.getKey(), Boolean.toString(evaluated.bool()));
            } else if (resultType == XObject.CLASS_NUMBER) {
                putResultDataBody(dataMap, rule.getKey(), Double.toString(evaluated.num()));
            } else if (resultType == XObject.CLASS_STRING) {
                putResultDataBody(dataMap, rule.getKey(), evaluated.str());
            } else {
                // Null, unknown, node-set, tree-fragment and unresolved results all
                // take the single-node path.
                final Boolean pruneFlag = fieldPrunedRuleMap.get(rule.getKey());
                Node selected = getXPathAPI().selectSingleNode(document, rule.getValue());
                if (selected != null && Boolean.TRUE.equals(pruneFlag)) {
                    selected = pruneNode(selected);
                }
                final String text;
                if (selected != null) {
                    text = selected.getTextContent();
                } else {
                    text = null;
                }
                putResultDataBody(dataMap, rule.getKey(), text);
            }
        } catch (final TransformerException e) {
            logger.warn("Could not parse a value of {}:{}", rule.getKey(), rule.getValue(), e);
        }
    }

    putAdditionalData(dataMap, responseData, document);
    normalizeData(responseData, dataMap);

    try {
        resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
    } catch (final Exception e) {
        throw new CrawlingAccessException("Could not serialize object: " + responseData.getUrl(), e);
    }
    resultData.setEncoding(charsetName);
}
Also used : InputSource(org.xml.sax.InputSource) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) Node(org.w3c.dom.Node) Document(org.w3c.dom.Document) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) TransformerException(javax.xml.transform.TransformerException) MalformedURLException(java.net.MalformedURLException) ChildUrlsException(org.codelibs.fess.crawler.exception.ChildUrlsException) LinkedHashMap(java.util.LinkedHashMap) BufferedInputStream(java.io.BufferedInputStream) XObject(org.apache.xpath.objects.XObject) DOMParser(org.codelibs.nekohtml.parsers.DOMParser) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) NamedNodeMap(org.w3c.dom.NamedNodeMap) XObject(org.apache.xpath.objects.XObject) TransformerException(javax.xml.transform.TransformerException)

Example 28 with XObject

use of org.apache.xpath.objects.XObject in project j2objc by google.

the class NodeSorter method compare.

/**
 * Return the results of a compare of two nodes.
 * TODO: Optimize compare -- cache the getStringExpr results, key by m_selectPat + hash of node.
 *
 * <p>The sort key at {@code kIndex} decides whether the nodes are compared as numbers
 * ({@code m_treatAsNumbers}) or as collation keys. For key indexes 0 and 1 the values
 * are read from the {@code m_key1Value} / {@code m_key2Value} fields of the compare
 * elements; for any higher index the key's select pattern is executed against each node.
 *
 * <p>When the nodes tie on this key and further keys exist, the method recurses with
 * {@code kIndex + 1}. If the result is still 0 after that, it is replaced by -1 or 1
 * based on {@code DTM.isNodeAfter}, so this method never returns 0.
 *
 * @param n1 First node to use in compare
 * @param n2 Second node to use in compare
 * @param kIndex Index of NodeSortKey to use for sort
 * @param support XPath context to use; in this method it is only used to obtain the
 *        DTM for the final tie-break
 *
 * @return The results of the compare of the two nodes: a non-zero int whose sign
 *         encodes the relative order (NOTE(review): presumably negative means n1
 *         sorts first -- confirm against the calling sort).
 *
 * @throws TransformerException declared by this method; presumably raised by the
 *         select-pattern evaluation -- confirm
 */
int compare(NodeCompareElem n1, NodeCompareElem n2, int kIndex, XPathContext support) throws TransformerException {
    int result = 0;
    NodeSortKey k = (NodeSortKey) m_keys.elementAt(kIndex);
    if (k.m_treatAsNumbers) {
        double n1Num, n2Num;
        // The first two keys are stored on the compare elements as Double values.
        if (kIndex == 0) {
            n1Num = ((Double) n1.m_key1Value).doubleValue();
            n2Num = ((Double) n2.m_key1Value).doubleValue();
        } else if (kIndex == 1) {
            n1Num = ((Double) n1.m_key2Value).doubleValue();
            n2Num = ((Double) n2.m_key2Value).doubleValue();
        } else /* Leave this in case we decide to use an array later
      if (kIndex < maxkey)
      {
      double n1Num = (double)n1.m_keyValue[kIndex];
      double n2Num = (double)n2.m_keyValue[kIndex];
      }*/
        {
            // Get values dynamically
            // NOTE(review): evaluation uses m_execContext, not the 'support' parameter -- confirm intentional.
            XObject r1 = k.m_selectPat.execute(m_execContext, n1.m_node, k.m_namespaceContext);
            XObject r2 = k.m_selectPat.execute(m_execContext, n2.m_node, k.m_namespaceContext);
            n1Num = r1.num();
            // Can't use NaN for compare. They are never equal. Use zero instead.
            // That way we can keep elements in document order.
            // (The commented-out lines below refer to a variable 'd' that no longer
            // exists here; NaN is instead handled explicitly further down.)
            // n1Num = Double.isNaN(d) ? 0.0 : d;
            n2Num = r2.num();
        // n2Num = Double.isNaN(d) ? 0.0 : d;
        }
        // Equal (non-NaN, since NaN == NaN is false) values defer to the next key when there is one.
        if ((n1Num == n2Num) && ((kIndex + 1) < m_keys.size())) {
            result = compare(n1, n2, kIndex + 1, support);
        } else {
            double diff;
            // NaN handling: two NaNs tie; a single NaN is treated as smaller than any number.
            if (Double.isNaN(n1Num)) {
                if (Double.isNaN(n2Num))
                    diff = 0.0;
                else
                    diff = -1;
            } else if (Double.isNaN(n2Num))
                diff = 1;
            else
                diff = n1Num - n2Num;
            // process order parameter
            // Collapses diff to -1, 0 or 1, with the sign flipped when the key is descending.
            result = (int) ((diff < 0.0) ? (k.m_descending ? 1 : -1) : (diff > 0.0) ? (k.m_descending ? -1 : 1) : 0);
        }
    } else // end treat as numbers
    {
        CollationKey n1String, n2String;
        // The first two keys are stored on the compare elements as CollationKey values.
        if (kIndex == 0) {
            n1String = (CollationKey) n1.m_key1Value;
            n2String = (CollationKey) n2.m_key1Value;
        } else if (kIndex == 1) {
            n1String = (CollationKey) n1.m_key2Value;
            n2String = (CollationKey) n2.m_key2Value;
        } else /* Leave this in case we decide to use an array later
      if (kIndex < maxkey)
      {
        String n1String = (String)n1.m_keyValue[kIndex];
        String n2String = (String)n2.m_keyValue[kIndex];
      }*/
        {
            // Get values dynamically
            XObject r1 = k.m_selectPat.execute(m_execContext, n1.m_node, k.m_namespaceContext);
            XObject r2 = k.m_selectPat.execute(m_execContext, n2.m_node, k.m_namespaceContext);
            n1String = k.m_col.getCollationKey(r1.str());
            n2String = k.m_col.getCollationKey(r2.str());
        }
        // Use collation keys for faster compare, but note that whitespaces
        // etc... are treated differently from if we were comparing Strings.
        result = n1String.compareTo(n2String);
        // Process caseOrder parameter
        if (k.m_caseOrderUpper) {
            String tempN1 = n1String.getSourceString().toLowerCase();
            String tempN2 = n2String.getSourceString().toLowerCase();
            // Only strings that are equal after lower-casing have their order flipped.
            if (tempN1.equals(tempN2)) {
                // java defaults to upper case is greater.
                // (The expression below is equivalent to a plain negation of result.)
                result = result == 0 ? 0 : -result;
            }
        }
        // Process order parameter
        if (k.m_descending) {
            result = -result;
        }
    }
    // Still tied on this key: let the next key (if any) decide.
    if (0 == result) {
        if ((kIndex + 1) < m_keys.size()) {
            result = compare(n1, n2, kIndex + 1, support);
        }
    }
    // Tied on every key: fall back to DTM.isNodeAfter so the result is never 0.
    if (0 == result) {
        // I shouldn't have to do this except that there seems to
        // be a glitch in the mergesort
        // if(r1.getType() == r1.CLASS_NODESET)
        // {
        // %OPT%
        DTM dtm = support.getDTM(n1.m_node);
        result = dtm.isNodeAfter(n1.m_node, n2.m_node) ? -1 : 1;
    // }
    }
    return result;
}
Also used : CollationKey(java.text.CollationKey) DTM(org.apache.xml.dtm.DTM) XObject(org.apache.xpath.objects.XObject)

Example 29 with XObject

use of org.apache.xpath.objects.XObject in project j2objc by google.

the class TransformerImpl method setParameter.

/**
 * Set a parameter for the templates.
 *
 * <p>The value is wrapped in an {@code XObject} and written into the global variable
 * slot of every composed declaration whose XSL token is
 * {@code Constants.ELEMNAME_PARAMVARIABLE} and whose name equals the qualified name
 * built from {@code namespace} and {@code name}.
 *
 * @param name The name of the parameter.
 * @param namespace The namespace of the parameter.
 * @param value The value object.  This can be any valid Java object
 * -- it's up to the processor to provide the proper
 * coercion to the object, or simply pass it on for use
 * in extensions.
 */
public void setParameter(String name, String namespace, Object value) {
    VariableStack globals = getXPathContext().getVarStack();
    QName paramName = new QName(namespace, name);
    XObject wrappedValue = XObject.create(value, getXPathContext());
    StylesheetRoot stylesheet = m_stylesheetRoot;
    Vector declarations = stylesheet.getVariablesAndParamsComposed();
    // Walk the declarations from last to first; the index doubles as the global slot.
    for (int slot = declarations.size() - 1; slot >= 0; slot--) {
        ElemVariable declaration = (ElemVariable) declarations.elementAt(slot);
        if (declaration.getXSLToken() != Constants.ELEMNAME_PARAMVARIABLE) {
            continue;
        }
        if (declaration.getName().equals(paramName)) {
            globals.setGlobalVariable(slot, wrappedValue);
        }
    }
}
Also used : VariableStack(org.apache.xpath.VariableStack) ElemVariable(org.apache.xalan.templates.ElemVariable) StylesheetRoot(org.apache.xalan.templates.StylesheetRoot) QName(org.apache.xml.utils.QName) Vector(java.util.Vector) NodeVector(org.apache.xml.utils.NodeVector) XObject(org.apache.xpath.objects.XObject)

Example 30 with XObject

use of org.apache.xpath.objects.XObject in project j2objc by google.

the class TransformerImpl method pushGlobalVars.

/**
 * Internal -- push the global variables from the Stylesheet onto
 * the context's runtime variable stack.
 * <p>If we encounter a variable
 * that is already defined in the variable stack, we ignore it.  This
 * is because the second variable definition will be at a lower import
 * precedence.  Presumably, global variables at the same import precedence
 * with the same name will have been caught during the recompose process.
 * <p>However, if we encounter a parameter that is already defined in the
 * variable stack, we need to see if this is a parameter whose value was
 * supplied by a setParameter call.  If so, we need to "receive" the one
 * already in the stack, ignoring this one.  If it is just an earlier
 * xsl:param or xsl:variable definition, we ignore it using the same
 * reasoning as explained above for the variable.
 * <p>Each slot that is still empty receives an unresolved-variable placeholder
 * rather than an evaluated value.
 *
 * @param contextNode The root of the source tree, can't be null.
 *
 * @throws TransformerException
 */
protected void pushGlobalVars(int contextNode) throws TransformerException {
    XPathContext context = m_xcontext;
    VariableStack stack = context.getVarStack();
    StylesheetRoot stylesheet = getStylesheet();
    Vector declarations = stylesheet.getVariablesAndParamsComposed();
    int count = declarations.size();
    stack.link(count);
    // Visit the declarations from last to first; the index doubles as the global slot.
    for (int slot = count - 1; slot >= 0; slot--) {
        ElemVariable declaration = (ElemVariable) declarations.elementAt(slot);
        XObject placeholder = new XUnresolvedVariable(declaration, contextNode, this, stack.getStackFrame(), 0, true);
        // Slots that already hold a value are left untouched.
        if (null == stack.elementAt(slot)) {
            stack.setGlobalVariable(slot, placeholder);
        }
    }
}
Also used : VariableStack(org.apache.xpath.VariableStack) ElemVariable(org.apache.xalan.templates.ElemVariable) XUnresolvedVariable(org.apache.xalan.templates.XUnresolvedVariable) StylesheetRoot(org.apache.xalan.templates.StylesheetRoot) XPathContext(org.apache.xpath.XPathContext) Vector(java.util.Vector) NodeVector(org.apache.xml.utils.NodeVector) XObject(org.apache.xpath.objects.XObject)

Aggregations

XObject (org.apache.xpath.objects.XObject)107 TransformerException (javax.xml.transform.TransformerException)27 DTM (org.apache.xml.dtm.DTM)24 XPathContext (org.apache.xpath.XPathContext)24 XNodeSet (org.apache.xpath.objects.XNodeSet)15 DTMIterator (org.apache.xml.dtm.DTMIterator)12 VariableStack (org.apache.xpath.VariableStack)11 Vector (java.util.Vector)9 QName (org.apache.xml.utils.QName)9 Node (org.w3c.dom.Node)9 Expression (org.apache.xpath.Expression)8 XString (org.apache.xpath.objects.XString)7 DTMAxisTraverser (org.apache.xml.dtm.DTMAxisTraverser)6 XMLString (org.apache.xml.utils.XMLString)6 org.apache.xpath (org.apache.xpath)6 Document (org.w3c.dom.Document)6 ArrayList (java.util.ArrayList)5 NodeVector (org.apache.xml.utils.NodeVector)5 SAXException (org.xml.sax.SAXException)5 Hashtable (java.util.Hashtable)4