Search in sources:

Example 1 with TermInfo

use of de.catma.indexer.TermInfo in project catma by forTEXT.

Class TPGraphProjectIndexer, method getSpanContextFor.

/**
 * Builds the {@link SpanContext} around a range of a source document, i.e. the
 * tokens that precede and/or follow the positions found inside that range.
 *
 * <p>The positions of the document that pass the {@code InRangeFilter} for
 * {@code range} are loaded ordered by ascending {@code tokenOffset}. Starting from
 * the first (backward) and the last (forward) of them, the {@code isAdjacentTo}
 * edges are followed {@code spanContextSize} times and every position vertex on the
 * resulting path, except the starting one, is added to the context as a
 * {@link TermInfo}.
 *
 * <p>NOTE(review): {@code repeat(...).times(n)} presumably only yields paths of
 * exactly {@code n} hops, so no tokens would be collected when fewer than
 * {@code spanContextSize} adjacent positions exist (e.g. close to the start or end
 * of the document) - confirm whether that is intended.
 *
 * @param sourceDocumentId the {@code documentId} property of the document to look at
 * @param range the range whose surrounding tokens are requested
 * @param spanContextSize the number of adjacency hops to follow in each direction
 * @param direction whether to collect backward tokens, forward tokens or both
 * @return the span context; it has no tokens and no ranges if no position matched
 * @throws IOException declared by the overridden method
 */
@Override
public SpanContext getSpanContextFor(String sourceDocumentId, Range range, int spanContextSize, SpanDirection direction) throws IOException {
    GraphTraversalSource g = graph.traversal();
    List<Vertex> positionVs = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().has(nt(SourceDocument), "documentId", sourceDocumentId).inE(rt(isPartOf)).outV().hasLabel(nt(Term)).outE(rt(hasPosition)).inV().hasLabel(nt(Position)).filter(new InRangeFilter(range)).order().by("tokenOffset", Order.asc).toList();
    SpanContext spanContext = new SpanContext(sourceDocumentId);
    if (!positionVs.isEmpty()) {
        Vertex firstPositionV = positionVs.get(0);
        Vertex lastPositionV = positionVs.get(positionVs.size() - 1);
        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.BACKWARD)) {
            GraphTraversal<Vertex, Path> backwardAdjacencyTraversal = g.V(firstPositionV).repeat(__.in(rt(isAdjacentTo))).times(spanContextSize).path();
            if (backwardAdjacencyTraversal.hasNext()) {
                Iterator<Object> backwardAdjacencyPathIterator = backwardAdjacencyTraversal.next().iterator();
                // skip first: the path starts with the position inside the range itself
                backwardAdjacencyPathIterator.next();
                // NOTE(review): tokens are added in path order, i.e. walking away from
                // the range - verify that addBackwardToken expects this order.
                while (backwardAdjacencyPathIterator.hasNext()) {
                    spanContext.addBackwardToken(termInfoForPosition(g, (Vertex) backwardAdjacencyPathIterator.next()));
                }
            }
        }
        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.FORWARD)) {
            GraphTraversal<Vertex, Path> forwardAdjacencyTraversal = g.V(lastPositionV).repeat(__.out(rt(isAdjacentTo))).times(spanContextSize).path();
            if (forwardAdjacencyTraversal.hasNext()) {
                Iterator<Object> forwardAdjacencyPathIterator = forwardAdjacencyTraversal.next().iterator();
                // skip first: the path starts with the position inside the range itself
                forwardAdjacencyPathIterator.next();
                while (forwardAdjacencyPathIterator.hasNext()) {
                    spanContext.addForwardToken(termInfoForPosition(g, (Vertex) forwardAdjacencyPathIterator.next()));
                }
            }
        }
    }
    // the context ranges span from the start of the first to the end of the last collected token
    if (!spanContext.getBackwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getBackwardTokens().get(0);
        TermInfo lastToken = spanContext.getBackwardTokens().get(spanContext.getBackwardTokens().size() - 1);
        spanContext.setBackwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    if (!spanContext.getForwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getForwardTokens().get(0);
        TermInfo lastToken = spanContext.getForwardTokens().get(spanContext.getForwardTokens().size() - 1);
        spanContext.setForwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    return spanContext;
}

/**
 * Creates the {@link TermInfo} for a position vertex: the literal is read from the
 * term vertex that is connected via an incoming {@code hasPosition} edge, the
 * offsets are read from the position vertex itself.
 */
private TermInfo termInfoForPosition(GraphTraversalSource g, Vertex positionVertex) {
    Vertex termV = g.V(positionVertex).inE(rt(hasPosition)).outV().hasLabel(nt(Term)).next();
    String term = (String) termV.property("literal").value();
    int tokenOffset = (int) positionVertex.property("tokenOffset").value();
    int startOffset = (int) positionVertex.property("startOffset").value();
    int endOffset = (int) positionVertex.property("endOffset").value();
    return new TermInfo(term, startOffset, endOffset, tokenOffset);
}
Also used : Path(org.apache.tinkerpop.gremlin.process.traversal.Path) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) SpanContext(de.catma.indexer.SpanContext) Range(de.catma.document.Range) GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) TermInfo(de.catma.indexer.TermInfo)

Example 2 with TermInfo

use of de.catma.indexer.TermInfo in project catma by forTEXT.

Class KwicListJSONSerializer, method toJSON.

/**
 * Serializes a list of keywords in context to a JSON object string.
 *
 * <p>The resulting object contains, under the names of the corresponding
 * {@code KwicSerializationField} constants: one array of backward terms per kwic
 * (prefixArrays), the keywords (tokenArray), one array of forward terms per kwic
 * (postfixArrays), and the caseSensitive and rightToLeftLanguage flags, both
 * written as the strings {@code "true"} / {@code "false"}.
 *
 * <p>Only entries that are {@code KeywordInSpanContext} instances contribute to the
 * arrays and to the right-to-left count; other entries are skipped but still count
 * towards the list size used for the right-to-left majority.
 *
 * @param kwicList the keywords in context to serialize
 * @param caseSensitive the value written to the caseSensitive field
 * @return the JSON representation
 */
public String toJSON(List<KeywordInContext> kwicList, boolean caseSensitive) {
    JsonNodeFactory factory = JsonNodeFactory.instance;
    ObjectNode kwicListJson = factory.objectNode();
    ArrayNode prefixArraysJson = factory.arrayNode();
    ArrayNode tokenArray = factory.arrayNode();
    ArrayNode postfixArraysJson = factory.arrayNode();
    kwicListJson.set(KwicSerializationField.prefixArrays.name(), prefixArraysJson);
    kwicListJson.set(KwicSerializationField.tokenArray.name(), tokenArray);
    kwicListJson.set(KwicSerializationField.postfixArrays.name(), postfixArraysJson);
    kwicListJson.put(KwicSerializationField.caseSensitive.name(), Boolean.toString(caseSensitive));
    int rtlCount = 0;
    for (KeywordInContext kwic : kwicList) {
        if (kwic instanceof KeywordInSpanContext) {
            KeywordInSpanContext spanKwic = (KeywordInSpanContext) kwic;
            ArrayNode prefixArrayJson = factory.arrayNode();
            prefixArraysJson.add(prefixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getBackwardTokens()) {
                prefixArrayJson.add(ti.getTerm());
            }
            tokenArray.add(spanKwic.getKeyword());
            ArrayNode postfixArrayJson = factory.arrayNode();
            postfixArraysJson.add(postfixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getForwardTokens()) {
                postfixArrayJson.add(ti.getTerm());
            }
            if (kwic.isRightToLeft()) {
                rtlCount++;
            }
        }
    }
    // rightToLeftLanguage -> true if more than half of the kwics stem from RTL documents.
    // Compared without division: halving the size with rounding would raise the
    // threshold for odd sizes (e.g. 1 RTL kwic out of 1 would not count as RTL).
    boolean mostlyRightToLeft = 2L * rtlCount > kwicList.size();
    kwicListJson.put(KwicSerializationField.rightToLeftLanguage.name(), Boolean.toString(mostlyRightToLeft));
    return kwicListJson.toString();
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) TermInfo(de.catma.indexer.TermInfo) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) KeywordInContext(de.catma.document.source.KeywordInContext) JsonNodeFactory(com.fasterxml.jackson.databind.node.JsonNodeFactory)

Example 3 with TermInfo

use of de.catma.indexer.TermInfo in project catma by forTEXT.

Class WildcardStateHandler, method checkLastListEntry.

/**
 * Merges the last entry of {@code orderedTermInfos} into the wildcard term when
 * that entry ends exactly where the wildcard term starts.
 *
 * <p>In that case the wildcard term is replaced by a new {@link TermInfo} whose
 * term is the concatenation of both terms and whose range runs from the start of
 * the last entry to the end of the previous wildcard term; the merged entry is
 * then removed from the list. Nothing happens if the list is empty or the two
 * ranges do not touch.
 */
private void checkLastListEntry() {
    if (orderedTermInfos.isEmpty()) {
        return;
    }
    int lastIndex = orderedTermInfos.size() - 1;
    TermInfo previous = orderedTermInfos.get(lastIndex);
    boolean touchesWildcard = previous.getRange().getEndPoint() == this.wildcardTermInfo.getRange().getStartPoint();
    if (!touchesWildcard) {
        return;
    }
    this.wildcardTermInfo = new TermInfo(
            previous.getTerm() + wildcardTermInfo.getTerm(),
            previous.getRange().getStartPoint(),
            wildcardTermInfo.getRange().getEndPoint());
    // the last entry now lives on inside the wildcard term
    orderedTermInfos.remove(lastIndex);
}
Also used : TermInfo(de.catma.indexer.TermInfo)

Aggregations

TermInfo (de.catma.indexer.TermInfo)3 ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 JsonNodeFactory (com.fasterxml.jackson.databind.node.JsonNodeFactory)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 Range (de.catma.document.Range)1 KeywordInContext (de.catma.document.source.KeywordInContext)1 KeywordInSpanContext (de.catma.indexer.KeywordInSpanContext)1 SpanContext (de.catma.indexer.SpanContext)1 Path (org.apache.tinkerpop.gremlin.process.traversal.Path)1 GraphTraversalSource (org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource)1 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)1