Search in sources :

Example 1 with SpanContext

use of de.catma.indexer.SpanContext in project catma by forTEXT.

In the class TPGraphProjectIndexer, method getSpanContextFor.

/**
 * Collects the tokens that surround the given range of a source document.
 *
 * <p>The Position vertices of the document that pass {@code InRangeFilter(range)} are loaded
 * ordered by {@code tokenOffset}. Starting from the first (backward) and/or last (forward) of
 * them, the {@code isAdjacentTo} edges are followed {@code spanContextSize} times and every
 * position reached is added to the resulting {@link SpanContext} as a {@link TermInfo}.
 *
 * <p>NOTE(review): {@code repeat(...).times(n)} presumably only yields paths that complete all
 * {@code n} hops, so a match with fewer than {@code spanContextSize} neighbours on one side
 * (i.e. close to the document boundary) would get no context at all on that side - confirm
 * whether a partial context is wanted here.
 *
 * @param sourceDocumentId the {@code documentId} property of the SourceDocument vertex to search in
 * @param range the range whose surrounding tokens are requested
 * @param spanContextSize number of adjacency hops to walk in each requested direction
 * @param direction which side(s) of the range to collect
 * @return the span context; its token lists stay empty and its ranges unset when no position
 *         matches the range or no adjacency path is found
 * @throws IOException declared by the overridden method; not thrown by the code visible here
 */
@Override
public SpanContext getSpanContextFor(String sourceDocumentId, Range range, int spanContextSize, SpanDirection direction) throws IOException {
    GraphTraversalSource g = graph.traversal();
    List<Vertex> positionVs = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().has(nt(SourceDocument), "documentId", sourceDocumentId).inE(rt(isPartOf)).outV().hasLabel(nt(Term)).outE(rt(hasPosition)).inV().hasLabel(nt(Position)).filter(new InRangeFilter(range)).order().by("tokenOffset", Order.asc).toList();
    SpanContext spanContext = new SpanContext(sourceDocumentId);
    if (!positionVs.isEmpty()) {
        boolean bothDirections = direction.equals(SpanDirection.BOTH);
        if (bothDirections || direction.equals(SpanDirection.BACKWARD)) {
            Vertex firstPositionV = positionVs.get(0);
            GraphTraversal<Vertex, Path> backwardAdjacencyTraversal = g.V(firstPositionV).repeat(__.in(rt(isAdjacentTo))).times(spanContextSize).path();
            if (backwardAdjacencyTraversal.hasNext()) {
                Iterator<Object> pathIterator = backwardAdjacencyTraversal.next().iterator();
                // the path starts with firstPositionV itself, which belongs to the match, not the context
                pathIterator.next();
                // the remaining positions are visited walking away from the match
                while (pathIterator.hasNext()) {
                    spanContext.addBackwardToken(toTermInfo(g, (Vertex) pathIterator.next()));
                }
            }
        }
        if (bothDirections || direction.equals(SpanDirection.FORWARD)) {
            Vertex lastPositionV = positionVs.get(positionVs.size() - 1);
            GraphTraversal<Vertex, Path> forwardAdjacencyTraversal = g.V(lastPositionV).repeat(__.out(rt(isAdjacentTo))).times(spanContextSize).path();
            if (forwardAdjacencyTraversal.hasNext()) {
                Iterator<Object> pathIterator = forwardAdjacencyTraversal.next().iterator();
                // the path starts with lastPositionV itself, which belongs to the match, not the context
                pathIterator.next();
                while (pathIterator.hasNext()) {
                    spanContext.addForwardToken(toTermInfo(g, (Vertex) pathIterator.next()));
                }
            }
        }
    }
    // The context ranges span from the first to the last token of each token list.
    if (!spanContext.getBackwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getBackwardTokens().get(0);
        TermInfo lastToken = spanContext.getBackwardTokens().get(spanContext.getBackwardTokens().size() - 1);
        spanContext.setBackwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    if (!spanContext.getForwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getForwardTokens().get(0);
        TermInfo lastToken = spanContext.getForwardTokens().get(spanContext.getForwardTokens().size() - 1);
        spanContext.setForwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    return spanContext;
}

/**
 * Builds the {@link TermInfo} for a Position vertex: the literal comes from the Term vertex
 * connected via an incoming {@code hasPosition} edge, the offsets from the position itself.
 */
private TermInfo toTermInfo(GraphTraversalSource g, Vertex positionVertex) {
    Vertex termV = g.V(positionVertex).inE(rt(hasPosition)).outV().hasLabel(nt(Term)).next();
    String term = (String) termV.property("literal").value();
    int tokenOffset = (int) positionVertex.property("tokenOffset").value();
    int startOffset = (int) positionVertex.property("startOffset").value();
    int endOffset = (int) positionVertex.property("endOffset").value();
    return new TermInfo(term, startOffset, endOffset, tokenOffset);
}
Also used : Path(org.apache.tinkerpop.gremlin.process.traversal.Path) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) SpanContext(de.catma.indexer.SpanContext) Range(de.catma.document.Range) GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) TermInfo(de.catma.indexer.TermInfo)

Example 2 with SpanContext

use of de.catma.indexer.SpanContext in project catma by forTEXT.

In the class TPGraphProjectIndexer, method searchCollocation.

/**
 * Returns the rows of {@code baseResult} that have at least one row of
 * {@code collocationConditionResult} within their span context.
 *
 * <p>A span context has to be computed for every row of the result that is iterated, so the
 * smaller of the two results is used as the "anchor" and the other one is only tested for
 * overlap. When the roles are swapped this way, the direction has to be mirrored as well:
 * a collocate lying in the FORWARD context of a base row is the same as that base row lying
 * in the BACKWARD context of the collocate. BOTH is symmetric and stays as it is.
 *
 * @param queryId not used by this implementation
 * @param baseResult the rows to be filtered
 * @param collocationConditionResult the rows that must occur near a base row
 * @param spanContextSize size of the span context, passed on to {@code getSpanContextFor}
 * @param direction side(s) of a base row on which a collocate is accepted
 * @return the matching rows of {@code baseResult}
 * @throws IOException if computing a span context fails
 */
@Override
public QueryResult searchCollocation(QueryId queryId, QueryResult baseResult, QueryResult collocationConditionResult, int spanContextSize, SpanDirection direction) throws IOException {
    int baseResultSize = baseResult.size();
    int collocConditionResultSize = collocationConditionResult.size();
    // anchor on the smaller result to reduce the amount of span context computation
    boolean swapCollocationDirection = baseResultSize > collocConditionResultSize;
    QueryResult anchorResult = swapCollocationDirection ? collocationConditionResult : baseResult;
    QueryResult candidateResult = swapCollocationDirection ? baseResult : collocationConditionResult;

    // looking from the collocate towards the base row means looking the opposite way
    SpanDirection anchorDirection = direction;
    if (swapCollocationDirection) {
        if (SpanDirection.FORWARD.equals(direction)) {
            anchorDirection = SpanDirection.BACKWARD;
        } else if (SpanDirection.BACKWARD.equals(direction)) {
            anchorDirection = SpanDirection.FORWARD;
        }
    }

    Multimap<String, QueryResultRow> candidateRowsBySourceDocumentId = ArrayListMultimap.create();
    for (QueryResultRow candidateRow : candidateResult) {
        candidateRowsBySourceDocumentId.put(candidateRow.getSourceDocumentId(), candidateRow);
    }

    QueryResultRowArray matchingAnchorRows = new QueryResultRowArray();
    QueryResultRowArray matchingCandidateRows = new QueryResultRowArray();
    for (QueryResultRow anchorRow : anchorResult) {
        String sourceDocumentId = anchorRow.getSourceDocumentId();
        if (!candidateRowsBySourceDocumentId.containsKey(sourceDocumentId)) {
            // no candidate in this document, so no need to compute a span context
            continue;
        }
        SpanContext spanContext = getSpanContextFor(sourceDocumentId, anchorRow.getRange(), spanContextSize, anchorDirection);
        boolean anchorMatch = matchingAnchorRows.contains(anchorRow);
        for (QueryResultRow candidateRow : candidateRowsBySourceDocumentId.get(sourceDocumentId)) {
            boolean candidateMatch = matchingCandidateRows.contains(candidateRow);
            // the overlap test is only needed while one of the two rows is still unrecorded
            if (anchorMatch && candidateMatch) {
                continue;
            }
            if (spanContext.hasOverlappingRange(candidateRow.getRanges(), anchorDirection)) {
                if (!anchorMatch) {
                    matchingAnchorRows.add(anchorRow);
                    anchorMatch = true;
                }
                if (!candidateMatch) {
                    matchingCandidateRows.add(candidateRow);
                }
            }
        }
    }
    // after a swap the rows that originate from baseResult are the candidate rows
    return swapCollocationDirection ? matchingCandidateRows : matchingAnchorRows;
}
Also used : QueryResult(de.catma.queryengine.result.QueryResult) SpanContext(de.catma.indexer.SpanContext) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) CommentQueryResultRow(de.catma.queryengine.result.CommentQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray)

Aggregations

SpanContext (de.catma.indexer.SpanContext)2 Range (de.catma.document.Range)1 TermInfo (de.catma.indexer.TermInfo)1 CommentQueryResultRow (de.catma.queryengine.result.CommentQueryResultRow)1 QueryResult (de.catma.queryengine.result.QueryResult)1 QueryResultRow (de.catma.queryengine.result.QueryResultRow)1 QueryResultRowArray (de.catma.queryengine.result.QueryResultRowArray)1 TagQueryResultRow (de.catma.queryengine.result.TagQueryResultRow)1 Path (org.apache.tinkerpop.gremlin.process.traversal.Path)1 GraphTraversalSource (org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource)1 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)1