use of de.catma.indexer.SpanContext in project catma by forTEXT.
the class TPGraphProjectIndexer method getSpanContextFor.
/**
 * Collects the tokens that surround the given range of a document.
 *
 * <p>The Position vertices of the document that pass the {@code InRangeFilter} for
 * {@code range} are loaded ordered by ascending {@code tokenOffset}. Backward tokens are
 * collected by following {@code isAdjacentTo} edges against their direction from the first of
 * these positions, forward tokens by following the edges along their direction from the last
 * of these positions.
 *
 * <p>If fewer than {@code spanContextSize} adjacent positions exist on one side, the tokens
 * that do exist are returned for that side instead of none at all.
 *
 * @param sourceDocumentId the {@code documentId} property of the SourceDocument vertex to search in
 * @param range the range whose surrounding tokens are requested
 * @param spanContextSize the maximum number of tokens to collect per direction
 * @param direction which side(s) of the range to collect tokens for
 * @return the span context; it has no tokens and no ranges if no position matched {@code range}
 * @throws IOException declared by the overridden method; not thrown by the code visible here
 */
@Override
public SpanContext getSpanContextFor(String sourceDocumentId, Range range, int spanContextSize, SpanDirection direction) throws IOException {
    GraphTraversalSource g = graph.traversal();
    List<Vertex> positionVs = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().has(nt(SourceDocument), "documentId", sourceDocumentId).inE(rt(isPartOf)).outV().hasLabel(nt(Term)).outE(rt(hasPosition)).inV().hasLabel(nt(Position)).filter(new InRangeFilter(range)).order().by("tokenOffset", Order.asc).toList();
    SpanContext spanContext = new SpanContext(sourceDocumentId);
    if (!positionVs.isEmpty()) {
        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.BACKWARD)) {
            // the backward context starts at the first position within the range
            addAdjacentTokens(g, positionVs.get(0), spanContextSize, false, spanContext);
        }
        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.FORWARD)) {
            // the forward context starts at the last position within the range
            addAdjacentTokens(g, positionVs.get(positionVs.size() - 1), spanContextSize, true, spanContext);
        }
    }
    // NOTE(review): backward tokens are added in walking order, i.e. starting with the token
    // next to the range. Unless SpanContext reorders them, the "first" token below is the one
    // closest to the range - confirm that the resulting Range is not inverted.
    if (!spanContext.getBackwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getBackwardTokens().get(0);
        TermInfo lastToken = spanContext.getBackwardTokens().get(spanContext.getBackwardTokens().size() - 1);
        spanContext.setBackwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    if (!spanContext.getForwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getForwardTokens().get(0);
        TermInfo lastToken = spanContext.getForwardTokens().get(spanContext.getForwardTokens().size() - 1);
        spanContext.setForwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    return spanContext;
}

/**
 * Walks up to {@code spanContextSize} {@code isAdjacentTo} edges away from the given position
 * and adds a {@link TermInfo} for every position visited (excluding the start position) to
 * the given span context.
 *
 * @param g the traversal source to query
 * @param startPositionV the Position vertex to start from; it is not added itself
 * @param spanContextSize the maximum number of adjacent positions to visit
 * @param forward {@code true} to follow outgoing edges and add forward tokens,
 *        {@code false} to follow incoming edges and add backward tokens
 * @param spanContext the span context that receives the tokens
 */
private void addAdjacentTokens(GraphTraversalSource g, Vertex startPositionV, int spanContextSize, boolean forward, SpanContext spanContext) {
    if (spanContextSize <= 0) {
        // nothing to collect; also keeps the loop limit below reachable
        return;
    }
    // repeat(...).times(n) would drop every walk that cannot complete all n hops, leaving
    // matches near the document boundaries without any context. Checking the stop condition
    // before each hop ends the walk at the boundary and keeps the shorter path.
    GraphTraversal<Vertex, Path> adjacencyTraversal;
    if (forward) {
        adjacencyTraversal = g.V(startPositionV)
            .until(__.or(__.loops().is(spanContextSize), __.not(__.out(rt(isAdjacentTo)))))
            .repeat(__.out(rt(isAdjacentTo)))
            .path();
    } else {
        adjacencyTraversal = g.V(startPositionV)
            .until(__.or(__.loops().is(spanContextSize), __.not(__.in(rt(isAdjacentTo)))))
            .repeat(__.in(rt(isAdjacentTo)))
            .path();
    }
    if (!adjacencyTraversal.hasNext()) {
        return;
    }
    Path adjacencyPath = adjacencyTraversal.next();
    Iterator<Object> adjacencyPathIterator = adjacencyPath.iterator();
    // skip first, it is the start position itself
    adjacencyPathIterator.next();
    while (adjacencyPathIterator.hasNext()) {
        Vertex positionVertex = (Vertex) adjacencyPathIterator.next();
        // the literal of a token is stored on the Term vertex that owns the position
        Vertex termV = g.V(positionVertex).inE(rt(hasPosition)).outV().hasLabel(nt(Term)).next();
        String term = (String) termV.property("literal").value();
        int tokenOffset = (int) positionVertex.property("tokenOffset").value();
        int startOffset = (int) positionVertex.property("startOffset").value();
        int endOffset = (int) positionVertex.property("endOffset").value();
        TermInfo termInfo = new TermInfo(term, startOffset, endOffset, tokenOffset);
        if (forward) {
            spanContext.addForwardToken(termInfo);
        } else {
            spanContext.addBackwardToken(termInfo);
        }
    }
}
use of de.catma.indexer.SpanContext in project catma by forTEXT.
the class TPGraphProjectIndexer method searchCollocation.
/**
 * Returns the rows of {@code baseResult} that have at least one row of
 * {@code collocationConditionResult} from the same document within their span context.
 *
 * <p>To reduce the number of span contexts that have to be computed, the smaller of the two
 * results is the one whose rows get a span context. When this means that the roles of the two
 * results are exchanged, the direction is mirrored as well: a condition row in the forward
 * context of a base row corresponds to the base row being in the backward context of the
 * condition row.
 *
 * @param queryId not used by this implementation
 * @param baseResult the rows to filter
 * @param collocationConditionResult the rows that have to occur near a base row
 * @param spanContextSize the number of tokens per direction that make up the span context
 * @param direction the side(s) of a base row on which a condition row has to occur
 * @return the matching rows of {@code baseResult}
 * @throws IOException if a span context cannot be computed
 */
@Override
public QueryResult searchCollocation(QueryId queryId, QueryResult baseResult, QueryResult collocationConditionResult, int spanContextSize, SpanDirection direction) throws IOException {
    int baseResultSize = baseResult.size();
    int collocConditionResultSize = collocationConditionResult.size();
    boolean swapCollocationDirection = baseResultSize > collocConditionResultSize;
    SpanDirection effectiveDirection = direction;
    // swap to reduce the amount of span context computation
    if (swapCollocationDirection) {
        QueryResult bufferResult = baseResult;
        baseResult = collocationConditionResult;
        collocationConditionResult = bufferResult;
        // the span contexts are now computed around the condition rows, so the side on which
        // to look for the other row has to be mirrored; BOTH is symmetric and stays as it is
        if (SpanDirection.FORWARD.equals(direction)) {
            effectiveDirection = SpanDirection.BACKWARD;
        } else if (SpanDirection.BACKWARD.equals(direction)) {
            effectiveDirection = SpanDirection.FORWARD;
        }
    }
    Multimap<String, QueryResultRow> collocConditionResultBySourceDocumentId = ArrayListMultimap.create();
    for (QueryResultRow conditionRow : collocationConditionResult) {
        collocConditionResultBySourceDocumentId.put(conditionRow.getSourceDocumentId(), conditionRow);
    }
    QueryResultRowArray matchingBaseRows = new QueryResultRowArray();
    QueryResultRowArray matchingCollocConditionRows = new QueryResultRowArray();
    for (QueryResultRow row : baseResult) {
        // only rows of the same document can be collocated
        if (collocConditionResultBySourceDocumentId.containsKey(row.getSourceDocumentId())) {
            SpanContext spanContext = getSpanContextFor(row.getSourceDocumentId(), row.getRange(), spanContextSize, effectiveDirection);
            boolean baseMatch = matchingBaseRows.contains(row);
            for (QueryResultRow collocConditionRow : collocConditionResultBySourceDocumentId.get(row.getSourceDocumentId())) {
                boolean collocMatch = matchingCollocConditionRows.contains(collocConditionRow);
                // the overlap test is only needed while one of the two rows is still unmatched
                if (!baseMatch || !collocMatch) {
                    if (spanContext.hasOverlappingRange(collocConditionRow.getRanges(), effectiveDirection)) {
                        if (!baseMatch) {
                            matchingBaseRows.add(row);
                            baseMatch = true;
                        }
                        if (!collocMatch) {
                            matchingCollocConditionRows.add(collocConditionRow);
                            collocMatch = true;
                        }
                    }
                }
            }
        }
    }
    // swap back: after a swap the caller's base rows were collected as condition rows
    return swapCollocationDirection ? matchingCollocConditionRows : matchingBaseRows;
}
Aggregations