use of de.catma.indexer.TermInfo in project catma by forTEXT.
the class TPGraphProjectIndexer method getSpanContextFor.
/**
 * Builds the span context (neighbouring tokens) around the positions of a
 * source document that are accepted by {@link InRangeFilter} for the given range.
 *
 * <p>Backward context is collected by following incoming {@code isAdjacentTo}
 * relations from the first matching position, forward context by following
 * outgoing {@code isAdjacentTo} relations from the last matching position.
 * At most {@code spanContextSize} hops are followed per direction; if fewer
 * adjacent positions exist (e.g. the match is close to the beginning or the
 * end of the document) the tokens that do exist are returned instead of no
 * context at all.
 *
 * @param sourceDocumentId the {@code documentId} property of the source document
 * @param range the range used to select the matching positions
 * @param spanContextSize the maximum number of adjacent tokens per direction
 * @param direction which side(s) of the match to collect context for
 * @return the span context; its token lists stay empty when no position matches
 * @throws IOException declared by the overridden method
 */
@Override
public SpanContext getSpanContextFor(String sourceDocumentId, Range range, int spanContextSize, SpanDirection direction) throws IOException {
    GraphTraversalSource g = graph.traversal();

    // ProjectRevision -> SourceDocument(documentId) <- Term -> Position, filtered by range,
    // ordered ascending by tokenOffset
    List<Vertex> positionVs = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().has(nt(SourceDocument), "documentId", sourceDocumentId).inE(rt(isPartOf)).outV().hasLabel(nt(Term)).outE(rt(hasPosition)).inV().hasLabel(nt(Position)).filter(new InRangeFilter(range)).order().by("tokenOffset", Order.asc).toList();

    SpanContext spanContext = new SpanContext(sourceDocumentId);

    if (!positionVs.isEmpty()) {
        Vertex firstPositionV = positionVs.get(0);
        Vertex lastPositionV = positionVs.get(positionVs.size() - 1);

        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.BACKWARD)) {
            // emit() makes shorter chains available too, so that a match with fewer than
            // spanContextSize predecessors still gets the predecessors it has
            Path backwardAdjacencyPath = longestAdjacencyPath(
                    g.V(firstPositionV).repeat(__.in(rt(isAdjacentTo))).times(spanContextSize).emit().path());
            if (backwardAdjacencyPath != null) {
                Iterator<Object> backwardAdjacencyPathIterator = backwardAdjacencyPath.iterator();
                // skip first: it is the matching position itself
                backwardAdjacencyPathIterator.next();
                while (backwardAdjacencyPathIterator.hasNext()) {
                    // tokens are added in path order, i.e. nearest to the match first
                    spanContext.addBackwardToken(toTermInfo(g, (Vertex) backwardAdjacencyPathIterator.next()));
                }
            }
        }

        if (direction.equals(SpanDirection.BOTH) || direction.equals(SpanDirection.FORWARD)) {
            Path forwardAdjacencyPath = longestAdjacencyPath(
                    g.V(lastPositionV).repeat(__.out(rt(isAdjacentTo))).times(spanContextSize).emit().path());
            if (forwardAdjacencyPath != null) {
                Iterator<Object> forwardAdjacencyPathIterator = forwardAdjacencyPath.iterator();
                // skip first: it is the matching position itself
                forwardAdjacencyPathIterator.next();
                while (forwardAdjacencyPathIterator.hasNext()) {
                    spanContext.addForwardToken(toTermInfo(g, (Vertex) forwardAdjacencyPathIterator.next()));
                }
            }
        }
    }

    // the ranges span from the first to the last entry of the respective token list
    // as returned by the SpanContext
    if (!spanContext.getBackwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getBackwardTokens().get(0);
        TermInfo lastToken = spanContext.getBackwardTokens().get(spanContext.getBackwardTokens().size() - 1);
        spanContext.setBackwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }
    if (!spanContext.getForwardTokens().isEmpty()) {
        TermInfo firstToken = spanContext.getForwardTokens().get(0);
        TermInfo lastToken = spanContext.getForwardTokens().get(spanContext.getForwardTokens().size() - 1);
        spanContext.setForwardRange(new Range(firstToken.getRange().getStartPoint(), lastToken.getRange().getEndPoint()));
    }

    return spanContext;
}

/**
 * Drains the given path traversal and returns the path with the most elements,
 * or {@code null} if the traversal yields no path.
 */
private Path longestAdjacencyPath(GraphTraversal<Vertex, Path> adjacencyTraversal) {
    Path longestPath = null;
    while (adjacencyTraversal.hasNext()) {
        Path candidate = adjacencyTraversal.next();
        if (longestPath == null || candidate.size() > longestPath.size()) {
            longestPath = candidate;
        }
    }
    return longestPath;
}

/**
 * Creates the TermInfo for a Position vertex from its offset properties and the
 * {@code literal} property of the Term vertex that points to it via hasPosition.
 */
private TermInfo toTermInfo(GraphTraversalSource g, Vertex positionVertex) {
    Vertex termV = g.V(positionVertex).inE(rt(hasPosition)).outV().hasLabel(nt(Term)).next();
    String term = (String) termV.property("literal").value();
    int tokenOffset = (int) positionVertex.property("tokenOffset").value();
    int startOffset = (int) positionVertex.property("startOffset").value();
    int endOffset = (int) positionVertex.property("endOffset").value();
    return new TermInfo(term, startOffset, endOffset, tokenOffset);
}
use of de.catma.indexer.TermInfo in project catma by forTEXT.
the class KwicListJSONSerializer method toJSON.
/**
 * Serializes a list of keywords in context to a JSON object string.
 *
 * <p>The resulting object contains three parallel arrays (one entry per
 * {@link KeywordInSpanContext} in {@code kwicList}): the backward token terms
 * ({@code prefixArrays}), the keyword ({@code tokenArray}) and the forward token
 * terms ({@code postfixArrays}). Entries of {@code kwicList} that are not a
 * {@link KeywordInSpanContext} are skipped. The {@code caseSensitive} and
 * {@code rightToLeftLanguage} flags are written as the strings
 * {@code "true"}/{@code "false"}.
 *
 * @param kwicList the keywords in context to serialize
 * @param caseSensitive flag passed through to the JSON output
 * @return the JSON representation
 */
public String toJSON(List<KeywordInContext> kwicList, boolean caseSensitive) {
    JsonNodeFactory factory = JsonNodeFactory.instance;
    ObjectNode kwicListJson = factory.objectNode();

    ArrayNode prefixArraysJson = factory.arrayNode();
    ArrayNode tokenArray = factory.arrayNode();
    ArrayNode postfixArraysJson = factory.arrayNode();

    kwicListJson.set(KwicSerializationField.prefixArrays.name(), prefixArraysJson);
    kwicListJson.set(KwicSerializationField.tokenArray.name(), tokenArray);
    kwicListJson.set(KwicSerializationField.postfixArrays.name(), postfixArraysJson);
    kwicListJson.put(KwicSerializationField.caseSensitive.name(), Boolean.toString(caseSensitive));

    int rtlCount = 0;
    for (KeywordInContext kwic : kwicList) {
        if (kwic instanceof KeywordInSpanContext) {
            KeywordInSpanContext spanKwic = (KeywordInSpanContext) kwic;

            ArrayNode prefixArrayJson = factory.arrayNode();
            prefixArraysJson.add(prefixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getBackwardTokens()) {
                prefixArrayJson.add(ti.getTerm());
            }

            tokenArray.add(spanKwic.getKeyword());

            ArrayNode postfixArrayJson = factory.arrayNode();
            postfixArraysJson.add(postfixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getForwardTokens()) {
                postfixArrayJson.add(ti.getTerm());
            }

            if (kwic.isRightToLeft()) {
                rtlCount++;
            }
        }
    }

    // rightToLeftLanguage->true if more than half of the kwics stem from RTL documents.
    // Compared as 2 * rtlCount > size to stay exact for odd sizes: rounding size / 2
    // half-up would raise the threshold (e.g. 1 -> 1, 5 -> 3) and report a single RTL
    // kwic, or 3 out of 5, as not RTL.
    boolean rightToLeft = 2L * rtlCount > kwicList.size();
    kwicListJson.put(KwicSerializationField.rightToLeftLanguage.name(), Boolean.toString(rightToLeft));

    return kwicListJson.toString();
}
use of de.catma.indexer.TermInfo in project catma by forTEXT.
the class WildcardStateHandler method checkLastListEntry.
/**
 * Merges the last entry of {@code orderedTermInfos} into the current wildcard
 * term info when the two are directly adjacent, i.e. when the last entry's range
 * ends exactly where the wildcard term's range starts. The merged term info
 * replaces {@code wildcardTermInfo} and the last list entry is removed.
 * Does nothing if the list is empty or the ranges do not touch.
 */
private void checkLastListEntry() {
    if (orderedTermInfos.isEmpty()) {
        return;
    }

    final int lastIndex = orderedTermInfos.size() - 1;
    final TermInfo precedingTermInfo = orderedTermInfos.get(lastIndex);

    final boolean touchesWildcard =
            precedingTermInfo.getRange().getEndPoint() == wildcardTermInfo.getRange().getStartPoint();
    if (!touchesWildcard) {
        return;
    }

    // concatenate both terms and span from the preceding start to the wildcard end
    wildcardTermInfo = new TermInfo(
            precedingTermInfo.getTerm() + wildcardTermInfo.getTerm(),
            precedingTermInfo.getRange().getStartPoint(),
            wildcardTermInfo.getRange().getEndPoint());
    orderedTermInfos.remove(lastIndex);
}
Aggregations