use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class LegacyGraphConverter method convertToAnnotationGraph.
/**
 * Converts a Salt document graph into the legacy {@link AnnotationGraph} representation.
 *
 * <p>Only nodes that carry the relANNIS node feature are converted; relations are converted
 * when {@code RelannisEdgeFeature.extract} yields a feature for them. Dominance relations
 * that carry artificial dominance information are additionally added a second time with an
 * empty relation name.
 *
 * @param docGraph the Salt document graph to convert
 * @param matchedNodeIDs internal IDs of the matched nodes; the 1-based position of an ID in
 *        this list is stored as the node's "matched node in query" number
 * @return the newly created annotation graph
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
    Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
    AnnotationGraph annoGraph = new AnnotationGraph();

    List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
    annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
    annoGraph.setDocumentName(docGraph.getDocument().getName());

    Map<SNode, AnnisNode> allNodes = new HashMap<>();
    for (SNode sNode : docGraph.getNodes()) {
        SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (featNodeRaw == null) {
            // nodes without the relANNIS feature have no legacy counterpart
            continue;
        }
        // the feature is looked up once and reused for all legacy attributes below
        RelannisNodeFeature feat = (RelannisNodeFeature) featNodeRaw.getValue();
        AnnisNode aNode = new AnnisNode(feat.getInternalID());

        for (SAnnotation sAnno : sNode.getAnnotations()) {
            aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
        }
        aNode.setName(sNode.getName());

        // the name of the first layer (if any) is used as the node namespace
        Set<SLayer> layers = sNode.getLayers();
        if (!layers.isEmpty()) {
            aNode.setNamespace(layers.iterator().next().getName());
        }

        if (sNode instanceof SToken) {
            List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
            // an empty list is treated like a missing one instead of failing on get(0)
            if (seqList != null && !seqList.isEmpty()) {
                DataSourceSequence seq = seqList.get(0);
                Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
                SSequentialDS seqDS = seq.getDataSource();
                Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
                String seqDSData = (String) seqDS.getData();
                Preconditions.checkNotNull(seqDSData, "SSequentalDS data is null for token %s", sNode.getId());
                Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
                Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
                int start = seq.getStart().intValue();
                int end = seq.getEnd().intValue();
                // the template needs one placeholder per argument, otherwise the token id
                // is reported as "start" and the real end is only appended in brackets
                Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start %s, end: %s)", sNode.getId(), start, end);
                aNode.setSpannedText(seqDSData.substring(start, end));
                aNode.setToken(true);
                aNode.setTokenIndex(feat.getTokenIndex());
            }
        } else {
            aNode.setToken(false);
            aNode.setTokenIndex(null);
        }

        aNode.setCorpus(feat.getCorpusRef());
        aNode.setTextId(feat.getTextRef());
        aNode.setLeft(feat.getLeft());
        aNode.setLeftToken(feat.getLeftToken());
        aNode.setRight(feat.getRight());
        aNode.setRightToken(feat.getRightToken());

        if (matchSet.contains(aNode.getId())) {
            // match numbers are 1-based positions inside the given ID list
            aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
            annoGraph.getMatchedNodeIds().add(aNode.getId());
        } else {
            aNode.setMatchedNodeInQuery(null);
        }

        annoGraph.addNode(aNode);
        allNodes.put(sNode, aNode);
    }

    for (SRelation rel : docGraph.getRelations()) {
        RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
        if (featRelation != null) {
            addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
        }
    }

    // add relations with empty relation name for every dominance relation
    List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
    for (SDominanceRelation rel : dominanceRelations) {
        RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
        if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
            addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
        }
    }
    return annoGraph;
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class TimelineReconstructorTest method testBematacDialog.
/**
 * Tests a sample dialog reconstruction.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * <p>The Salt graph which was generated by ANNIS is loaded and the virtual tokenization is
 * removed. Afterwards it is checked that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    URI sampleLocation = URI.createURI(getClass().getResource("SampleDialog.salt").toString());
    SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(sampleLocation);

    Map<String, String> anno2order = new HashMap<>();
    anno2order.put("default_ns::instructee_utt", "instructee_dipl");
    anno2order.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(docGraph, anno2order);

    // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
    List<STextualDS> texts = docGraph.getTextualDSs();
    assertEquals(6, texts.size());

    STextualDS instructorDipl = findTextualDSByName("instructor_dipl", texts);
    assertNotNull(instructorDipl);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

    // query all token which are covered by the complete instructor_dipl text
    DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
    wholeText.setDataSource(instructorDipl);
    wholeText.setStart(instructorDipl.getStart());
    wholeText.setEnd(instructorDipl.getEnd());
    List<SToken> instructorDiplToken = docGraph.getTokensBySequence(wholeText);

    String[] expectedToken = {"in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst"};
    assertEquals(expectedToken.length, instructorDiplToken.size());
    for (int idx = 0; idx < expectedToken.length; idx++) {
        assertEquals(expectedToken[idx], docGraph.getText(instructorDiplToken.get(idx)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> uttNode = docGraph.getNodesByName("sSpan1294");
    assertNotNull(uttNode);
    assertEquals(1, uttNode.size());
    SNode utterance = uttNode.get(0);
    SAnnotation uttAnno = utterance.getAnnotation("default_ns::instructor_utt");
    assertNotNull(uttAnno);
    assertEquals("utt", uttAnno.getValue_STEXT());
    List<SRelation> uttOutRelations = utterance.getOutRelations();
    assertNotNull(uttOutRelations);
    assertEquals(5, uttOutRelations.size());
    for (SRelation outgoing : uttOutRelations) {
        assertTrue(outgoing instanceof SSpanningRelation);
        assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) outgoing.getTarget(), docGraph));
    }

    // the remaining texts only need to exist and to have the expected content
    String[][] remainingTexts = {
        {"instructor_norm", "in Richtung des Toasters gehst ja gehst"},
        {"instructee_dipl", "mhm ich geh in Richtung des Toasters okay"},
        {"instructee_norm", "ich gehe in Richtung des Toasters okay"},
        {"instructee_extra", "zeichnet"},
        {"break", "0,7 0,5"}
    };
    for (String[] nameAndContent : remainingTexts) {
        STextualDS ds = findTextualDSByName(nameAndContent[0], texts);
        assertNotNull(ds);
        assertEquals(nameAndContent[1], ds.getText());
    }
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class EventExtractor method parseSalt.
/**
 * Converts a Salt document graph to rows of events, grouped by annotation name.
 *
 * <p>One (initially empty) row list is created per entry of {@code annotationNames}, in the
 * given order. Span and/or token nodes are then handed to {@code addAnnotationsForNode},
 * after which the rows are merged, sorted, and split on islands and gaps.
 *
 * @param input the visualizer input providing the document graph
 * @param showSpanAnnos if true, the spans of the graph are processed
 * @param showTokenAnnos if true, the tokens of the graph are processed
 * @param annotationNames the annotation names for which row lists are created
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return the rows per annotation name, in the iteration order of {@code annotationNames}
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    SDocumentGraph graph = input.getDocument().getDocumentGraph();

    // only look at annotations which were defined by the user
    LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation = new LinkedHashMap<>();
    for (String annoName : annotationNames) {
        rowsByAnnotation.put(annoName, new ArrayList<Row>());
    }

    AtomicInteger eventCounter = new AtomicInteger();
    PDFPageHelper pageNumberHelper = new PDFPageHelper(input);

    // 1. collect the events of all requested node types
    if (showSpanAnnos) {
        for (SSpan span : graph.getSpans()) {
            // nodes are only skipped if a text filter is set and they belong to another text
            if (text != null && text != CommonHelper.getTextualDSForNode(span, graph)) {
                continue;
            }
            addAnnotationsForNode(span, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, true, mediaLayer, replaceValueWithMediaIcon);
        }
    }
    if (showTokenAnnos) {
        for (SToken tok : graph.getTokens()) {
            if (text != null && text != CommonHelper.getTextualDSForNode(tok, graph)) {
                continue;
            }
            addAnnotationsForNode(tok, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, false, mediaLayer, replaceValueWithMediaIcon);
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        mergeAllRowsIfPossible(rows);
    }

    // 3. sort events on one row by left token index
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            sortEventsByTokenIndex(row);
        }
    }

    // 4. split up events if they cover islands
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            splitRowsOnIslands(row, graph, text, startTokenIndex, endTokenIndex);
        }
    }

    // 5. split up events if they have gaps
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            splitRowsOnGaps(row, graph, startTokenIndex, endTokenIndex);
        }
    }

    return rowsByAnnotation;
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class RSTImpl method transformSaltToJSON.
/**
 * Traverses the document graph of the given visualizer input and returns the string form of
 * the {@code result} field afterwards.
 *
 * <p>The roots accepted by {@code CommonHelper.checkSLayer} for the configured namespace
 * are used as traversal start nodes. If the graph has any root at all, the segments are
 * collected into {@code sentences} first and numbered (starting with 1) via a processing
 * annotation. The final "jsonBuild" traversal uses this object as handler.
 *
 * @param visInput the visualizer input whose document graph is transformed
 * @return {@code result.toString()} after the traversal
 */
private String transformSaltToJSON(VisualizerInput visInput) {
    graph = visInput.getSResult().getDocumentGraph();
    List<SNode> rootSNodes = graph.getRoots();

    // keep only the roots which belong to the configured layer
    List<SNode> rstRoots = new ArrayList<SNode>();
    for (SNode candidate : rootSNodes) {
        if (CommonHelper.checkSLayer(namespace, candidate)) {
            rstRoots.add(candidate);
        }
    }

    // NOTE(review): this guard looks at the unfiltered roots, not at rstRoots - confirm
    // that this is intended.
    if (!rootSNodes.isEmpty()) {
        // collect all sentence and sort them.
        graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST, "getSentences", new GraphTraverseHandler() {
            @Override
            public void nodeReached(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SNode currNode, SRelation sRelation, SNode fromNode, long order) {
                if (currNode instanceof SStructure && isSegment(currNode)) {
                    sentences.add((SStructure) currNode);
                }
            }

            @Override
            public void nodeLeft(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SNode currNode, SRelation edge, SNode fromNode, long order) {
                // nothing to do when leaving a node
            }

            @Override
            public boolean checkConstraint(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SRelation edge, SNode currNode, long order) {
                // token are not needed
                return !(currNode instanceof SToken);
            }
        });

        // decorate segments with sentence number
        int sentenceNumber = 0;
        for (SStructure sentence : sentences) {
            sentenceNumber++;
            sentence.createProcessingAnnotation(SENTENCE_INDEX, SENTENCE_INDEX, Integer.toString(sentenceNumber));
        }
    } else {
        log.debug("does not find an annotation which matched {}", ANNOTATION_KEY);
    }

    // the JSON itself is built in both cases by the traversal callbacks of this object
    graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST, "jsonBuild", this);
    return result.toString();
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class RSTImpl method createJsonEntry.
/**
 * Creates the JSON entry for a single node.
 *
 * <p>The entry gets an "id", a "name" and a nested "data" object. For an
 * {@code SStructure}, the texts of the tokens reached by its outgoing relations are
 * rendered as HTML {@code <span>} elements and stored as "sentence" if at least one token
 * was found. Outgoing edge annotations are stored as "edges" when available, and segments
 * additionally receive their sentence index (or -1 if none is annotated) under both
 * {@code SENTENCE_LEFT} and {@code SENTENCE_RIGHT}.
 *
 * @param currNode the node to create the entry for
 * @return the JSON entry; it may be incomplete if a {@code JSONException} occurred, which
 *         is logged and not rethrown
 */
private JSONObject createJsonEntry(SNode currNode) {
    JSONObject jsonData = new JSONObject();
    StringBuilder sentenceHtml = new StringBuilder();

    // use a hash set so we don't get any duplicate entries
    LinkedHashSet<SToken> token = new LinkedHashSet<>();

    if (currNode instanceof SStructure) {
        // get all directly dominated tokens
        List<SRelation<SNode, SNode>> edges = currNode.getGraph().getOutRelations(currNode.getId());
        for (SRelation<SNode, SNode> sedge : edges) {
            if (sedge.getTarget() instanceof SToken) {
                token.add((SToken) sedge.getTarget());
            }
        }

        // build the string: one span per token, separated by a blank inside the span
        int remaining = token.size();
        for (SToken tok : token) {
            remaining--;
            String text = getText(tok);
            String color = getHTMLColor(tok);
            if (color == null) {
                sentenceHtml.append("<span>");
            } else {
                sentenceHtml.append("<span style=\"color : ").append(color).append(";\">");
            }
            sentenceHtml.append(text);
            if (remaining > 0) {
                sentenceHtml.append(" ");
            }
            sentenceHtml.append("</span>");
        }
    }

    try {
        // build unique id, cause is used for an unique html element id.
        jsonData.put("id", getUniStrId(currNode));
        jsonData.put("name", currNode.getName());

        // additional data object for edge labels and rendering sentences
        JSONObject data = new JSONObject();
        JSONArray edgesJSON = getOutGoingEdgeTypeAnnotation(currNode);

        // since we have found some tokens, it must be a sentence in RST.
        if (!token.isEmpty()) {
            data.put("sentence", sentenceHtml.toString());
        }
        if (edgesJSON != null) {
            data.put("edges", edgesJSON);
        }
        if (currNode instanceof SStructure && isSegment(currNode)) {
            SProcessingAnnotation sentenceIdx = currNode.getProcessingAnnotation(SENTENCE_INDEX + "::" + SENTENCE_INDEX);
            int index;
            if (sentenceIdx == null) {
                index = -1;
            } else {
                index = Integer.parseInt(sentenceIdx.getValue_STEXT());
            }
            data.put(SENTENCE_LEFT, index);
            data.put(SENTENCE_RIGHT, index);
        }
        jsonData.put("data", data);
    } catch (JSONException ex) {
        log.error("problems create entry for {}", currNode, ex);
    }
    return jsonData;
}
Aggregations