use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.
the class RSTImpl method getOutGoingEdgeTypeAnnotation.
/**
 * Serializes the outgoing relations of a node into a JSON array.
 *
 * <p>Relations whose target is an {@code SToken} are skipped. Every other
 * relation becomes one JSON object holding
 * <ul>
 * <li>"sType": the relation type, or "edge" when the type is null or empty,</li>
 * <li>"from" / "to": the ids of the two end points as produced by
 * {@code getUniStrId}; for relations whose type equals {@code getRSTType()}
 * the direction is inverted, i.e. the given node becomes "to",</li>
 * <li>"annotation": the text values of the relation's annotations, if any.</li>
 * </ul>
 *
 * @param node the node whose outgoing relations are serialized
 * @return the serialized relations; empty when the graph returns {@code null}
 *         for the outgoing relations of the node
 * @throws JSONException if a relation has no target node
 */
private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws JSONException {
  JSONArray edgeData = new JSONArray();
  List<SRelation<SNode, SNode>> outgoing = node.getGraph().getOutRelations(node.getId());
  // nothing to serialize when the graph reports no outgoing relations
  if (outgoing == null) {
    return edgeData;
  }
  for (SRelation<SNode, SNode> edge : outgoing) {
    if (edge == null) {
      continue;
    }
    SNode target = edge.getTarget();
    if (target instanceof SToken) {
      continue;
    }
    // fall back to a generic label for untyped relations
    String label = edge.getType();
    if (label == null || label.isEmpty()) {
      label = "edge";
    }
    JSONObject jsonEdge = new JSONObject();
    edgeData.put(jsonEdge);
    jsonEdge.put("sType", label);
    if (target == null) {
      throw new JSONException("could not cast to SNode");
    }
    // Invert the direction of the RST-edge.
    boolean inverted = getRSTType().equals(label);
    String keyForNode = inverted ? "to" : "from";
    String keyForTarget = inverted ? "from" : "to";
    jsonEdge.put(keyForNode, getUniStrId(node));
    jsonEdge.put(keyForTarget, getUniStrId(target));
    Set<SAnnotation> edgeAnnotations = edge.getAnnotations();
    if (edgeAnnotations == null) {
      continue;
    }
    for (SAnnotation anno : edgeAnnotations) {
      getOrCreateArray(jsonEdge, "annotation").put(anno.getValue_STEXT());
    }
  }
  return edgeData;
}
use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.
the class CSVMultiTokExporter method outputText.
/**
 * Takes a match and outputs a csv-line.
 *
 * <p>For the first match ({@code matchNumber == 0}) a header line is written
 * first. It contains, for every matched node number {@code n} known in
 * {@code annotationsForMatchedNodes}, the columns {@code n_id},
 * {@code n_span} and one {@code n_anno_<name>} column per annotation name,
 * followed by one {@code meta_<key>} column per entry of {@code metakeys}.
 *
 * <p>The data line lists the matched nodes ordered by their match number and
 * then exactly one metadata value per entry of {@code metakeys}, in the same
 * order as the header. Missing annotation or metadata values are written as
 * {@code 'NULL'} so that the columns always line up with the header.
 *
 * @param graph the document graph containing the match
 * @param alignmc not used by this implementation
 * @param matchNumber zero-based number of the match; 0 triggers the header output
 * @param out the writer the line is appended to
 *
 * @throws java.io.IOException if writing to {@code out} fails
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
  // first match
  if (matchNumber == 0) {
    // output header
    List<String> headerLine = new ArrayList<>();
    for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
      int node_id = match.getKey();
      headerLine.add(String.valueOf(node_id) + "_id");
      headerLine.add(String.valueOf(node_id) + "_span");
      for (String annoName : match.getValue()) {
        headerLine.add(String.valueOf(node_id) + "_anno_" + annoName);
      }
    }
    for (String key : metakeys) {
      headerLine.add("meta_" + key);
    }
    out.append(StringUtils.join(headerLine, "\t"));
    out.append("\n");
  }
  // output nodes in the order of the matches
  SortedMap<Integer, String> contentLine = new TreeMap<>();
  for (SNode node : this.getMatchedNodes(graph)) {
    List<String> nodeLine = new ArrayList<>();
    // export id
    RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
    nodeLine.add(String.valueOf(feats.getInternalID()));
    // export spanned text
    String span = graph.getText(node);
    nodeLine.add(span != null ? span : "");
    // export annotations
    int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
    for (String annoName : annotationsForMatchedNodes.get(node_id)) {
      SAnnotation anno = node.getAnnotation(annoName);
      if (anno != null) {
        nodeLine.add(anno.getValue_STEXT());
      } else {
        nodeLine.add("'NULL'");
      }
    }
    // add everything to line
    contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
  }
  out.append(StringUtils.join(contentLine.values(), "\t"));
  // TODO cache the metadata
  if (!metakeys.isEmpty()) {
    // TODO is this the best way to get the corpus name?
    String corpus_name = CommonHelper.getCorpusPath(java.net.URI.create(graph.getDocument().getId())).get(0);
    List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());
    // Index the requested metadata by name; the first occurrence of a name wins.
    Map<String, String> metaValues = new TreeMap<>();
    for (Annotation anno : asList) {
      String name = anno.getName();
      if (name != null && metakeys.contains(name) && !metaValues.containsKey(name)) {
        metaValues.put(name, String.valueOf(anno.getValue()));
      }
    }
    // Emit one cell per header column, in header order, so that a missing or
    // repeated metadata entry can not shift the following columns.
    for (String key : metakeys) {
      String value = metaValues.get(key);
      out.append("\t");
      out.append(value != null ? value : "'NULL'");
    }
  }
  out.append("\n");
}
use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.
the class LegacyGraphConverter method addRelation.
/**
 * Creates a legacy {@code Edge} for a relation and registers it in the
 * annotation graph and at its end points.
 *
 * <p>The edge type is derived from {@code clazz}: dominance, pointing and
 * spanning relations map to {@code DOMINANCE}, {@code POINTING_RELATION} and
 * {@code COVERAGE}; everything else stays {@code UNKNOWN}. The namespace is
 * taken from the first layer of {@code relLayers}, if there is one.
 *
 * @param clazz the relation class used to determine the edge type
 * @param type the name given to the edge
 * @param annotations annotations copied onto the edge
 * @param source the source node; it is looked up in {@code allNodes} and may resolve to {@code null}
 * @param target the target node; it is looked up in {@code allNodes}
 * @param relLayers layers of the relation
 * @param pre the pre-order value set on the edge
 * @param componentID the component id set on the edge
 * @param allNodes mapping from Salt nodes to the already converted legacy nodes
 * @param annoGraph the graph the edge is added to
 */
private static void addRelation(Class<? extends SRelation> clazz, String type, Collection<SAnnotation> annotations, SNode source, SNode target, Set<SLayer> relLayers, long pre, long componentID, Map<SNode, AnnisNode> allNodes, AnnotationGraph annoGraph) {
  // resolve the legacy edge type up front
  EdgeType edgeType = EdgeType.UNKNOWN;
  if (SDominanceRelation.class.isAssignableFrom(clazz)) {
    edgeType = EdgeType.DOMINANCE;
  } else if (SPointingRelation.class.isAssignableFrom(clazz)) {
    edgeType = EdgeType.POINTING_RELATION;
  } else if (SSpanningRelation.class.isAssignableFrom(clazz)) {
    edgeType = EdgeType.COVERAGE;
  }

  AnnisNode sourceNode = allNodes.get(source);
  AnnisNode targetNode = allNodes.get(target);

  Edge legacyEdge = new Edge();
  legacyEdge.setSource(sourceNode);
  legacyEdge.setDestination(targetNode);
  legacyEdge.setPre(pre);
  legacyEdge.setComponentID(componentID);
  if (!relLayers.isEmpty()) {
    SLayer firstLayer = relLayers.iterator().next();
    legacyEdge.setNamespace(firstLayer.getName());
  }
  legacyEdge.setName(type);
  legacyEdge.setEdgeType(edgeType);

  for (SAnnotation sAnno : annotations) {
    Annotation converted = new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT());
    legacyEdge.addAnnotation(converted);
  }

  annoGraph.addEdge(legacyEdge);
  targetNode.addIncomingEdge(legacyEdge);
  // only the source is allowed to be missing
  if (sourceNode != null) {
    sourceNode.addOutgoingEdge(legacyEdge);
  }
}
use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.
the class LegacyGraphConverter method convertToAnnotationGraph.
/**
 * Converts a Salt document graph into the legacy {@code AnnotationGraph}.
 *
 * <p>Only nodes carrying the relANNIS node feature are converted. For each of
 * them the annotations, name, first layer (as namespace), corpus/text
 * references and left/right borders are copied. For tokens with an overlapped
 * text sequence the spanned text and the token index are set as well. Nodes
 * whose internal id is contained in {@code matchedNodeIDs} are marked with
 * their 1-based position in that list.
 *
 * <p>Afterwards every relation with a relANNIS edge feature is added, plus an
 * additional dominance edge with a {@code null} name for every dominance
 * relation that has an artificial dominance component and pre-order value.
 *
 * @param docGraph the Salt graph to convert
 * @param matchedNodeIDs internal ids of the matched nodes, in match order
 * @return the converted legacy graph
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
  Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
  AnnotationGraph annoGraph = new AnnotationGraph();
  List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
  annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
  annoGraph.setDocumentName(docGraph.getDocument().getName());
  Map<SNode, AnnisNode> allNodes = new HashMap<>();
  for (SNode sNode : docGraph.getNodes()) {
    SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
    if (featNodeRaw == null) {
      // nodes without relANNIS information have no legacy counterpart
      continue;
    }
    // the feature is looked up once and reused for all node properties
    RelannisNodeFeature feat = (RelannisNodeFeature) featNodeRaw.getValue();
    long internalID = feat.getInternalID();
    AnnisNode aNode = new AnnisNode(internalID);
    for (SAnnotation sAnno : sNode.getAnnotations()) {
      aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
    }
    aNode.setName(sNode.getName());
    Set<SLayer> layers = sNode.getLayers();
    if (!layers.isEmpty()) {
      aNode.setNamespace(layers.iterator().next().getName());
    }
    if (sNode instanceof SToken) {
      List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
      // an empty list is treated like a missing one instead of failing on get(0)
      if (seqList != null && !seqList.isEmpty()) {
        DataSourceSequence seq = seqList.get(0);
        Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
        SSequentialDS seqDS = seq.getDataSource();
        Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
        Preconditions.checkNotNull(seqDS.getData(), "SSequentalDS data is null for token %s", sNode.getId());
        String seqDSData = (String) seqDS.getData();
        Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
        Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
        int start = seq.getStart().intValue();
        int end = seq.getEnd().intValue();
        // one placeholder per argument: token id, start and end
        Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start: %s, end: %s)", sNode.getId(), start, end);
        aNode.setSpannedText(seqDSData.substring(start, end));
        aNode.setToken(true);
        aNode.setTokenIndex(feat.getTokenIndex());
      }
    } else {
      aNode.setToken(false);
      aNode.setTokenIndex(null);
    }
    aNode.setCorpus(feat.getCorpusRef());
    aNode.setTextId(feat.getTextRef());
    aNode.setLeft(feat.getLeft());
    aNode.setLeftToken(feat.getLeftToken());
    aNode.setRight(feat.getRight());
    aNode.setRightToken(feat.getRightToken());
    if (matchSet.contains(aNode.getId())) {
      // match numbers are 1-based positions in the list of matched ids
      aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
      annoGraph.getMatchedNodeIds().add(aNode.getId());
    } else {
      aNode.setMatchedNodeInQuery(null);
    }
    annoGraph.addNode(aNode);
    allNodes.put(sNode, aNode);
  }
  for (SRelation rel : docGraph.getRelations()) {
    RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
    if (featRelation != null) {
      addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
    }
  }
  // add relations with empty relation name for every dominance relation
  List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
  for (SDominanceRelation rel : dominanceRelations) {
    RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
    if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
      addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
    }
  }
  return annoGraph;
}
use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.
the class TimelineReconstructorTest method testBematacDialog.
/**
 * Checks the reconstruction of a sample dialog.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt graph generated by ANNIS is loaded and its virtual tokenization is removed.
 * The test then verifies that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
  String sampleLocation = getClass().getResource("SampleDialog.salt").toString();
  SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(URI.createURI(sampleLocation));

  Map<String, String> anno2order = new HashMap<>();
  anno2order.put("default_ns::instructee_utt", "instructee_dipl");
  anno2order.put("default_ns::instructor_utt", "instructor_dipl");
  TimelineReconstructor.removeVirtualTokenization(docGraph, anno2order);

  // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
  List<STextualDS> texts = docGraph.getTextualDSs();
  assertEquals(6, texts.size());

  STextualDS instructorDipl = findTextualDSByName("instructor_dipl", texts);
  assertNotNull(instructorDipl);
  assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

  // the tokens covering the complete instructor_dipl text
  DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
  wholeText.setDataSource(instructorDipl);
  wholeText.setStart(instructorDipl.getStart());
  wholeText.setEnd(instructorDipl.getEnd());
  List<SToken> instructorDiplToken = docGraph.getTokensBySequence(wholeText);
  String[] expectedTokenTexts = {"in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst"};
  assertEquals(expectedTokenTexts.length, instructorDiplToken.size());
  for (int i = 0; i < expectedTokenTexts.length; i++) {
    assertEquals(expectedTokenTexts[i], docGraph.getText(instructorDiplToken.get(i)));
  }

  // check that the other real spans are now connected with the token
  List<SNode> uttNode = docGraph.getNodesByName("sSpan1294");
  assertNotNull(uttNode);
  assertEquals(1, uttNode.size());
  SNode utterance = uttNode.get(0);
  SAnnotation uttAnno = utterance.getAnnotation("default_ns::instructor_utt");
  assertNotNull(uttAnno);
  assertEquals("utt", uttAnno.getValue_STEXT());
  List<SRelation> uttOutRelations = utterance.getOutRelations();
  assertNotNull(uttOutRelations);
  assertEquals(5, uttOutRelations.size());
  for (SRelation rel : uttOutRelations) {
    assertTrue(rel instanceof SSpanningRelation);
    assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) rel.getTarget(), docGraph));
  }

  // the remaining texts: name of the textual data source and its expected content
  String[][] expectedTexts = {
    {"instructor_norm", "in Richtung des Toasters gehst ja gehst"},
    {"instructee_dipl", "mhm ich geh in Richtung des Toasters okay"},
    {"instructee_norm", "ich gehe in Richtung des Toasters okay"},
    {"instructee_extra", "zeichnet"},
    {"break", "0,7 0,5"}
  };
  for (String[] expected : expectedTexts) {
    STextualDS textualDS = findTextualDSByName(expected[0], texts);
    assertNotNull(textualDS);
    assertEquals(expected[1], textualDS.getText());
  }
}
Aggregations