use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.
the class CorefVisualizer method writeOutput.
/**
 * Writes the HTML output for the CorefVisualizer.
 *
 * <p>Builds an HTML document whose head links the font (if any), the jQuery/jQuery-UI
 * resources and the coref stylesheet/script, collects the pointing relations accepted by
 * {@code includeEdge} into the visualizer's component/referent bookkeeping fields, renders
 * every textual data source via {@code outputSingleText} and finally writes the doctype and
 * the HTML tree to the given writer.
 *
 * <p>An {@link IOException} raised while writing is logged and not propagated.
 *
 * @param input the visualizer input providing the document, font, mappings and resource paths
 * @param w writer to write with
 */
@Override
public void writeOutput(VisualizerInput input, Writer w) {
  // root html element
  Html html = new Html();
  Head head = new Head();
  Body body = new Body();
  html.removeXmlns();
  html.appendChild(head);
  html.appendChild(body);
  try {
    LinkedList<String> fonts = new LinkedList<String>();
    if (input.getFont() != null) {
      Link linkFont = new Link();
      linkFont.setHref(input.getFont().getUrl());
      head.appendChild(linkFont);
      fonts.add(input.getFont().getName());
    }
    // generic fallback font, always last in the font-family list
    fonts.add("serif");
    Link linkJQueryUI = new Link();
    linkJQueryUI.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.css"));
    linkJQueryUI.setRel("stylesheet");
    linkJQueryUI.setType("text/css");
    head.appendChild(linkJQueryUI);
    Link linkJQueryUIStructure = new Link();
    linkJQueryUIStructure.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.structure.min.css"));
    linkJQueryUIStructure.setRel("stylesheet");
    linkJQueryUIStructure.setType("text/css");
    head.appendChild(linkJQueryUIStructure);
    Script scriptJquery = new Script("text/javascript");
    scriptJquery.setSrc(input.getResourcePath("coref/jquery.js"));
    head.appendChild(scriptJquery);
    Script scriptUI = new Script("text/javascript");
    scriptUI.setSrc(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.js"));
    head.appendChild(scriptUI);
    Link linkCoref = new Link();
    linkCoref.setHref(input.getResourcePath("coref/coref.css"));
    linkCoref.setRel("stylesheet");
    linkCoref.setType("text/css");
    head.appendChild(linkCoref);
    Script scriptCoref = new Script("text/javascript");
    scriptCoref.setSrc(input.getResourcePath("coref/CorefVisualizer.js"));
    head.appendChild(scriptCoref);
    body.setStyle("font-family: '" + StringUtils.join(fonts, "', '") + "';");
    // get Info: reset all bookkeeping fields before collecting the relations
    globalIndex = 0;
    tokensOfNode = new HashMap<String, List<String>>();
    referentList = new LinkedList<TReferent>();
    komponent = new LinkedList<TComponent>();
    referentOfToken = new HashMap<String, HashMap<Long, Integer>>();
    componentOfToken = new HashMap<String, List<Long>>();
    componenttype = new LinkedList<TComponenttype>();
    SDocument saltDoc = input.getDocument();
    SDocumentGraph saltGraph = saltDoc.getDocumentGraph();
    if (saltGraph == null) {
      body.setText("An Error occured: Could not get Graph of Result (Graph == null).");
      // Emit the document before bailing out; otherwise the error text set above
      // would never reach the writer.
      w.append(new Doctype(DocumentType.HTMLTransitional).write());
      w.append(html.write());
      return;
    }
    List<SRelation<SNode, SNode>> edgeList = saltGraph.getRelations();
    if (edgeList == null) {
      // NOTE(review): nothing is written to the writer in this case - confirm that an
      // empty output is intended here.
      return;
    }
    for (SRelation rawRel : edgeList) {
      if (includeEdge(rawRel, input.getNamespace())) {
        SPointingRelation rel = (SPointingRelation) rawRel;
        String relType = componentNameForRelation(rel);
        visitedNodes = new LinkedList<String>();
        // got type for this?
        boolean gotIt = false;
        int componentnr;
        for (componentnr = 0; componentnr < componenttype.size(); componentnr++) {
          // fetch once per iteration instead of repeating the indexed lookup
          TComponenttype candidate = componenttype.get(componentnr);
          if (candidate != null && candidate.type != null && candidate.nodeList != null
              && candidate.type.equals(relType)
              && candidate.nodeList.contains(rel.getSource().getId())) {
            gotIt = true;
            break;
          }
        }
        TComponent currentComponent;
        TComponenttype currentComponenttype;
        if (gotIt) {
          // komponent and componenttype are filled in lockstep, so they share the index
          currentComponent = komponent.get(componentnr);
          currentComponenttype = componenttype.get(componentnr);
        } else {
          currentComponenttype = new TComponenttype();
          currentComponenttype.type = relType;
          componenttype.add(currentComponenttype);
          componentnr = komponent.size();
          currentComponent = new TComponent();
          currentComponent.type = relType;
          currentComponent.tokenList = new LinkedList<String>();
          komponent.add(currentComponent);
          currentComponenttype.nodeList.add(rel.getSource().getId());
        }
        TReferent ref = new TReferent();
        ref.annotations = new HashSet<SerializableAnnotation>();
        for (SAnnotation anno : rel.getAnnotations()) {
          ref.annotations.add(new SerializableAnnotation(anno));
        }
        ref.component = componentnr;
        referentList.add(ref);
        List<String> currentTokens = getAllTokens(rel.getSource(), componentNameForRelation(rel), currentComponenttype, componentnr, input.getNamespace());
        // new
        setReferent(rel.getTarget(), globalIndex, 0);
        // new
        setReferent(rel.getSource(), globalIndex, 1);
        for (String s : currentTokens) {
          if (!currentComponent.tokenList.contains(s)) {
            currentComponent.tokenList.add(s);
          }
        }
        globalIndex++;
      }
    }
    colorlist = new HashMap<Integer, Integer>();
    // A list containing all the generated HTML elements, one list entry
    // for each text.
    List<List<Node>> nodesPerText = new LinkedList<List<Node>>();
    // write output for each text separately
    List<STextualDS> texts = saltGraph.getTextualDSs();
    if (texts != null && !texts.isEmpty()) {
      for (STextualDS t : texts) {
        // sequence spanning the complete text (empty when the text is null)
        DataSourceSequence<Integer> sequence = new DataSourceSequence<>(t, 0, (t.getText() != null) ? t.getText().length() : 0);
        List<SToken> token = saltGraph.getSortedTokenByText(saltGraph.getTokensBySequence(sequence));
        if (token != null) {
          boolean validText = true;
          if (Boolean.parseBoolean(input.getMappings().getProperty("hide_empty", "false"))) {
            validText = false;
            // check if the text contains any matching annotations
            for (SToken tok : token) {
              /*
               * The token is only added to this map if a valid edge
               * (according to the resolver trigger) connected to
               * this token was found.
               */
              if (referentOfToken.get(tok.getId()) != null && !referentOfToken.get(tok.getId()).isEmpty()) {
                validText = true;
                break;
              }
            }
          }
          if (validText) {
            List<Node> nodes = outputSingleText(token, input);
            nodesPerText.add(nodes);
          }
        }
      }
      // end for each STextualDS
      /*
       * Append the generated output to the body, wrap in table if necessary.
       */
      // present all texts as columns side by side if using multiple texts
      Table tableTexts = new Table();
      Tr trTextRow = new Tr();
      trTextRow.setCSSClass("textRow");
      // only append wrapper table if we have multiple texts
      if (nodesPerText.size() > 1) {
        body.appendChild(tableTexts);
        tableTexts.appendChild(trTextRow);
      }
      for (List<Node> nodes : nodesPerText) {
        // multi-text mode?
        if (nodesPerText.size() > 1) {
          Td tdSingleText = new Td();
          trTextRow.appendChild(tdSingleText);
          tdSingleText.setCSSClass("text");
          tdSingleText.appendChild(nodes);
        } else {
          body.appendChild(nodes);
        }
      }
    } else {
      Text errorTxt = new Text("Could not find any texts for the " + input.getNamespace() + " node namespace (layer).");
      body.appendChild(errorTxt);
    }
    // write HTML4 transitional doctype
    w.append(new Doctype(DocumentType.HTMLTransitional).write());
    // append the html tree
    w.append(html.write());
  } catch (IOException ex) {
    log.error(null, ex);
  }
}
use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.
the class RSTImpl method getText.
/**
 * Returns the text covered by the given token.
 *
 * @param currNode the token whose overlapped text is requested
 * @return the covered substring of the textual data source; {@code null} if the token does
 *         not overlap exactly one data source sequence, or if that sequence does not
 *         belong to a {@code STextualDS}
 */
private String getText(SToken currNode) {
  SDocumentGraph graph = (SDocumentGraph) currNode.getGraph();
  List<DataSourceSequence> overlapped =
      graph.getOverlappedDataSourceSequence(currNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);

  // exactly one text is supported for spans
  boolean exactlyOne = overlapped != null && overlapped.size() == 1;
  if (!exactlyOne) {
    log.error("rst supports only one text and only text level");
    return null;
  }
  log.debug("sSequences {}", overlapped.toString());

  // The start and end of a token range are stored relative to a sequential data
  // source (see the Salt manual, chapter "5.8 More specific nodes and relations");
  // only textual data sources can be turned into a string here.
  DataSourceSequence only = overlapped.get(0);
  if (!(only.getDataSource() instanceof STextualDS)) {
    // something went fundamentally wrong
    log.error("{} instead of {}", only.getDataSource().getClass().getName(), STextualDS.class.getName());
    return null;
  }

  STextualDS textualDs = (STextualDS) only.getDataSource();
  int from = only.getStart().intValue();
  int to = only.getEnd().intValue();
  return textualDs.getText().substring(from, to);
}
use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.
the class LegacyGraphConverter method convertToAnnotationGraph.
/**
 * Converts a Salt document graph into a legacy {@code AnnotationGraph}.
 *
 * <p>Every Salt node carrying the relANNIS node feature becomes an {@code AnnisNode}
 * (annotations, name, first layer as namespace, token text/index, corpus/text references,
 * left/right boundaries and match position). Afterwards every relation carrying a relANNIS
 * edge feature is added, plus one additional relation with an empty name for each dominance
 * relation that has an artificial dominance component and pre-order value.
 *
 * @param docGraph the Salt document graph to convert
 * @param matchedNodeIDs internal IDs of the matched nodes; the 1-based position in this
 *        list is stored as "matched node in query" on the corresponding node
 * @return the newly created annotation graph
 * @throws NullPointerException if the data source sequence of a token, its data source,
 *         its data, its start or its end is {@code null}
 * @throws IllegalStateException if the start/end of a token lies outside of its text
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
  Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
  AnnotationGraph annoGraph = new AnnotationGraph();
  List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
  annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
  annoGraph.setDocumentName(docGraph.getDocument().getName());
  Map<SNode, AnnisNode> allNodes = new HashMap<>();
  for (SNode sNode : docGraph.getNodes()) {
    SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
    // nodes without the relANNIS feature are not part of the legacy graph
    if (featNodeRaw != null) {
      RelannisNodeFeature featNode = (RelannisNodeFeature) featNodeRaw.getValue();
      long internalID = featNode.getInternalID();
      AnnisNode aNode = new AnnisNode(internalID);
      for (SAnnotation sAnno : sNode.getAnnotations()) {
        aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
      }
      aNode.setName(sNode.getName());
      Set<SLayer> layers = sNode.getLayers();
      if (!layers.isEmpty()) {
        // the legacy model only knows a single namespace: use the first layer
        aNode.setNamespace(layers.iterator().next().getName());
      }
      if (sNode instanceof SToken) {
        List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
        // an empty list is treated like a missing one instead of failing on get(0)
        if (seqList != null && !seqList.isEmpty()) {
          DataSourceSequence seq = seqList.get(0);
          Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
          SSequentialDS seqDS = seq.getDataSource();
          Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
          Preconditions.checkNotNull(seqDS.getData(), "SSequentalDS data is null for token %s", sNode.getId());
          String seqDSData = (String) seqDS.getData();
          Preconditions.checkNotNull(seqDSData, "casted SSequentalDS data is null for token %s", sNode.getId());
          Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
          Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
          int start = seq.getStart().intValue();
          int end = seq.getEnd().intValue();
          // one placeholder per argument, so that the token id, start and end are
          // reported in the right positions
          Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start %s, end: %s)", sNode.getId(), start, end);
          String spannedText = seqDSData.substring(start, end);
          Preconditions.checkNotNull(spannedText, "spanned text is null for supposed token %s (start: %s, end: %s)", sNode.getId(), start, end);
          aNode.setSpannedText(spannedText);
          aNode.setToken(true);
          aNode.setTokenIndex(featNode.getTokenIndex());
        }
      } else {
        aNode.setToken(false);
        aNode.setTokenIndex(null);
      }
      aNode.setCorpus(featNode.getCorpusRef());
      aNode.setTextId(featNode.getTextRef());
      aNode.setLeft(featNode.getLeft());
      aNode.setLeftToken(featNode.getLeftToken());
      aNode.setRight(featNode.getRight());
      aNode.setRightToken(featNode.getRightToken());
      if (matchSet.contains(aNode.getId())) {
        // position in the match list, 1-based
        aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
        annoGraph.getMatchedNodeIds().add(aNode.getId());
      } else {
        aNode.setMatchedNodeInQuery(null);
      }
      annoGraph.addNode(aNode);
      allNodes.put(sNode, aNode);
    }
  }
  for (SRelation rel : docGraph.getRelations()) {
    RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
    if (featRelation != null) {
      addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
    }
  }
  // add relations with empty relation name for every dominance relation
  List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
  for (SDominanceRelation rel : dominanceRelations) {
    RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
    if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
      addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
    }
  }
  return annoGraph;
}
use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.
the class TimelineReconstructorTest method testBematacDialog.
/**
 * Checks the timeline reconstruction on a sample dialog
 * (https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1).
 *
 * <p>The Salt graph generated by ANNIS is loaded, its virtual tokenization is removed and
 * the test then verifies that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
  SDocumentGraph graph = SaltUtil.loadDocumentGraph(URI.createURI(getClass().getResource("SampleDialog.salt").toString()));

  Map<String, String> orderNameByAnno = new HashMap<>();
  orderNameByAnno.put("default_ns::instructee_utt", "instructee_dipl");
  orderNameByAnno.put("default_ns::instructor_utt", "instructor_dipl");
  TimelineReconstructor.removeVirtualTokenization(graph, orderNameByAnno);

  // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
  List<STextualDS> allTexts = graph.getTextualDSs();
  assertEquals(6, allTexts.size());

  STextualDS diplText = findTextualDSByName("instructor_dipl", allTexts);
  assertNotNull(diplText);
  assertEquals("in Richtung des Toasters gehst ja gehst", diplText.getText());

  // fetch all token covering the complete instructor_dipl text
  DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
  wholeText.setDataSource(diplText);
  wholeText.setStart(diplText.getStart());
  wholeText.setEnd(diplText.getEnd());
  List<SToken> diplToken = graph.getTokensBySequence(wholeText);

  String[] expectedTokenTexts = {"in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst"};
  assertEquals(expectedTokenTexts.length, diplToken.size());
  for (int i = 0; i < expectedTokenTexts.length; i++) {
    assertEquals(expectedTokenTexts[i], graph.getText(diplToken.get(i)));
  }

  // check that the other real spans are now connected with the token
  List<SNode> utteranceNodes = graph.getNodesByName("sSpan1294");
  assertNotNull(utteranceNodes);
  assertEquals(1, utteranceNodes.size());
  SNode utterance = utteranceNodes.get(0);
  SAnnotation utteranceAnno = utterance.getAnnotation("default_ns::instructor_utt");
  assertNotNull(utteranceAnno);
  assertEquals("utt", utteranceAnno.getValue_STEXT());
  List<SRelation> outgoing = utterance.getOutRelations();
  assertNotNull(outgoing);
  assertEquals(5, outgoing.size());
  for (SRelation outRel : outgoing) {
    assertTrue(outRel instanceof SSpanningRelation);
    assertEquals(diplText, CommonHelper.getTextualDSForNode((SNode) outRel.getTarget(), graph));
  }

  // remaining texts: { name of the textual DS, expected content }
  String[][] expectedTexts = {
    {"instructor_norm", "in Richtung des Toasters gehst ja gehst"},
    {"instructee_dipl", "mhm ich geh in Richtung des Toasters okay"},
    {"instructee_norm", "ich gehe in Richtung des Toasters okay"},
    {"instructee_extra", "zeichnet"},
    {"break", "0,7 0,5"}
  };
  for (String[] expected : expectedTexts) {
    STextualDS found = findTextualDSByName(expected[0], allTexts);
    assertNotNull(found);
    assertEquals(expected[1], found.getText());
  }
}
use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.
the class VakyarthaDependencyTree method getText.
/**
 * Returns the text overlapped by the given node, taken from the first overlapped
 * data source sequence.
 *
 * @param node the node whose covered text is requested
 * @param input the visualizer input whose result document graph is queried
 * @return the covered text, or the empty string if the node overlaps no sequence
 */
private String getText(SNode node, VisualizerInput input) {
  SDocumentGraph graph = input.getSResult().getDocumentGraph();
  List<DataSourceSequence> overlapped =
      graph.getOverlappedDataSourceSequence(node, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
  if (overlapped == null || overlapped.isEmpty()) {
    return "";
  }

  // only the first sequence is used
  DataSourceSequence first = overlapped.get(0);
  String fullText = ((STextualDS) first.getDataSource()).getText();
  int from = first.getStart().intValue();
  int to = first.getEnd().intValue();
  return fullText.substring(from, to);
}
Aggregations