use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.
Usage in class RSTImpl, method getOutGoingEdgeTypeAnnotation.
/**
 * Collects all outgoing relations of the given node as a JSON array of edge objects.
 *
 * <p>Each edge object carries the relation type under {@code "sType"} (defaulting to
 * {@code "edge"} when the relation has no explicit type), the endpoint ids under
 * {@code "from"}/{@code "to"}, and any relation annotation values under
 * {@code "annotation"}. Edges of the RST type are inverted (source and target swapped)
 * because the visualization draws RST edges in the opposite direction.
 *
 * <p>Edges that point directly to tokens are skipped.
 *
 * @param node the node whose outgoing relations are serialized
 * @return a JSON array of edge objects; empty if the node has no outgoing relations
 * @throws JSONException if a relation has no target node or JSON building fails
 */
private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws JSONException {
    JSONArray edgeData = new JSONArray();
    List<SRelation<SNode, SNode>> out = node.getGraph().getOutRelations(node.getId());
    // no outgoing relations at all -> empty result
    if (out == null) {
        return edgeData;
    }
    for (SRelation<SNode, SNode> edge : out) {
        // Skip null entries (the previous "instanceof SRelation" test also filtered
        // these) and edges that point directly to tokens.
        if (edge == null || edge.getTarget() instanceof SToken) {
            continue;
        }
        // fall back to the generic type "edge" when the relation has no explicit type
        String type = edge.getType();
        String sTypeAsString = (type == null || type.isEmpty()) ? "edge" : type;
        JSONObject jsonEdge = new JSONObject();
        edgeData.put(jsonEdge);
        jsonEdge.put("sType", sTypeAsString);
        SNode target = edge.getTarget();
        if (target == null) {
            throw new JSONException("could not cast to SNode");
        }
        if (getRSTType().equals(sTypeAsString)) {
            // Invert the direction of the RST-edge.
            jsonEdge.put("to", getUniStrId(node));
            jsonEdge.put("from", getUniStrId(target));
        } else {
            jsonEdge.put("from", getUniStrId(node));
            jsonEdge.put("to", getUniStrId(target));
        }
        // copy all annotation values of the relation into the edge object
        Set<SAnnotation> annos = edge.getAnnotations();
        if (annos != null) {
            for (SAnnotation anno : annos) {
                getOrCreateArray(jsonEdge, "annotation").put(anno.getValue_STEXT());
            }
        }
    }
    return edgeData;
}
use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.
Usage in class SaltAnnotateExtractorTest, method testLayerNodes.
/**
 * Checks that each annotation layer of the extracted document graph contains exactly the
 * expected nodes, compared by name in {@link NameComparator} order.
 */
@Test
public void testLayerNodes() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph g = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    assertLayerNodeNames(g, "exmaralda",
        "Focus_newInfSeg_10", "Focus_newInfSeg_9", "Inf-StatSeg_29", "Inf-StatSeg_30",
        "NPSeg_29", "NPSeg_30", "PPSeg_7", "SentSeg_10", "SentSeg_9");
    assertLayerNodeNames(g, "mmax",
        "primmarkSeg_1000154", "primmarkSeg_60", "sentenceSeg_50010", "sentenceSeg_50011",
        "sentenceSeg_5009");
    assertLayerNodeNames(g, "tiger",
        "const_50", "const_52", "const_54", "const_55", "const_56", "const_57", "const_58",
        "const_59", "const_60", "const_61");
    assertLayerNodeNames(g, "default_ns",
        "tok_150", "tok_151", "tok_152", "tok_153", "tok_154", "tok_155", "tok_156",
        "tok_157", "tok_158", "tok_159", "tok_160", "tok_161");
    assertLayerNodeNames(g, "rst",
        "u0", "u10", "u11", "u12", "u20", "u23", "u24", "u27", "u28");
    // the "dep" layer is expected to contain no nodes at all
    assertEquals(0, g.getLayerByName("dep").get(0).getNodes().size());
}

/**
 * Asserts that the layer with the given name contains exactly the expected node names
 * when its nodes are sorted with {@link NameComparator}.
 *
 * @param g the document graph under test
 * @param layerName the name of the layer to check
 * @param expectedNames the complete, sorted list of expected node names
 */
private void assertLayerNodeNames(SDocumentGraph g, String layerName, String... expectedNames) {
    List<SNode> nodes = new ArrayList<>(g.getLayerByName(layerName).get(0).getNodes());
    Collections.sort(nodes, new NameComparator());
    assertEquals(expectedNames.length, nodes.size());
    for (int i = 0; i < expectedNames.length; i++) {
        assertEquals(expectedNames[i], nodes.get(i).getName());
    }
}
use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.
Usage in class TextColumnExporter, method outputText.
/**
 * Writes the specified record (if applicable, as multiple result lines) from the query
 * result set to the output file.
 *
 * <p>For the first record ({@code recordNumber == 0}) the exporter-wide state is reset and
 * a Vaadin warning is shown when filter numbers could not be represented or the requested
 * match alignment is not applicable. For every speaker with matches a header line, the
 * speaker name and optional metadata columns are written; token text is then emitted with
 * separators (space within context, tabs around matches) chosen from the match bookkeeping.
 *
 * @param graph the org.corpus_tools.salt.common.SDocumentGraph representation of a
 *     specified record
 * @param alignmc a boolean, which indicates, whether the data should be aligned by match
 *     numbers or not
 * @param recordNumber the number of record within the record set
 * @param out the specified Writer
 *
 * @throws IOException if an I/O error occurs
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int recordNumber, Writer out) throws IOException {
    String currSpeakerName = "";
    String prevSpeakerName = "";
    if (graph != null) {
        List<SToken> orderedToken = graph.getSortedTokenByText();
        if (orderedToken != null) {
            // iterate over token
            ListIterator<SToken> it = orderedToken.listIterator();
            // match number the previous token belonged to; -1 = previous token unmatched
            long lastTokenWasMatched = -1;
            boolean noPreviousTokenInLine = false;
            // if match number == 0, reset global variables and output warning, if necessary
            if (recordNumber == 0) {
                isFirstSpeakerWithMatch = true;
                counterGlobal = 0;
                // create warning message
                String numbersString = "";
                String warnMessage = "";
                StringBuilder sb = new StringBuilder();
                // filter numbers the user requested but that never occurred in the data
                List<Integer> copyOfFilterNumbersSetByUser = new ArrayList<Integer>();
                for (Long filterNumber : filterNumbersSetByUser) {
                    copyOfFilterNumbersSetByUser.add(Integer.parseInt(String.valueOf(filterNumber)));
                }
                for (Integer matchNumberGlobal : matchNumbersGlobal) {
                    copyOfFilterNumbersSetByUser.remove(matchNumberGlobal);
                }
                Collections.sort(copyOfFilterNumbersSetByUser);
                if (!copyOfFilterNumbersSetByUser.isEmpty()) {
                    for (Integer filterNumber : copyOfFilterNumbersSetByUser) {
                        sb.append(filterNumber + ", ");
                    }
                    if (copyOfFilterNumbersSetByUser.size() == 1) {
                        numbersString = "number";
                    } else {
                        numbersString = "numbers";
                    }
                    warnMessage = "1. Filter " + numbersString + " " + sb.toString().substring(0, sb.lastIndexOf(",")) + " couldn't be represented.";
                }
                if (alignmc && !dataIsAlignable) {
                    if (!warnMessage.isEmpty()) {
                        warnMessage += (NEWLINE + NEWLINE + "2. ");
                    } else {
                        warnMessage += "1. ";
                    }
                    warnMessage += "You have tried to align matches by node number via check box." + "Unfortunately this option is not applicable for this data set, " + "so the data couldn't be aligned.";
                }
                if (!warnMessage.isEmpty()) {
                    String warnCaption = "Some export options couldn't be realized.";
                    Notification warn = new Notification(warnCaption, warnMessage, Notification.Type.WARNING_MESSAGE);
                    warn.setDelayMsec(20000);
                    warn.show(Page.getCurrent());
                }
            }
            // global variables reset; warning issued
            int matchesWrittenForSpeaker = 0;
            while (it.hasNext()) {
                SToken tok = it.next();
                counterGlobal++;
                // get current speaker name
                // NOTE(review): assumes getTextualDSForNode never returns null here —
                // otherwise getName() would throw an NPE; TODO confirm with callers
                String name;
                if ((name = CommonHelper.getTextualDSForNode(tok, graph).getName()) == null) {
                    name = "";
                }
                currSpeakerName = (recordNumber + 1) + "_" + name;
                // if speaker has no matches, skip token
                // FIX: use Boolean.TRUE.equals(...) instead of "== false" to avoid a
                // NullPointerException from auto-unboxing when the speaker is not in the
                // map; an unknown speaker is treated as having no matches
                if (!Boolean.TRUE.equals(speakerHasMatches.get(currSpeakerName))) {
                    prevSpeakerName = currSpeakerName;
                    // continue;
                } else // if speaker has matches
                {
                    // if the current speaker is new, write header and append his name
                    if (!currSpeakerName.equals(prevSpeakerName)) {
                        // reset the counter of matches, which were written for this speaker
                        matchesWrittenForSpeaker = 0;
                        if (isFirstSpeakerWithMatch) {
                            out.append("match_number" + TAB_MARK);
                            out.append("speaker" + TAB_MARK);
                            // write header for meta data columns
                            if (!listOfMetakeys.isEmpty()) {
                                for (String metakey : listOfMetakeys) {
                                    out.append(metakey + TAB_MARK);
                                }
                            }
                            out.append("left_context" + TAB_MARK);
                            String prefixAlignmc = "match_";
                            String prefix = "match_column";
                            String middle_context = "middle_context_";
                            if (alignmc && dataIsAlignable) {
                                // one column per global match number, with context in between
                                for (int i = 0; i < orderedMatchNumbersGlobal.size(); i++) {
                                    out.append(prefixAlignmc + orderedMatchNumbersGlobal.get(i) + TAB_MARK);
                                    if (i < orderedMatchNumbersGlobal.size() - 1) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            } else {
                                // generic match columns up to the maximum per line
                                for (int i = 0; i < maxMatchesPerLine; i++) {
                                    out.append(prefix + TAB_MARK);
                                    if (i < (maxMatchesPerLine - 1)) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            }
                            out.append("right_context");
                            out.append(NEWLINE);
                            isFirstSpeakerWithMatch = false;
                        } else {
                            out.append(NEWLINE);
                        }
                        out.append(String.valueOf(recordNumber + 1) + TAB_MARK);
                        // strip the "<recordNumber>_" prefix to recover the raw speaker name
                        String trimmedName = "";
                        if (currSpeakerName.indexOf("_") < currSpeakerName.length()) {
                            trimmedName = currSpeakerName.substring(currSpeakerName.indexOf("_") + 1);
                        }
                        out.append(trimmedName + TAB_MARK);
                        // write meta data
                        if (!listOfMetakeys.isEmpty()) {
                            // get metadata
                            String docName = graph.getDocument().getName();
                            List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
                            String corpusName = corpusPath.get(corpusPath.size() - 1);
                            corpusName = urlPathEscape.escape(corpusName);
                            List<Annotation> metadata = Helper.getMetaData(corpusName, docName);
                            Map<String, String> annosWithoutNamespace = new HashMap<String, String>();
                            Map<String, Map<String, String>> annosWithNamespace = new HashMap<String, Map<String, String>>();
                            // put metadata annotations into hash maps for better access
                            for (Annotation metaAnno : metadata) {
                                String ns;
                                Map<String, String> data = new HashMap<String, String>();
                                data.put(metaAnno.getName(), metaAnno.getValue());
                                // a namespace is present
                                if ((ns = metaAnno.getNamespace()) != null && !ns.isEmpty()) {
                                    Map<String, String> nsMetadata = new HashMap<String, String>();
                                    if (annosWithNamespace.get(ns) != null) {
                                        nsMetadata = annosWithNamespace.get(ns);
                                    }
                                    nsMetadata.putAll(data);
                                    annosWithNamespace.put(ns, nsMetadata);
                                } else {
                                    annosWithoutNamespace.putAll(data);
                                }
                            }
                            for (String metakey : listOfMetakeys) {
                                String metaValue = "";
                                // try to get meta value specific for current speaker
                                if (!trimmedName.isEmpty() && annosWithNamespace.containsKey(trimmedName)) {
                                    Map<String, String> speakerAnnos = annosWithNamespace.get(trimmedName);
                                    if (speakerAnnos.containsKey(metakey)) {
                                        metaValue = speakerAnnos.get(metakey).trim();
                                    }
                                }
                                // try to get meta value, if metaValue is not set
                                if (metaValue.isEmpty() && annosWithoutNamespace.containsKey(metakey)) {
                                    metaValue = annosWithoutNamespace.get(metakey).trim();
                                }
                                out.append(metaValue + TAB_MARK);
                            }
                        }
                        // metadata written
                        lastTokenWasMatched = -1;
                        noPreviousTokenInLine = true;
                    }
                    // header, speaker name and metadata ready
                    // default to space as separator
                    String separator = SPACE;
                    List<SNode> root = new LinkedList<>();
                    root.add(tok);
                    Long matchedNode;
                    // token matched
                    if ((matchedNode = tokenToMatchNumber.get(counterGlobal)) != null) {
                        // is dominated by a (new) matched node, thus use tab to separate the non-matches from the matches
                        if (lastTokenWasMatched < 0) {
                            if (alignmc && dataIsAlignable) {
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    // pad with empty columns for skipped match numbers
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK;
                            }
                        } else if (lastTokenWasMatched != matchedNode) {
                            // always leave an empty column between two matches, even if there is no actual context
                            if (alignmc && dataIsAlignable) {
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK + TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK + TAB_MARK;
                            }
                        }
                        lastTokenWasMatched = matchedNode;
                    } else // token not matched, but last token matched
                    if (lastTokenWasMatched >= 0) {
                        // handle crossing edges
                        if (!tokenToMatchNumber.containsKey(counterGlobal) && tokenToMatchNumber.containsKey(counterGlobal - 1) && tokenToMatchNumber.containsKey(counterGlobal + 1)) {
                            // the gap lies inside one match -> keep it in the same column
                            if (Objects.equals(tokenToMatchNumber.get(counterGlobal - 1), tokenToMatchNumber.get(counterGlobal + 1))) {
                                separator = SPACE;
                                lastTokenWasMatched = tokenToMatchNumber.get(counterGlobal + 1);
                            } else {
                                separator = TAB_MARK;
                                lastTokenWasMatched = -1;
                            }
                        } else // mark the end of a match with the tab
                        {
                            separator = TAB_MARK;
                            lastTokenWasMatched = -1;
                        }
                    }
                    // if tok is the first token in the line and not matched, set separator to empty string
                    if (noPreviousTokenInLine && separator.equals(SPACE)) {
                        separator = "";
                    }
                    out.append(separator);
                    // append the current token
                    out.append(graph.getText(tok));
                    noPreviousTokenInLine = false;
                    prevSpeakerName = currSpeakerName;
                }
            }
        }
    }
}
use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.
Usage in class CSVMultiTokExporter, method outputText.
/**
 * Takes a match and outputs a CSV line (tab-separated).
 *
 * <p>For the first match ({@code matchNumber == 0}) a header line is written first. Each
 * matched node contributes its internal id, its spanned text and the values of its
 * configured annotations; the columns are ordered by the node's match number. Requested
 * metadata values are appended at the end of the line.
 *
 * @param graph the document graph of the match
 * @param alignmc unused by this exporter, kept for the interface
 * @param matchNumber the number of the match within the result set
 * @param out the target writer
 *
 * @throws java.io.IOException if writing to {@code out} fails
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
    // first match
    if (matchNumber == 0) {
        // output header
        List<String> headerLine = new ArrayList<>();
        for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
            int node_id = match.getKey();
            headerLine.add(node_id + "_id");
            headerLine.add(node_id + "_span");
            for (String annoName : match.getValue()) {
                headerLine.add(node_id + "_anno_" + annoName);
            }
        }
        for (String key : metakeys) {
            headerLine.add("meta_" + key);
        }
        out.append(StringUtils.join(headerLine, "\t"));
        out.append("\n");
    }
    // output nodes in the order of the matches
    SortedMap<Integer, String> contentLine = new TreeMap<>();
    for (SNode node : this.getMatchedNodes(graph)) {
        List<String> nodeLine = new ArrayList<>();
        // export id
        RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
        nodeLine.add(String.valueOf(feats.getInternalID()));
        // export spanned text (FIX: reuse the already computed span instead of calling
        // graph.getText(node) a second time)
        String span = graph.getText(node);
        nodeLine.add(span != null ? span : "");
        // export annotations; missing annotations are marked with 'NULL'
        int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
        for (String annoName : annotationsForMatchedNodes.get(node_id)) {
            SAnnotation anno = node.getAnnotation(annoName);
            if (anno != null) {
                nodeLine.add(anno.getValue_STEXT());
            } else {
                nodeLine.add("'NULL'");
            }
        }
        // add everything to line, keyed by match number so columns stay ordered
        contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
    }
    out.append(StringUtils.join(contentLine.values(), "\t"));
    // TODO cache the metadata
    if (!metakeys.isEmpty()) {
        // TODO is this the best way to get the corpus name?
        String corpus_name = CommonHelper.getCorpusPath(java.net.URI.create(graph.getDocument().getId())).get(0);
        List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());
        for (Annotation anno : asList) {
            if (metakeys.contains(anno.getName())) {
                out.append("\t" + anno.getValue());
            }
        }
    }
    out.append("\n");
}
use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.
Usage in class Helper, method calculateMarkedAndCoveredIDs.
public static Map<String, Long> calculateMarkedAndCoveredIDs(SDocument doc, List<SNode> segNodes, String segmentationName) {
    // Seed map: every node of the document graph that carries the "matched node"
    // feature, keyed by node id, valued with its match number.
    Map<String, Long> initialMatches = new HashMap<>();
    for (SNode node : doc.getDocumentGraph().getNodes()) {
        SFeature matchFeature = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
        Long matchNumber = matchFeature == null ? null : matchFeature.getValue_SNUMERIC();
        if (matchNumber != null) {
            initialMatches.put(node.getId(), matchNumber);
        }
    }
    // Expand the seed set to all nodes covered by the matches.
    CoveredMatchesCalculator cmc = new CoveredMatchesCalculator(doc.getDocumentGraph(), initialMatches);
    Map<String, Long> covered = cmc.getMatchedAndCovered();
    if (segmentationName == null) {
        return covered;
    }
    // Restrict the covered entries to actual token nodes.
    Map<SToken, Long> coveredTokens = new HashMap<>();
    for (Map.Entry<String, Long> entry : covered.entrySet()) {
        SNode candidate = doc.getDocumentGraph().getNode(entry.getKey());
        if (candidate instanceof SToken) {
            coveredTokens.put((SToken) candidate, entry.getValue());
        }
    }
    // A segmentation node inherits the match number of any covered token whose
    // token index lies inside the node's left/right token range.
    for (SNode segNode : segNodes) {
        RelannisNodeFeature segFeature = (RelannisNodeFeature) segNode.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        if (covered.containsKey(segNode.getId())) {
            continue; // already covered directly
        }
        long leftTok = segFeature.getLeftToken();
        long rightTok = segFeature.getRightToken();
        // check for each covered token if this segment is covering it
        for (Map.Entry<SToken, Long> entry : coveredTokens.entrySet()) {
            RelannisNodeFeature tokFeature = (RelannisNodeFeature) entry.getKey().getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
            long tokenIndex = tokFeature.getTokenIndex();
            if (leftTok <= tokenIndex && tokenIndex <= rightTok) {
                // first covering token found: adopt its match number and stop searching
                covered.put(segNode.getId(), entry.getValue());
                break;
            }
        }
    }
    return covered;
}
Aggregations