use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class EventExtractor method computeDisplayAnnotations.
/**
 * Returns the annotations to display according to the mappings configuration.
 *
 * <p>This checks the "annos" and "annos_regex" parameters (read via
 * {@code MAPPING_ANNOS_KEY} and {@code MAPPING_ANNO_REGEX_KEY}) to determine
 * the annotations to display. The pool of candidate annotation names is
 * collected from all nodes of the graph matching the given type.
 *
 * <p>If the "annos" mapping is set and not blank, it is interpreted as a
 * comma-separated list. Entries enclosed in slashes ({@code /.../}) are regular
 * expressions that select every still unselected annotation of the pool that
 * matches completely; all other entries are taken literally. If it is not set,
 * the whole pool is used. Afterwards, if the "annos_regex" mapping is set, every
 * selected annotation that does not match it completely is removed again.
 *
 * <p>Invalid regular expressions in either mapping are logged as a warning and
 * ignored instead of aborting the computation.
 *
 * @param input The input for the visualizer.
 * @param type Which type of nodes to include
 * @return the annotation names to display, in configuration order; an empty
 *         list if {@code input} is {@code null}
 */
public static List<String> computeDisplayAnnotations(VisualizerInput input, Class<? extends SNode> type) {
    if (input == null) {
        return new LinkedList<>();
    }
    SDocumentGraph graph = input.getDocument().getDocumentGraph();
    // For token types no namespace is passed (null); for every other type the
    // namespace of the visualizer input is used.
    // NOTE(review): null presumably means "any namespace" - confirm against
    // getAnnotationLevelSet.
    Set<String> annoPool = SToken.class.isAssignableFrom(type)
            ? getAnnotationLevelSet(graph, null, type)
            : getAnnotationLevelSet(graph, input.getNamespace(), type);
    // default: show everything that was found
    List<String> annos = new LinkedList<>(annoPool);

    String annosConfiguration = input.getMappings().getProperty(MAPPING_ANNOS_KEY);
    if (annosConfiguration != null && annosConfiguration.trim().length() > 0) {
        // an explicit configuration replaces the default selection
        annos.clear();
        for (String s : annosConfiguration.split(",")) {
            s = s.trim();
            // is regular expression?
            if (s.startsWith("/") && s.endsWith("/")) {
                Pattern regex;
                try {
                    regex = Pattern.compile(StringUtils.strip(s, "/"));
                } catch (PatternSyntaxException ex) {
                    // A broken entry must not prevent the remaining entries
                    // from being evaluated; same handling as for the
                    // "annos_regex" filter below.
                    log.warn("invalid regular expression in mapping for grid visualizer", ex);
                    continue;
                }
                // go over all remaining items in our pool of all annotations
                // and check if they match
                LinkedList<String> matchingAnnos = new LinkedList<>();
                for (String a : annoPool) {
                    if (regex.matcher(a).matches()) {
                        matchingAnnos.add(a);
                    }
                }
                annos.addAll(matchingAnnos);
                // remove from the pool so that a later expression can not
                // select the same annotation a second time
                annoPool.removeAll(matchingAnnos);
            } else {
                // literal names are added even if they are not part of the pool
                annos.add(s);
                annoPool.remove(s);
            }
        }
    }

    // filter already found annotation names by regular expression
    // if this was given as mapping
    String regexFilterRaw = input.getMappings().getProperty(MAPPING_ANNO_REGEX_KEY);
    if (regexFilterRaw != null) {
        try {
            Pattern regexFilter = Pattern.compile(regexFilterRaw);
            ListIterator<String> itAnnos = annos.listIterator();
            while (itAnnos.hasNext()) {
                String a = itAnnos.next();
                // remove entry if not matching
                if (!regexFilter.matcher(a).matches()) {
                    itAnnos.remove();
                }
            }
        } catch (PatternSyntaxException ex) {
            log.warn("invalid regular expression in mapping for grid visualizer", ex);
        }
    }
    return annos;
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class RSTImpl method getText.
/**
 * Looks up the text span that the given token overlaps and returns it as a
 * string.
 *
 * @param currNode the token whose overlapped text is requested
 * @return the overlapped substring of the text; {@code null} if the token
 *         does not overlap exactly one data source sequence or if the
 *         overlapped data source is not an {@code STextualDS}
 */
private String getText(SToken currNode) {
    SDocumentGraph docGraph = (SDocumentGraph) currNode.getGraph();
    List<DataSourceSequence> sequences = docGraph.getOverlappedDataSourceSequence(currNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);

    // only a single overlapped text is supported
    boolean exactlyOneSequence = sequences != null && sequences.size() == 1;
    if (!exactlyOneSequence) {
        log.error("rst supports only one text and only text level");
        return null;
    }
    log.debug("sSequences {}", sequences.toString());

    DataSourceSequence sequence = sequences.get(0);

    // Make sure the data source really is a text. According to the Salt manual,
    // chapter "5.8 More specific nodes and relations", the start and end point
    // of a token range are stored in a superordinate node of type SSequentialDS.
    if (!(sequence.getDataSource() instanceof STextualDS)) {
        // something fundamentally goes wrong
        log.error("{} instead of {}", sequence.getDataSource().getClass().getName(), STextualDS.class.getName());
        return null;
    }

    STextualDS text = (STextualDS) sequence.getDataSource();
    int start = sequence.getStart().intValue();
    int end = sequence.getEnd().intValue();
    return text.getText().substring(start, end);
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testLayerNodes.
/**
 * Extracts the single-text result set and verifies, layer by layer, which
 * nodes the first document graph assigns to each named layer.
 */
@Test
public void testLayerNodes() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph g = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();

    assertLayerNodeNames(g, "exmaralda",
            "Focus_newInfSeg_10", "Focus_newInfSeg_9",
            "Inf-StatSeg_29", "Inf-StatSeg_30",
            "NPSeg_29", "NPSeg_30",
            "PPSeg_7",
            "SentSeg_10", "SentSeg_9");

    assertLayerNodeNames(g, "mmax",
            "primmarkSeg_1000154", "primmarkSeg_60",
            "sentenceSeg_50010", "sentenceSeg_50011", "sentenceSeg_5009");

    assertLayerNodeNames(g, "tiger",
            "const_50", "const_52", "const_54", "const_55", "const_56",
            "const_57", "const_58", "const_59", "const_60", "const_61");

    assertLayerNodeNames(g, "default_ns",
            "tok_150", "tok_151", "tok_152", "tok_153", "tok_154", "tok_155",
            "tok_156", "tok_157", "tok_158", "tok_159", "tok_160", "tok_161");

    assertLayerNodeNames(g, "rst",
            "u0", "u10", "u11", "u12", "u20", "u23", "u24", "u27", "u28");

    // the "dep" layer exists but has no nodes
    assertEquals(0, g.getLayerByName("dep").get(0).getNodes().size());
}

/**
 * Asserts that the first layer with the given name contains exactly the
 * expected nodes, comparing node names after sorting the nodes with
 * {@code NameComparator}.
 *
 * @param graph the document graph to inspect
 * @param layerName name of the layer to look up
 * @param expectedNames the expected node names in sorted order
 */
private void assertLayerNodeNames(SDocumentGraph graph, String layerName, String... expectedNames) {
    List<SNode> nodes = new ArrayList<>(graph.getLayerByName(layerName).get(0).getNodes());
    Collections.sort(nodes, new NameComparator());
    // check the size first so that a wrong node count is reported as such
    assertEquals(expectedNames.length, nodes.size());
    for (int i = 0; i < expectedNames.length; i++) {
        assertEquals(expectedNames[i], nodes.get(i).getName());
    }
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testMultipleTextGeneration.
/**
 * Extracts the multi-text result set and checks that the first document
 * graph contains three textual data sources.
 */
@Test
public void testMultipleTextGeneration() throws SQLException {
    SaltProject extracted = instance.extractData(resultSetProviderMultiText.getResultSet());
    assertNotNull(extracted);

    // only the first document of the first corpus graph is inspected
    SDocumentGraph docGraph = extracted.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    int textCount = docGraph.getTextualDSs().size();
    assertEquals(3, textCount);
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class LegacyGraphConverter method convertToAnnotationGraph.
/**
 * Converts the document graph of a Salt document into an
 * {@code AnnotationGraph}.
 *
 * <p>The matched nodes are read from the {@code FEAT_MATCHEDIDS} feature of
 * the document graph. Each matched Salt ID is resolved to its node and
 * translated to the internal ID stored in the node's
 * {@code FEAT_RELANNIS_NODE} feature. An ID that can not be resolved to a node
 * is logged and represented by {@code -1}, so the list keeps one entry per
 * matched Salt ID.
 *
 * @param document the document whose graph is converted
 * @return the converted annotation graph
 */
public static AnnotationGraph convertToAnnotationGraph(SDocument document) {
    SDocumentGraph docGraph = document.getDocumentGraph();

    // start with an empty match and replace it if the graph carries match IDs
    SFeature matchedIdsFeature = docGraph.getFeature(ANNIS_NS, FEAT_MATCHEDIDS);
    Match match = new Match();
    boolean hasMatchedIds = matchedIdsFeature != null && matchedIdsFeature.getValue_STEXT() != null;
    if (hasMatchedIds) {
        match = Match.parseFromString(matchedIdsFeature.getValue_STEXT(), ',');
    }

    // get matched node names by using the IDs
    List<Long> matchedNodeIDs = new ArrayList<>();
    for (URI saltId : match.getSaltIDs()) {
        String idString = saltId.toASCIIString();
        SNode matchedNode = docGraph.getNode(idString);
        if (matchedNode != null) {
            RelannisNodeFeature nodeFeature = (RelannisNodeFeature) matchedNode
                    .getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE))
                    .getValue();
            matchedNodeIDs.add(nodeFeature.getInternalID());
        } else {
            // that's weird, fallback to the id
            log.warn("Could not get matched node from id {}", idString);
            matchedNodeIDs.add(-1L);
        }
    }

    return convertToAnnotationGraph(docGraph, matchedNodeIDs);
}
Aggregations