use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class VisJsComponent method computeDisplayedRelAnnotations.
/**
 * Returns the relation annotation names to display according to the mappings configuration.
 *
 * <p>The pool of candidate names is obtained from
 * {@code getRelationLevelSet(graph, null, type)} for the document graph of {@code input}.
 * When {@code relAnnosConfiguration} is {@code null} or blank, the whole pool is returned.
 * Otherwise the configuration is read as a comma-separated list and processed in order:
 * <ul>
 * <li>an entry enclosed in slashes ({@code /.../}) is compiled as a regular expression; every
 * name still in the pool that matches it completely is appended to the result,</li>
 * <li>any other entry is appended literally, whether or not it occurs in the pool,</li>
 * <li>blank entries (for example from {@code "a,,b"}) are ignored.</li>
 * </ul>
 * Names that were selected are removed from the pool so that a later regular expression
 * does not select them a second time.
 *
 * @param input The input for the visualizer. If {@code null}, an empty list is returned.
 * @param relAnnosConfiguration The comma-separated configuration value, may be {@code null}.
 * @param type Which type of relations to include
 * @return a new list with the annotation names to display, in configuration order
 * @throws java.util.regex.PatternSyntaxException if a {@code /.../} entry is not a valid
 *         regular expression
 */
private static List<String> computeDisplayedRelAnnotations(VisualizerInput input, String relAnnosConfiguration, Class<? extends SRelation> type) {
    if (input == null) {
        return new LinkedList<>();
    }
    SDocumentGraph graph = input.getDocument().getDocumentGraph();
    Set<String> annotationPool = getRelationLevelSet(graph, null, type);

    // Without a configuration, everything found in the graph is displayed.
    if (relAnnosConfiguration == null || relAnnosConfiguration.trim().isEmpty()) {
        return new LinkedList<>(annotationPool);
    }

    List<String> confAnnotations = new LinkedList<>();
    for (String rawEntry : relAnnosConfiguration.split(",")) {
        String entry = rawEntry.trim();
        if (entry.isEmpty()) {
            // "a,,b" or "a, ,b" must not yield an annotation with an empty name
            continue;
        }
        // is regular expression?
        if (entry.startsWith("/") && entry.endsWith("/")) {
            // go over all remaining items in our pool of all annotations and
            // check if they match
            Pattern regex = Pattern.compile(StringUtils.strip(entry, "/"));
            List<String> matchingAnnotations = new LinkedList<>();
            for (String anno : annotationPool) {
                if (regex.matcher(anno).matches()) {
                    matchingAnnotations.add(anno);
                }
            }
            confAnnotations.addAll(matchingAnnotations);
            // collected first and removed afterwards, so the pool is not modified while iterating
            annotationPool.removeAll(matchingAnnotations);
        } else {
            confAnnotations.add(entry);
            annotationPool.remove(entry);
        }
    }
    return confAnnotations;
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testLayerNames.
/**
 * Extracts the single-text result set and checks that the first document graph contains
 * exactly the six expected layers when they are sorted with {@code NameComparator}.
 */
@Test
public void testLayerNames() throws SQLException {
    SaltProject extracted = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(extracted);

    SDocumentGraph docGraph = extracted.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    List<SLayer> sortedLayers = new ArrayList<>(docGraph.getLayers());
    Collections.sort(sortedLayers, new NameComparator());

    // expected layer names in the order produced by the comparator
    String[] expectedNames = {"default_ns", "dep", "exmaralda", "mmax", "rst", "tiger"};
    assertEquals(6, sortedLayers.size());
    for (int i = 0; i < expectedNames.length; i++) {
        assertEquals(expectedNames[i], sortedLayers.get(i).getName());
    }
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testLayerRelations.
/**
 * Extracts the single-text result set and checks, layer by layer, the relations of the first
 * document graph after sorting them with {@code EdgeComparator}. The "dep" and "exmaralda"
 * layers are checked completely, the remaining layers only by relation count and a few
 * control samples.
 */
@Test
public void testLayerRelations() throws SQLException {
    SaltProject extracted = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(extracted);
    SDocumentGraph docGraph = extracted.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();

    // dep //
    String[][] depEdges = {
        {"tok_150", "tok_151"},
        {"tok_152", "tok_153"},
        {"tok_156", "tok_154"},
        {"tok_156", "tok_155"},
        {"tok_156", "tok_157"},
        {"tok_157", "tok_158"},
        {"tok_158", "tok_160"},
        {"tok_160", "tok_159"},
        {"tok_160", "tok_161"}
    };
    List<SRelation<SNode, SNode>> edges = new ArrayList<>(docGraph.getLayerByName("dep").get(0).getRelations());
    Collections.sort(edges, new EdgeComparator());
    assertEquals(9, edges.size());
    for (int i = 0; i < depEdges.length; i++) {
        assertEdgeEndpoints(edges, i, depEdges[i][0], depEdges[i][1]);
    }

    // exmaralda //
    String[][] exmaraldaEdges = {
        {"Focus_newInfSeg_10", "tok_154"},
        {"Focus_newInfSeg_10", "tok_155"},
        {"Focus_newInfSeg_10", "tok_156"},
        {"Focus_newInfSeg_10", "tok_157"},
        {"Focus_newInfSeg_10", "tok_158"},
        {"Focus_newInfSeg_10", "tok_159"},
        {"Focus_newInfSeg_10", "tok_160"},
        {"Focus_newInfSeg_9", "tok_150"},
        {"Focus_newInfSeg_9", "tok_151"},
        {"Focus_newInfSeg_9", "tok_152"},
        {"Inf-StatSeg_29", "tok_150"},
        {"Inf-StatSeg_29", "tok_151"},
        {"Inf-StatSeg_30", "tok_155"},
        {"NPSeg_29", "tok_150"},
        {"NPSeg_29", "tok_151"},
        {"NPSeg_30", "tok_155"},
        {"PPSeg_7", "tok_150"},
        {"PPSeg_7", "tok_151"},
        {"SentSeg_10", "tok_154"},
        {"SentSeg_10", "tok_155"},
        {"SentSeg_10", "tok_156"},
        {"SentSeg_10", "tok_157"},
        {"SentSeg_10", "tok_158"},
        {"SentSeg_10", "tok_159"},
        {"SentSeg_10", "tok_160"},
        {"SentSeg_10", "tok_161"},
        {"SentSeg_9", "tok_150"},
        {"SentSeg_9", "tok_151"},
        {"SentSeg_9", "tok_152"},
        {"SentSeg_9", "tok_153"}
    };
    edges = new ArrayList<>(docGraph.getLayerByName("exmaralda").get(0).getRelations());
    Collections.sort(edges, new EdgeComparator());
    assertEquals(30, edges.size());
    for (int i = 0; i < exmaraldaEdges.length; i++) {
        assertEdgeEndpoints(edges, i, exmaraldaEdges[i][0], exmaraldaEdges[i][1]);
    }

    // mmax, only control samples //
    edges = new ArrayList<>(docGraph.getLayerByName("mmax").get(0).getRelations());
    Collections.sort(edges, new EdgeComparator());
    assertEquals(14, edges.size());
    assertEdgeEndpoints(edges, 1, "primmarkSeg_60", "tok_150");
    assertEdgeEndpoints(edges, 7, "sentenceSeg_50010", "tok_158");

    // tiger, only control samples //
    edges = new ArrayList<>(docGraph.getLayerByName("tiger").get(0).getRelations());
    Collections.sort(edges, new EdgeComparator());
    assertEquals(17, edges.size());
    assertEdgeEndpoints(edges, 9, "const_59", "tok_160");
    assertEdgeEndpoints(edges, 16, "const_61", "tok_156");

    // rst layer (labelled "urml" in the original comment), only control samples //
    edges = new ArrayList<>(docGraph.getLayerByName("rst").get(0).getRelations());
    Collections.sort(edges, new EdgeComparator());
    assertEquals(20, edges.size());
    assertEdgeEndpoints(edges, 0, "u0", "u28");
    assertEdgeEndpoints(edges, 5, "u11", "tok_153");
}

/**
 * Asserts the names of the source and the target node of the relation at {@code index}.
 */
private static void assertEdgeEndpoints(List<SRelation<SNode, SNode>> edges, int index, String expectedSource, String expectedTarget) {
    assertEquals(expectedSource, edges.get(index).getSource().getName());
    assertEquals(expectedTarget, edges.get(index).getTarget().getName());
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testRelationType.
/**
 * Extracts the single-text result set and checks, for every relation that is not an
 * {@code STextualRelation}, that it belongs to exactly one layer and that its class fits
 * the name of that layer. Relations in layers not listed below are only checked for the
 * layer count.
 */
@Test
public void testRelationType() throws SQLException {
    SaltProject extracted = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(extracted);
    SDocumentGraph docGraph = extracted.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();

    for (SRelation<? extends SNode, ? extends SNode> rel : docGraph.getRelations()) {
        if (rel instanceof STextualRelation) {
            // textual relations are not subject to the layer checks
            continue;
        }
        assertEquals(1, rel.getLayers().size());
        String layerName = rel.getLayers().iterator().next().getName();

        // NOTE(review): testLayerNames expects no layer called "urml" for this data set
        // (it expects "rst") - confirm whether the "urml" comparison can ever match.
        boolean spanningLayer = "exmaralda".equals(layerName)
            || "urml".equals(layerName)
            || "mmax".equals(layerName);
        if (spanningLayer) {
            assertTrue("instance of SSpanningRelation", rel instanceof SSpanningRelation);
        } else if ("dep".equals(layerName)) {
            assertTrue("instance of SPointingRelation", rel instanceof SPointingRelation);
        } else if ("tiger".equals(layerName)) {
            assertTrue("instance of SDominanceRelation", rel instanceof SDominanceRelation);
        }
    }
}
use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
the class SaltAnnotateExtractor method extractData.
/**
 * Maps the rows of the given result set to a newly created {@link SaltProject}.
 *
 * <p>The rows are consumed in order. Whenever the solution key reports a new key, the
 * document graph of the previous match (if any) is completed with missing spanning
 * relations, primary texts and ordering relations, and a new corpus graph, document and
 * document graph are created and added to the project. After the last row the same
 * completion steps are applied to the last document graph.
 *
 * <p>Any exception raised inside the mapping loop is logged and not rethrown; the project
 * as built up to that point is returned.
 * NOTE(review): because of this catch-all the declared exceptions do not appear to escape
 * from the mapping loop - confirm whether callers rely on that.
 *
 * @param resultSet the rows to map; assumed to be sorted by key and pre (see note in the body)
 * @return the project created at the start of this method, filled with one corpus graph per
 *         detected key
 */
@Override
public SaltProject extractData(ResultSet resultSet) throws SQLException, DataAccessException {
SaltProject project = SaltFactory.createSaltProject();
try {
// corpus graph and document graph of the match currently being built
SCorpusGraph corpusGraph = null;
SDocumentGraph graph = null;
// fn: parent information (pre and component) id to node
FastInverseMap<Long, SNode> nodeByRankID = new FastInverseMap<>();
// bookkeeping structures that are filled by the helper methods called below
TreeSet<Long> allTextIDs = new TreeSet<>();
TreeMap<Long, String> tokenTexts = new TreeMap<>();
TreeMap<Long, SToken> tokenByIndex = new TreeMap<>();
TreeMap<String, TreeMap<Long, String>> nodeBySegmentationPath = new TreeMap<>();
// span node id -> component entry, recorded for SSpan nodes only
Map<String, ComponentEntry> componentForSpan = new HashMap<>();
// clear mapping functions for this graph
// assumes that the result set is sorted by key, pre
nodeByRankID.clear();
SDocument document = null;
AtomicInteger numberOfRelations = new AtomicInteger();
// number of matches started so far; used for the corpus graph name and the document id
int match_index = 0;
SolutionKey<?> key = createSolutionKey();
// row counter, only used for the debug output every 1000 rows
int counter = 0;
while (resultSet.next()) {
if (counter % 1000 == 0) {
log.debug("handling resultset row {}", counter);
}
counter++;
// let the solution key inspect the current row (the return value is not used here)
key.retrieveKey(resultSet);
if (key.isNewKey()) {
// create the text for the last graph
if (graph != null && document != null) {
createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
addOrderingRelations(graph, nodeBySegmentationPath);
}
// new match, reset everything
// NOTE(review): allTextIDs and nodeBySegmentationPath are not cleared here -
// presumably the helpers above take care of that; confirm.
nodeByRankID.clear();
tokenTexts.clear();
tokenByIndex.clear();
componentForSpan.clear();
Integer matchstart = resultSet.getInt("matchstart");
corpusGraph = SaltFactory.createSCorpusGraph();
// the corpus graph is named after the running match index plus the "matchstart" column
corpusGraph.setName("match_" + (match_index + matchstart));
project.addCorpusGraph(corpusGraph);
graph = SaltFactory.createSDocumentGraph();
document = SaltFactory.createSDocument();
// a fresh temporary directory is created for every match and used as the document graph location
// NOTE(review): nothing in this method removes that directory again.
document.setDocumentGraphLocation(org.eclipse.emf.common.util.URI.createFileURI(Files.createTempDir().getAbsolutePath()));
// corpus path: first element is the toplevel corpus, last element is the document,
// everything in between becomes a chain of sub corpora
List<String> path = corpusPathExtractor.extractCorpusPath(resultSet, "path");
SCorpus toplevelCorpus = SaltFactory.createSCorpus();
toplevelCorpus.setName(path.get(0));
corpusGraph.addNode(toplevelCorpus);
// NOTE(review): the size check runs after path.get(0) was already used above.
Validate.isTrue(path.size() >= 2, "Corpus path must be have at least two members (toplevel and document)");
SCorpus corpus = toplevelCorpus;
for (int i = 1; i < path.size() - 1; i++) {
SCorpus subcorpus = SaltFactory.createSCorpus();
subcorpus.setName(path.get(i));
corpusGraph.addSubCorpus(corpus, subcorpus);
corpus = subcorpus;
}
document.setName(path.get(path.size() - 1));
document.setId("" + match_index);
// the document is attached to the innermost (sub) corpus of the path
corpusGraph.addDocument(corpus, document);
document.setDocumentGraph(graph);
match_index++;
}
// end if new key
// get node data
SNode node = createOrFindNewNode(resultSet, graph, allTextIDs, tokenTexts, tokenByIndex, nodeBySegmentationPath, key, nodeByRankID);
long rankID = longValue(resultSet, RANK_TABLE, "id");
long componentID = longValue(resultSet, COMPONENT_TABLE, "id");
// JDBC's wasNull() reports on the last column that was read; presumably this is the
// component id fetched just above, so rows without a component are skipped - the
// order of these statements must not be changed.
if (!resultSet.wasNull()) {
nodeByRankID.put(rankID, node);
createRelation(resultSet, graph, nodeByRankID, node, numberOfRelations);
if (node instanceof SSpan) {
// remember the component (id, type 'c', namespace, name) of this span
componentForSpan.put(node.getId(), new ComponentEntry(componentID, 'c', stringValue(resultSet, COMPONENT_TABLE, "namespace"), stringValue(resultSet, COMPONENT_TABLE, "name")));
}
}
}
// the last match needs a primary text, too
if (graph != null) {
createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
addOrderingRelations(graph, nodeBySegmentationPath);
}
} catch (Exception ex) {
// best effort: the failure is only logged and the (possibly partial) project is returned
log.error("could not map result set to SaltProject", ex);
}
return project;
}
Aggregations