use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
the class SaltAnnotateExtractorTest method testCorpusGraph.
/**
 * Checks the corpus structure produced for the single-text result set:
 * exactly one corpus graph holding one corpus ("pcc2") and one document
 * ("4282").
 */
@Test
public void testCorpusGraph() throws Exception {
  final SaltProject extracted =
      instance.extractData(resultSetProviderSingleText.getResultSet());
  assertNotNull(extracted);

  // the project must contain exactly one corpus graph
  assertEquals(1, extracted.getCorpusGraphs().size());
  final SCorpusGraph onlyGraph = extracted.getCorpusGraphs().get(0);

  // ... with a single corpus of the expected name
  assertEquals(1, onlyGraph.getCorpora().size());
  assertEquals("pcc2", onlyGraph.getCorpora().get(0).getName());

  // ... and a single document of the expected name
  assertEquals(1, onlyGraph.getDocuments().size());
  assertEquals("4282", onlyGraph.getDocuments().get(0).getName());
}
use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
the class SaltAnnotateExtractor method extractData.
/**
 * Maps the rows of a result set onto a {@link SaltProject}.
 *
 * <p>Rows are consumed in order. Whenever the solution key reports a new key,
 * the previous document graph (if any) is finished and a new corpus graph
 * with exactly one document and one document graph is started and added to
 * the project. Every row then contributes one node and, when the component
 * id read for that row was not SQL NULL, a rank entry and a relation.
 *
 * <p>Any exception thrown while processing the rows is caught and logged
 * inside this method; in that case the project built so far is returned.
 *
 * @param resultSet the rows to convert; the code assumes they are sorted by
 *        key and pre order (see the comment on the mapping state below)
 * @return the project, containing one corpus graph per detected key change
 */
@Override
public SaltProject extractData(ResultSet resultSet) throws SQLException, DataAccessException {
SaltProject project = SaltFactory.createSaltProject();
try {
// state of the match that is currently being built
SCorpusGraph corpusGraph = null;
SDocumentGraph graph = null;
// fn: parent information (pre and component) id to node
FastInverseMap<Long, SNode> nodeByRankID = new FastInverseMap<>();
// NOTE(review): allTextIDs and nodeBySegmentationPath are never cleared when
// a new match starts (unlike the other maps below) - confirm this is intended
TreeSet<Long> allTextIDs = new TreeSet<>();
TreeMap<Long, String> tokenTexts = new TreeMap<>();
TreeMap<Long, SToken> tokenByIndex = new TreeMap<>();
TreeMap<String, TreeMap<Long, String>> nodeBySegmentationPath = new TreeMap<>();
Map<String, ComponentEntry> componentForSpan = new HashMap<>();
// clear mapping functions for this graph
// assumes that the result set is sorted by key, pre
// (the map was created just above, so this clear() has nothing to remove yet)
nodeByRankID.clear();
SDocument document = null;
AtomicInteger numberOfRelations = new AtomicInteger();
// number of matches started so far; used for the graph name and document id
int match_index = 0;
SolutionKey<?> key = createSolutionKey();
// number of rows handled so far, only used for the debug output
int counter = 0;
while (resultSet.next()) {
// emit a progress message for every 1000th row
if (counter % 1000 == 0) {
log.debug("handling resultset row {}", counter);
}
counter++;
// read the key of the current row; isNewKey() below tells whether it changed
key.retrieveKey(resultSet);
if (key.isNewKey()) {
// finish the previous graph (spanning relations, primary texts, ordering)
// before starting the next one
if (graph != null && document != null) {
createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
addOrderingRelations(graph, nodeBySegmentationPath);
}
// new match, reset everything
nodeByRankID.clear();
tokenTexts.clear();
tokenByIndex.clear();
componentForSpan.clear();
// the corpus graph is named after the match index plus the "matchstart" column
Integer matchstart = resultSet.getInt("matchstart");
corpusGraph = SaltFactory.createSCorpusGraph();
corpusGraph.setName("match_" + (match_index + matchstart));
project.addCorpusGraph(corpusGraph);
graph = SaltFactory.createSDocumentGraph();
document = SaltFactory.createSDocument();
// every match gets its own directory from Files.createTempDir() as graph location
document.setDocumentGraphLocation(org.eclipse.emf.common.util.URI.createFileURI(Files.createTempDir().getAbsolutePath()));
// corpus path: first element is the toplevel corpus, last element the document
List<String> path = corpusPathExtractor.extractCorpusPath(resultSet, "path");
SCorpus toplevelCorpus = SaltFactory.createSCorpus();
toplevelCorpus.setName(path.get(0));
corpusGraph.addNode(toplevelCorpus);
// NOTE(review): this size check runs only after path.get(0) was already used
Validate.isTrue(path.size() >= 2, "Corpus path must be have at least two members (toplevel and document)");
// elements between the first and the last one become a chain of sub-corpora
SCorpus corpus = toplevelCorpus;
for (int i = 1; i < path.size() - 1; i++) {
SCorpus subcorpus = SaltFactory.createSCorpus();
subcorpus.setName(path.get(i));
corpusGraph.addSubCorpus(corpus, subcorpus);
corpus = subcorpus;
}
// the document is attached to the deepest corpus created above
document.setName(path.get(path.size() - 1));
document.setId("" + match_index);
corpusGraph.addDocument(corpus, document);
document.setDocumentGraph(graph);
match_index++;
}
// end if new key
// get node data
SNode node = createOrFindNewNode(resultSet, graph, allTextIDs, tokenTexts, tokenByIndex, nodeBySegmentationPath, key, nodeByRankID);
long rankID = longValue(resultSet, RANK_TABLE, "id");
long componentID = longValue(resultSet, COMPONENT_TABLE, "id");
// NOTE(review): wasNull() refers to the column read most recently -
// presumably the component id fetched just above; confirm against longValue()
if (!resultSet.wasNull()) {
nodeByRankID.put(rankID, node);
createRelation(resultSet, graph, nodeByRankID, node, numberOfRelations);
// remember the component of each span, keyed by the span's node id
if (node instanceof SSpan) {
componentForSpan.put(node.getId(), new ComponentEntry(componentID, 'c', stringValue(resultSet, COMPONENT_TABLE, "namespace"), stringValue(resultSet, COMPONENT_TABLE, "name")));
}
}
}
// the last match needs a primary text, too
if (graph != null) {
createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
addOrderingRelations(graph, nodeBySegmentationPath);
}
} catch (Exception ex) {
// failures are only logged; the (possibly incomplete) project is still returned
log.error("could not map result set to SaltProject", ex);
}
return project;
}
use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
the class AutoTokQuery method analyzingQuery.
/**
 * Derives a token query from a query result: collects all tokens of every
 * document graph in the project, picks one at random and stores its spanned
 * text, wrapped in double quotes, in {@code finalAql}.
 *
 * <p>If the picked token spans an empty text, up to ten further random picks
 * are made; if the text is still empty afterwards, {@code finalAql} is set
 * to {@code null}. If the project contains no tokens at all,
 * {@code finalAql} is left untouched.
 *
 * @param saltProject the result to pick a token from
 */
@Override
public void analyzingQuery(SaltProject saltProject) {
  List<SToken> tokens = new ArrayList<>();
  for (SCorpusGraph g : saltProject.getCorpusGraphs()) {
    if (g == null) {
      continue;
    }
    for (SDocument doc : g.getDocuments()) {
      SDocumentGraph docGraph = doc.getDocumentGraph();
      if (docGraph == null) {
        // document without a loaded graph: nothing to collect
        continue;
      }
      List<SNode> sNodes = docGraph.getNodes();
      if (sNodes == null) {
        continue;
      }
      for (SNode n : sNodes) {
        if (n instanceof SToken) {
          tokens.add((SToken) n);
        }
      }
    }
  }

  if (tokens.isEmpty()) {
    return;
  }

  // select one random token from the result
  // nextInt(bound) is exclusive of bound, so tokens.size() covers every index;
  // the former "size() - 1" threw for a single token and never chose the last one
  Random random = new Random();
  int tries = 10;
  String text = CommonHelper.getSpannedText(tokens.get(random.nextInt(tokens.size())));
  while ("".equals(text) && tries > 0) {
    text = CommonHelper.getSpannedText(tokens.get(random.nextInt(tokens.size())));
    tries--;
  }

  if ("".equals(text)) {
    finalAql = null;
  } else {
    finalAql = "\"" + text + "\"";
  }
}
use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
the class ResultViewPanel method createPanels.
/**
 * Creates one {@link SingleResultPanel} per corpus graph of the project,
 * each showing the first document of its corpus graph.
 *
 * @param p the project whose corpus graphs are turned into panels
 * @param localMatchIndex index into {@code allMatches}; when it is out of
 *        range (or {@code allMatches} is {@code null}) an empty
 *        {@link Match} is used instead
 * @param globalOffset added to the running panel index to form the result
 *        number handed to each panel
 * @return the created panels in corpus graph order
 */
private List<SingleResultPanel> createPanels(SaltProject p, int localMatchIndex, long globalOffset) {
  List<SingleResultPanel> panels = new LinkedList<>();
  int panelIndex = 0;
  for (SCorpusGraph graph : p.getCorpusGraphs()) {
    SDocument firstDocument = graph.getDocuments().get(0);

    // start with an empty match and replace it when the index is valid
    Match match = new Match();
    boolean indexInRange = allMatches != null
        && localMatchIndex >= 0
        && localMatchIndex < allMatches.size();
    if (indexInRange) {
      match = allMatches.get(localMatchIndex);
    }

    SingleResultPanel resultPanel = new SingleResultPanel(
        firstDocument,
        match,
        panelIndex + globalOffset,
        new ResolverProviderImpl(cacheResolver),
        ps,
        sui,
        getVisibleTokenAnnos(),
        segmentationName,
        controller,
        instanceConfig,
        initialQuery);
    resultPanel.setWidth("100%");
    resultPanel.setHeight("-1px");
    panels.add(resultPanel);

    panelIndex++;
  }
  return panels;
}
use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
the class CommonHelper method getTokenAnnotationLevelSet.
/**
 * Collects the token annotation levels of all document graphs in a project
 * by merging the per-graph results of
 * {@code getTokenAnnotationLevelSet(SDocumentGraph)} into one sorted set.
 *
 * @param p the project whose corpus graphs and documents are visited
 * @return the union of the level sets of every document graph
 */
public static Set<String> getTokenAnnotationLevelSet(SaltProject p) {
  Set<String> levels = new TreeSet<>();
  for (SCorpusGraph corpusGraph : p.getCorpusGraphs()) {
    for (SDocument document : corpusGraph.getDocuments()) {
      SDocumentGraph documentGraph = document.getDocumentGraph();
      levels.addAll(getTokenAnnotationLevelSet(documentGraph));
    }
  }
  return levels;
}
Aggregations