Search in sources :

Example 21 with SaltProject

use of org.corpus_tools.salt.common.SaltProject in project ANNIS by korpling.

the class SaltAnnotateExtractorTest method testLayerRelations.

/**
 * Verifies which relations end up in which layer of the extracted document graph.
 *
 * <p>For each layer the relations are copied into a list, sorted with
 * {@code EdgeComparator}, counted, and then compared by source/target node name.
 * The "dep" and "exmaralda" layers are checked completely; the remaining layers
 * are only checked with a few control samples.
 */
@Test
public void testLayerRelations() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph docGraph = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();

    // dep layer: all relations, as {source name, target name} in sorted order
    String[][] depExpected = {
        { "tok_150", "tok_151" },
        { "tok_152", "tok_153" },
        { "tok_156", "tok_154" },
        { "tok_156", "tok_155" },
        { "tok_156", "tok_157" },
        { "tok_157", "tok_158" },
        { "tok_158", "tok_160" },
        { "tok_160", "tok_159" },
        { "tok_160", "tok_161" }
    };
    List<SRelation<SNode, SNode>> depRelations = new ArrayList<>(docGraph.getLayerByName("dep").get(0).getRelations());
    Collections.sort(depRelations, new EdgeComparator());
    assertEquals(9, depRelations.size());
    for (int i = 0; i < depExpected.length; i++) {
        SRelation<SNode, SNode> rel = depRelations.get(i);
        assertEquals(depExpected[i][0], rel.getSource().getName());
        assertEquals(depExpected[i][1], rel.getTarget().getName());
    }

    // exmaralda layer: all relations, as {source name, target name} in sorted order
    String[][] exmaraldaExpected = {
        { "Focus_newInfSeg_10", "tok_154" },
        { "Focus_newInfSeg_10", "tok_155" },
        { "Focus_newInfSeg_10", "tok_156" },
        { "Focus_newInfSeg_10", "tok_157" },
        { "Focus_newInfSeg_10", "tok_158" },
        { "Focus_newInfSeg_10", "tok_159" },
        { "Focus_newInfSeg_10", "tok_160" },
        { "Focus_newInfSeg_9", "tok_150" },
        { "Focus_newInfSeg_9", "tok_151" },
        { "Focus_newInfSeg_9", "tok_152" },
        { "Inf-StatSeg_29", "tok_150" },
        { "Inf-StatSeg_29", "tok_151" },
        { "Inf-StatSeg_30", "tok_155" },
        { "NPSeg_29", "tok_150" },
        { "NPSeg_29", "tok_151" },
        { "NPSeg_30", "tok_155" },
        { "PPSeg_7", "tok_150" },
        { "PPSeg_7", "tok_151" },
        { "SentSeg_10", "tok_154" },
        { "SentSeg_10", "tok_155" },
        { "SentSeg_10", "tok_156" },
        { "SentSeg_10", "tok_157" },
        { "SentSeg_10", "tok_158" },
        { "SentSeg_10", "tok_159" },
        { "SentSeg_10", "tok_160" },
        { "SentSeg_10", "tok_161" },
        { "SentSeg_9", "tok_150" },
        { "SentSeg_9", "tok_151" },
        { "SentSeg_9", "tok_152" },
        { "SentSeg_9", "tok_153" }
    };
    List<SRelation<SNode, SNode>> exmaraldaRelations = new ArrayList<>(docGraph.getLayerByName("exmaralda").get(0).getRelations());
    Collections.sort(exmaraldaRelations, new EdgeComparator());
    assertEquals(30, exmaraldaRelations.size());
    for (int i = 0; i < exmaraldaExpected.length; i++) {
        SRelation<SNode, SNode> rel = exmaraldaRelations.get(i);
        assertEquals(exmaraldaExpected[i][0], rel.getSource().getName());
        assertEquals(exmaraldaExpected[i][1], rel.getTarget().getName());
    }

    // mmax layer: only control samples
    List<SRelation<SNode, SNode>> mmaxRelations = new ArrayList<>(docGraph.getLayerByName("mmax").get(0).getRelations());
    Collections.sort(mmaxRelations, new EdgeComparator());
    assertEquals(14, mmaxRelations.size());
    assertEquals("primmarkSeg_60", mmaxRelations.get(1).getSource().getName());
    assertEquals("tok_150", mmaxRelations.get(1).getTarget().getName());
    assertEquals("sentenceSeg_50010", mmaxRelations.get(7).getSource().getName());
    assertEquals("tok_158", mmaxRelations.get(7).getTarget().getName());

    // tiger layer: only control samples
    List<SRelation<SNode, SNode>> tigerRelations = new ArrayList<>(docGraph.getLayerByName("tiger").get(0).getRelations());
    Collections.sort(tigerRelations, new EdgeComparator());
    assertEquals(17, tigerRelations.size());
    assertEquals("const_59", tigerRelations.get(9).getSource().getName());
    assertEquals("tok_160", tigerRelations.get(9).getTarget().getName());
    assertEquals("const_61", tigerRelations.get(16).getSource().getName());
    assertEquals("tok_156", tigerRelations.get(16).getTarget().getName());

    // rst layer: only control samples
    List<SRelation<SNode, SNode>> rstRelations = new ArrayList<>(docGraph.getLayerByName("rst").get(0).getRelations());
    Collections.sort(rstRelations, new EdgeComparator());
    assertEquals(20, rstRelations.size());
    assertEquals("u0", rstRelations.get(0).getSource().getName());
    assertEquals("u28", rstRelations.get(0).getTarget().getName());
    assertEquals("u11", rstRelations.get(5).getSource().getName());
    assertEquals("tok_153", rstRelations.get(5).getTarget().getName());
}
Also used : SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) ArrayList(java.util.ArrayList) SaltProject(org.corpus_tools.salt.common.SaltProject) Test(org.junit.Test)

Example 22 with SaltProject

use of org.corpus_tools.salt.common.SaltProject in project ANNIS by korpling.

the class SaltAnnotateExtractorTest method testRelationType.

/**
 * Verifies that every non-textual relation of the extracted document graph
 * belongs to exactly one layer and has the relation class expected for that layer.
 */
@Test
public void testRelationType() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph docGraph = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    for (SRelation<? extends SNode, ? extends SNode> rel : docGraph.getRelations()) {
        // textual relations are exempt from the layer checks below
        if (rel instanceof STextualRelation) {
            continue;
        }
        assertEquals(1, rel.getLayers().size());
        String layerName = rel.getLayers().iterator().next().getName();
        boolean spanningLayer = "exmaralda".equals(layerName)
            || "urml".equals(layerName)
            || "mmax".equals(layerName);
        // relations in a layer with any other name are not type-checked
        if (spanningLayer) {
            assertTrue("instance of SSpanningRelation", rel instanceof SSpanningRelation);
        } else if ("dep".equals(layerName)) {
            assertTrue("instance of SPointingRelation", rel instanceof SPointingRelation);
        } else if ("tiger".equals(layerName)) {
            assertTrue("instance of SDominanceRelation", rel instanceof SDominanceRelation);
        }
    }
}
Also used : SPointingRelation(org.corpus_tools.salt.common.SPointingRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) STextualRelation(org.corpus_tools.salt.common.STextualRelation) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) SaltProject(org.corpus_tools.salt.common.SaltProject) SDominanceRelation(org.corpus_tools.salt.common.SDominanceRelation) Test(org.junit.Test)

Example 23 with SaltProject

use of org.corpus_tools.salt.common.SaltProject in project ANNIS by korpling.

the class SaltAnnotateExtractor method extractData.

/**
 * Maps the rows of a result set to a {@link SaltProject}.
 *
 * <p>Rows are consumed in order. Whenever the solution key reports a new key, the
 * previous document graph (if any) is completed and a new corpus graph named
 * {@code match_<matchIndex + matchstart>} with a single document is started. The
 * corpus path read from the {@code path} column supplies the top-level corpus name
 * (first element), optional sub-corpora (middle elements) and the document name
 * (last element). The code assumes that the result set is sorted by key and pre.
 *
 * <p>Every {@link Exception} raised while mapping is logged and not rethrown; the
 * project as built up to that point is returned in that case.
 *
 * @param resultSet the rows to map; must provide the {@code matchstart} column and
 *                  whatever the node/relation helpers of this class read
 * @return the project created at the start of this call, holding one corpus graph
 *         per match that was started
 * @throws SQLException declared by the signature; the visible body catches and logs
 *                      exceptions raised while reading rows
 * @throws DataAccessException declared by the signature; see above
 */
@Override
public SaltProject extractData(ResultSet resultSet) throws SQLException, DataAccessException {
    SaltProject project = SaltFactory.createSaltProject();
    try {
        SCorpusGraph corpusGraph = null;
        SDocumentGraph graph = null;
        SDocument document = null;
        // fn: parent information (pre and component) id to node
        FastInverseMap<Long, SNode> nodeByRankID = new FastInverseMap<>();
        TreeSet<Long> allTextIDs = new TreeSet<>();
        TreeMap<Long, String> tokenTexts = new TreeMap<>();
        TreeMap<Long, SToken> tokenByIndex = new TreeMap<>();
        TreeMap<String, TreeMap<Long, String>> nodeBySegmentationPath = new TreeMap<>();
        Map<String, ComponentEntry> componentForSpan = new HashMap<>();
        AtomicInteger numberOfRelations = new AtomicInteger();
        int matchIndex = 0;
        SolutionKey<?> key = createSolutionKey();
        int counter = 0;
        while (resultSet.next()) {
            if (counter % 1000 == 0) {
                log.debug("handling resultset row {}", counter);
            }
            counter++;
            key.retrieveKey(resultSet);
            if (key.isNewKey()) {
                // complete the graph of the previous match before starting a new one
                if (graph != null && document != null) {
                    createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
                    createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
                    addOrderingRelations(graph, nodeBySegmentationPath);
                }
                // new match, reset the per-match state
                // NOTE(review): allTextIDs and nodeBySegmentationPath are not cleared here —
                // confirm that this is intended.
                nodeByRankID.clear();
                tokenTexts.clear();
                tokenByIndex.clear();
                componentForSpan.clear();
                int matchstart = resultSet.getInt("matchstart");
                corpusGraph = SaltFactory.createSCorpusGraph();
                corpusGraph.setName("match_" + (matchIndex + matchstart));
                project.addCorpusGraph(corpusGraph);
                graph = SaltFactory.createSDocumentGraph();
                document = SaltFactory.createSDocument();
                document.setDocumentGraphLocation(org.eclipse.emf.common.util.URI.createFileURI(Files.createTempDir().getAbsolutePath()));
                List<String> path = corpusPathExtractor.extractCorpusPath(resultSet, "path");
                // validate before the first element access so that a too-short path is
                // reported with this message and no half-built corpus structure is added
                Validate.isTrue(path.size() >= 2, "Corpus path must be have at least two members (toplevel and document)");
                SCorpus toplevelCorpus = SaltFactory.createSCorpus();
                toplevelCorpus.setName(path.get(0));
                corpusGraph.addNode(toplevelCorpus);
                // chain the intermediate path elements as nested sub-corpora
                SCorpus corpus = toplevelCorpus;
                for (int i = 1; i < path.size() - 1; i++) {
                    SCorpus subcorpus = SaltFactory.createSCorpus();
                    subcorpus.setName(path.get(i));
                    corpusGraph.addSubCorpus(corpus, subcorpus);
                    corpus = subcorpus;
                }
                // the last path element names the document
                document.setName(path.get(path.size() - 1));
                document.setId("" + matchIndex);
                corpusGraph.addDocument(corpus, document);
                document.setDocumentGraph(graph);
                matchIndex++;
            }
            // end if new key
            // get node data
            SNode node = createOrFindNewNode(resultSet, graph, allTextIDs, tokenTexts, tokenByIndex, nodeBySegmentationPath, key, nodeByRankID);
            long rankID = longValue(resultSet, RANK_TABLE, "id");
            long componentID = longValue(resultSet, COMPONENT_TABLE, "id");
            // wasNull() reports on the most recently read column, presumably the component
            // id read just above — keep this check directly after that read
            if (!resultSet.wasNull()) {
                nodeByRankID.put(rankID, node);
                createRelation(resultSet, graph, nodeByRankID, node, numberOfRelations);
                if (node instanceof SSpan) {
                    componentForSpan.put(node.getId(), new ComponentEntry(componentID, 'c', stringValue(resultSet, COMPONENT_TABLE, "namespace"), stringValue(resultSet, COMPONENT_TABLE, "name")));
                }
            }
        }
        // the last match needs a primary text, too
        if (graph != null) {
            createMissingSpanningRelations(graph, nodeByRankID, tokenByIndex, componentForSpan, numberOfRelations);
            createPrimaryTexts(graph, allTextIDs, tokenTexts, tokenByIndex);
            addOrderingRelations(graph, nodeBySegmentationPath);
        }
    } catch (Exception ex) {
        // best effort: log the failure and return whatever has been built so far
        log.error("could not map result set to SaltProject", ex);
    }
    return project;
}
Also used : SNode(org.corpus_tools.salt.core.SNode) SSpan(org.corpus_tools.salt.common.SSpan) HashMap(java.util.HashMap) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph) SToken(org.corpus_tools.salt.common.SToken) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) TreeSet(java.util.TreeSet) SDocument(org.corpus_tools.salt.common.SDocument) SaltProject(org.corpus_tools.salt.common.SaltProject) TreeMap(java.util.TreeMap) SaltException(org.corpus_tools.salt.exceptions.SaltException) DataAccessException(org.springframework.dao.DataAccessException) SQLException(java.sql.SQLException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SCorpus(org.corpus_tools.salt.common.SCorpus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Example 24 with SaltProject

use of org.corpus_tools.salt.common.SaltProject in project ANNIS by korpling.

the class LegacyGraphConverterTest method testConvertToAOM.

/**
 * Test of convertToAOM method, of class LegacyGraphConverter.
 *
 * <p>The same CSV sample is mapped twice: once to a Salt project that is then
 * converted with {@code LegacyGraphConverter.convertToAOM}, and once directly with
 * {@code aomSqlGen}. Both results must contain the same number of graphs and, per
 * graph, pairwise equal nodes when ordered by node id.
 */
@Test
public void testConvertToAOM() throws SQLException {
    SaltAnnotateExtractor saltExtractor = new SaltAnnotateExtractor() {

        @Override
        protected SolutionKey<?> createSolutionKey() {
            PostgreSqlArraySolutionKey<Long> solutionKey = new PostgreSqlArraySolutionKey<>();
            solutionKey.setKeyColumnName("key");
            solutionKey.setIdColumnName("id");
            return solutionKey;
        }
    };
    CorpusPathExtractor pathExtractor = new ArrayCorpusPathExtractor();
    saltExtractor.setCorpusPathExtractor(pathExtractor);
    TestAnnotateSqlGenerator.setupOuterQueryFactsTableColumnAliases(saltExtractor);

    List<Match> matchList = new ArrayList<>();
    matchList.add(Match.parseFromString("salt:/pcc2/4282/#tok_155 tiger::pos::salt:/pcc2/4282#tok_156"));
    MatchGroup matchGroup = new MatchGroup(matchList);

    // result under test: CSV -> Salt project -> legacy annotation graphs
    SaltProject saltProject = saltExtractor.extractData(new CsvResultSetProvider(annis.sqlgen.SaltAnnotateExtractorTest.class.getResourceAsStream("SampleAnnotateResult.csv")).getResultSet());
    SaltAnnotateExtractor.addMatchInformation(saltProject, matchGroup);
    // reference: CSV -> legacy annotation graphs
    List<AnnotationGraph> expectedGraphs = aomSqlGen.extractData(new CsvResultSetProvider(annis.sqlgen.SaltAnnotateExtractorTest.class.getResourceAsStream("SampleAnnotateResult.csv")).getResultSet());
    List<AnnotationGraph> actualGraphs = LegacyGraphConverter.convertToAOM(saltProject);
    assertEquals(expectedGraphs.size(), actualGraphs.size());

    // one ordering shared by both node lists so that they can be compared pairwise
    Comparator<AnnisNode> byNodeId = new Comparator<AnnisNode>() {

        @Override
        public int compare(AnnisNode left, AnnisNode right) {
            return Long.valueOf(left.getId()).compareTo(Long.valueOf(right.getId()));
        }
    };

    for (int graphIdx = 0; graphIdx < expectedGraphs.size(); graphIdx++) {
        List<AnnisNode> expectedNodes = expectedGraphs.get(graphIdx).getNodes();
        List<AnnisNode> actualNodes = actualGraphs.get(graphIdx).getNodes();
        assertEquals(expectedNodes.size(), actualNodes.size());
        Collections.sort(expectedNodes, byNodeId);
        Collections.sort(actualNodes, byNodeId);
        for (int nodeIdx = 0; nodeIdx < expectedNodes.size(); nodeIdx++) {
            checkAnnisNodeEqual(expectedNodes.get(nodeIdx), actualNodes.get(nodeIdx));
        }
    }
}
Also used : ArrayCorpusPathExtractor(annis.sqlgen.ArrayCorpusPathExtractor) CorpusPathExtractor(annis.sqlgen.CorpusPathExtractor) ArrayList(java.util.ArrayList) SaltProject(org.corpus_tools.salt.common.SaltProject) PostgreSqlArraySolutionKey(annis.sqlgen.PostgreSqlArraySolutionKey) SaltAnnotateExtractor(annis.sqlgen.SaltAnnotateExtractor) Match(annis.service.objects.Match) AnnotationGraph(annis.model.AnnotationGraph) ArrayCorpusPathExtractor(annis.sqlgen.ArrayCorpusPathExtractor) MatchGroup(annis.service.objects.MatchGroup) AnnisNode(annis.model.AnnisNode) CsvResultSetProvider(annis.test.CsvResultSetProvider) Test(org.junit.Test)

Example 25 with SaltProject

use of org.corpus_tools.salt.common.SaltProject in project ANNIS by korpling.

the class BenchmarkTest method mapSalt_SonderbaresKraeuterBuch.

/**
 * Retrieves the annotation graph of one document of the Ridges corpus and checks
 * that the resulting project holds exactly one corpus graph. The test is skipped
 * (JUnit assumption) when {@code ridgesCorpusID} is empty.
 */
@Test
public void mapSalt_SonderbaresKraeuterBuch() {
    boolean ridgesAvailable = ridgesCorpusID.size() > 0;
    assumeTrue(ridgesAvailable);
    SaltProject saltProject = annisDao.retrieveAnnotationGraph(
        "Ridges_Herbology_Version_2.0",
        "sonderbares.kraeuterbuch.16175.11-21",
        null);
    int corpusGraphCount = saltProject.getCorpusGraphs().size();
    assertEquals(1, corpusGraphCount);
}
Also used : SaltProject(org.corpus_tools.salt.common.SaltProject) Test(org.junit.Test)

Aggregations

SaltProject (org.corpus_tools.salt.common.SaltProject) 34, Test (org.junit.Test) 11, Match (annis.service.objects.Match) 8, WebResource (com.sun.jersey.api.client.WebResource) 8, LinkedList (java.util.LinkedList) 8, SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph) 8, MatchGroup (annis.service.objects.MatchGroup) 7, UniformInterfaceException (com.sun.jersey.api.client.UniformInterfaceException) 6, ArrayList (java.util.ArrayList) 6, SDocument (org.corpus_tools.salt.common.SDocument) 6, HashMap (java.util.HashMap) 4, SCorpusGraph (org.corpus_tools.salt.common.SCorpusGraph) 4, SNode (org.corpus_tools.salt.core.SNode) 4, VisualizerInput (annis.libgui.visualizers.VisualizerInput) 3, QueryData (annis.ql.parser.QueryData) 3, SubgraphFilter (annis.service.objects.SubgraphFilter) 3, AnnotateQueryData (annis.sqlgen.extensions.AnnotateQueryData) 3, LimitOffsetQueryData (annis.sqlgen.extensions.LimitOffsetQueryData) 3, IOException (java.io.IOException) 3, AnnisCorpusAccessException (annis.exceptions.AnnisCorpusAccessException) 2