Search in sources :

Example 1 with SCorpusGraph

use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.

the class QueryDaoImpl method exportCorpus.

/**
 * Exports a top-level corpus as a Salt project into the given directory.
 *
 * <p>Every document graph is written to
 * {@code <outputDirectory>/<toplevelCorpus>/<document name>.<SaltUtil.FILE_ENDING_SALT_XML>}
 * and the corpus structure (a root corpus with a flat list of documents, including
 * their meta annotations) is written to
 * {@code <outputDirectory>/<SaltUtil.FILE_SALT_PROJECT>}.
 *
 * @param toplevelCorpus name of the top-level corpus to export
 * @param outputDirectory directory that receives the export; it is created when missing
 */
@Override
@Transactional(readOnly = true)
public void exportCorpus(String toplevelCorpus, File outputDirectory) {
    // the lookup is only done to verify that the corpus exists; the ID itself is unused
    mapCorpusNameToId(toplevelCorpus);

    // corpus structure of the export: one root corpus holding all documents
    SaltProject exportProject = SaltFactory.createSaltProject();
    SCorpusGraph exportGraph = SaltFactory.createSCorpusGraph();
    exportGraph.setSaltProject(exportProject);
    SCorpus exportRoot = exportGraph.createCorpus(null, toplevelCorpus);

    // copy the metadata of the top-level corpus
    for (Annotation rootMeta : listCorpusAnnotations(toplevelCorpus)) {
        exportRoot.createMetaAnnotation(rootMeta.getNamespace(), rootMeta.getName(), rootMeta.getValue());
    }

    File documentRootDir = new File(outputDirectory, toplevelCorpus);
    if (!outputDirectory.exists() && !outputDirectory.mkdirs()) {
        log.warn("Could not create output directory \"{}\" for exporting the corpus", outputDirectory.getAbsolutePath());
    }

    List<Annotation> docs = listDocuments(toplevelCorpus);
    // 1-based position of the current document, only used for progress messages
    int position = 0;
    for (Annotation docAnno : docs) {
        position++;
        log.info("Loading document {} from database ({}/{})", docAnno.getName(), position, docs.size());
        SaltProject loadedProject = retrieveAnnotationGraph(toplevelCorpus, docAnno.getName(), null);
        if (loadedProject == null || loadedProject.getCorpusGraphs() == null || loadedProject.getCorpusGraphs().isEmpty()) {
            // nothing was loaded for this document
            continue;
        }

        List<Annotation> docMetaData = listCorpusAnnotations(toplevelCorpus, docAnno.getName(), true);
        SCorpusGraph loadedGraph = loadedProject.getCorpusGraphs().get(0);
        // TODO: we could re-use the actual corpus structure instead of just adding a flat list of documents
        if (loadedGraph.getDocuments() == null) {
            continue;
        }

        for (SDocument doc : loadedGraph.getDocuments()) {
            log.info("Removing SFeatures from {} ({}/{})", docAnno.getName(), position, docs.size());
            // remove all ANNIS specific features that require a special Java class
            SDocumentGraph graph = doc.getDocumentGraph();
            if (graph != null) {
                if (graph.getNodes() != null) {
                    for (SNode node : graph.getNodes()) {
                        node.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE);
                    }
                }
                if (graph.getRelations() != null) {
                    for (SRelation relation : graph.getRelations()) {
                        relation.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_EDGE);
                    }
                }
            }

            String docName = doc.getName();
            log.info("Saving document {} ({}/{})", docName, position, docs.size());
            File docFile = new File(documentRootDir, docName + "." + SaltUtil.FILE_ENDING_SALT_XML);
            SaltUtil.saveDocumentGraph(graph, URI.createFileURI(docFile.getAbsolutePath()));

            // register the document in the exported corpus structure
            SDocument exportedDoc = exportGraph.createDocument(exportRoot, docName);
            log.info("Adding metadata to document {} ({}/{})", docName, position, docs.size());
            for (Annotation docMeta : docMetaData) {
                exportedDoc.createMetaAnnotation(docMeta.getNamespace(), docMeta.getName(), docMeta.getValue());
            }
        }
    }

    // all documents are written, now save the actual SaltProject
    log.info("Saving corpus structure");
    File projectFile = new File(outputDirectory, SaltUtil.FILE_SALT_PROJECT);
    new SaltXML10Writer(projectFile).writeSaltProject(exportProject);
}
Also used : SCorpus(org.corpus_tools.salt.common.SCorpus) SRelation(org.corpus_tools.salt.core.SRelation) SNode(org.corpus_tools.salt.core.SNode) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SDocument(org.corpus_tools.salt.common.SDocument) SaltXML10Writer(org.corpus_tools.salt.util.internal.persistence.SaltXML10Writer) SaltProject(org.corpus_tools.salt.common.SaltProject) File(java.io.File) Annotation(annis.model.Annotation) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph) Transactional(org.springframework.transaction.annotation.Transactional)

Example 2 with SCorpusGraph

use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.

the class AutoSimpleRegexQuery method analyzingQuery.

/**
 * Derives an example regular-expression query from the tokens of a result.
 *
 * <p>All token texts of the given project are collected. The method first looks
 * for a word that occurs at least twice, once starting with a lower-case and once
 * with an upper-case letter. If there is no such word, a random token is drawn
 * (empty texts are re-drawn up to ten times). The chosen word is stored in
 * {@code text}. When it has more than one character, {@code finalAQL} is set to
 * {@code /[Xx]rest/}, a regex matching both capitalisations of its first letter;
 * otherwise {@code finalAQL} is set to the empty string.
 *
 * @param saltProject the result whose tokens are analysed
 */
@Override
public void analyzingQuery(SaltProject saltProject) {
    List<String> tokens = new ArrayList<>();
    for (SCorpusGraph g : saltProject.getCorpusGraphs()) {
        if (g == null || g.getDocuments() == null) {
            continue;
        }
        for (SDocument doc : g.getDocuments()) {
            SDocumentGraph docGraph = doc.getDocumentGraph();
            // a document without a loaded graph contributes no tokens
            List<SNode> sNodes = docGraph == null ? null : docGraph.getNodes();
            if (sNodes == null) {
                continue;
            }
            for (SNode n : sNodes) {
                if (n instanceof SToken) {
                    tokens.add(CommonHelper.getSpannedText((SToken) n));
                }
            }
        }
    }

    // try to find a word that occurs twice, once capitalized and once in lower case
    text = null;
    pairSearch:
    for (int i = 0; i < tokens.size(); i++) {
        String first = tokens.get(i);
        if (first.length() <= 1) {
            // a one-letter word leaves nothing behind the character class
            continue;
        }
        for (int j = i + 1; j < tokens.size(); j++) {
            String second = tokens.get(j);
            // equalsIgnoreCase guarantees that both words have the same length
            if (first.equalsIgnoreCase(second)) {
                char a = first.charAt(0);
                char b = second.charAt(0);
                boolean differentCase = (Character.isLowerCase(a) && Character.isUpperCase(b))
                        || (Character.isLowerCase(b) && Character.isUpperCase(a));
                if (differentCase) {
                    text = first;
                    // leave both loops: the first hit is sufficient
                    break pairSearch;
                }
            }
        }
    }

    if (text == null && !tokens.isEmpty()) {
        // select one random token from the result; the bound is the list size so
        // that every token (including the last one) can be drawn and a single
        // token does not lead to an illegal bound of 0
        Random random = new Random();
        int tries = 10;
        text = tokens.get(random.nextInt(tokens.size()));
        while ("".equals(text) && tries > 0) {
            text = tokens.get(random.nextInt(tokens.size()));
            tries--;
        }
    }

    if (text != null && text.length() > 1) {
        char upperLetter = Character.toUpperCase(text.charAt(0));
        char lowerLetter = Character.toLowerCase(text.charAt(0));
        // NOTE(review): the remainder is inserted verbatim, regex meta characters
        // inside the token are not escaped
        finalAQL = "/[" + upperLetter + lowerLetter + "]" + text.substring(1) + "/";
    } else {
        finalAQL = "";
    }
}
Also used : SToken(org.corpus_tools.salt.common.SToken) SNode(org.corpus_tools.salt.core.SNode) Random(java.util.Random) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) ArrayList(java.util.ArrayList) SDocument(org.corpus_tools.salt.common.SDocument) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph)

Example 3 with SCorpusGraph

use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.

the class SaltAnnotateExtractor method addMatchInformation.

/**
 * Sets additional match (global) information about the matched nodes and
 * annotations.
 *
 * <p>The n-th match of the group is paired with the n-th corpus graph of the
 * project. The document graph of the first {@link SDocument} of that corpus
 * graph is passed to {@code setMatchedIDs} together with the match (presumably
 * this stores {@link AnnisConstants#FEAT_MATCHEDIDS}; see that method).
 *
 * @param p The salt project to add the features to.
 * @param matchGroup A list of matches in the same order as the corpus graphs
 * of the salt project.
 */
public static void addMatchInformation(SaltProject p, MatchGroup matchGroup) {
    // index of the corpus graph that belongs to the current match
    int graphIndex = 0;
    for (Match match : matchGroup.getMatches()) {
        SDocument firstDocument = p.getCorpusGraphs().get(graphIndex).getDocuments().get(0);
        setMatchedIDs(firstDocument.getDocumentGraph(), match);
        graphIndex++;
    }
}
Also used : SDocument(org.corpus_tools.salt.common.SDocument) Match(annis.service.objects.Match) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph)

Example 4 with SCorpusGraph

use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.

the class SaltBasedExporter method convertSaltProject.

/**
 * Iterates over all documents of all corpus graphs of a project (one corpus
 * graph per match) and converts each document graph.
 *
 * <p>Before a document is converted, its virtual tokenization is removed when
 * the configuration of the first top-level corpus asks for it: either via
 * {@code virtual_tokenization_from_namespace=true} or via a
 * {@code virtual_tokenization_mapping} of the form {@code a=b,c=d}. The document
 * graph is then handed to {@code createAdjacencyMatrix} if {@code nodeCount} is
 * not {@code null}, and to {@code outputText} otherwise. Each converted document
 * gets the next record number, starting at {@code offset}.
 *
 * @param p the project to convert; nothing happens if it or its corpus graph list is {@code null}
 * @param annoKeys not used by this method
 * @param args passed on to {@code createAdjacencyMatrix}
 * @param alignmc passed on to {@code outputText}
 * @param offset record number of the first converted document
 * @param corpusConfigs corpus configurations, looked up by top-level corpus name
 * @param out passed on to {@code outputText}
 * @param nodeCount selects the conversion (see above) and is passed on to {@code createAdjacencyMatrix}
 */
private void convertSaltProject(SaltProject p, List<String> annoKeys, Map<String, String> args, boolean alignmc, int offset, Map<String, CorpusConfig> corpusConfigs, Writer out, Integer nodeCount) throws IOException, IllegalArgumentException {
    if (p == null || p.getCorpusGraphs() == null) {
        return;
    }

    // determine from the corpus configuration how a virtual tokenization is handled
    Map<String, String> spanAnno2order = null;
    boolean virtualTokenizationFromNamespace = false;
    Set<String> corpusNames = CommonHelper.getToplevelCorpusNames(p);
    CorpusConfig config = corpusNames.isEmpty() ? null : corpusConfigs.get(corpusNames.iterator().next());
    if (config != null) {
        virtualTokenizationFromNamespace = "true".equalsIgnoreCase(config.getConfig("virtual_tokenization_from_namespace"));
        if (!virtualTokenizationFromNamespace) {
            String mappingRaw = config.getConfig("virtual_tokenization_mapping");
            if (mappingRaw != null) {
                spanAnno2order = new HashMap<>();
                for (String singleMapping : Splitter.on(',').split(mappingRaw)) {
                    List<String> keyAndValue = Splitter.on('=').splitToList(singleMapping);
                    // entries without a '=' are ignored
                    if (keyAndValue.size() >= 2) {
                        spanAnno2order.put(keyAndValue.get(0), keyAndValue.get(1));
                    }
                }
            }
        }
    }

    int recordNumber = offset;
    for (SCorpusGraph corpusGraph : p.getCorpusGraphs()) {
        if (corpusGraph.getDocuments() == null) {
            continue;
        }
        for (SDocument doc : corpusGraph.getDocuments()) {
            if (virtualTokenizationFromNamespace) {
                TimelineReconstructor.removeVirtualTokenizationUsingNamespace(doc.getDocumentGraph());
            } else if (spanAnno2order != null) {
                // there is a definition how to map the virtual tokenization to a real one
                TimelineReconstructor.removeVirtualTokenization(doc.getDocumentGraph(), spanAnno2order);
            }

            int currentRecord = recordNumber++;
            if (nodeCount == null) {
                outputText(doc.getDocumentGraph(), alignmc, currentRecord, out);
            } else {
                createAdjacencyMatrix(doc.getDocumentGraph(), args, currentRecord, nodeCount);
            }
        }
    }
}
Also used : SDocument(org.corpus_tools.salt.common.SDocument) CorpusConfig(annis.service.objects.CorpusConfig) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph)

Example 5 with SCorpusGraph

use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.

the class EmbeddedVisUI method generateVisFromRemoteURL.

/**
 * Fetches a Salt project from a remote URL in the background and shows the
 * first document of its first corpus graph with the requested visualizer.
 *
 * <p>Only the first value of each request argument is used. Arguments with an
 * empty value array are ignored.
 *
 * @param visName name of the visualizer plugin to use
 * @param rawUri URL the Salt project is fetched from
 * @param args request arguments; all entries except {@code KEY_SALT} are passed
 *             to the visualizer as mappings, some keys (instance, namespace,
 *             base text, match, search interface link) are evaluated additionally
 */
private void generateVisFromRemoteURL(final String visName, final String rawUri, Map<String, String[]> args) {
    try {
        // find the matching visualizer
        final VisualizerPlugin visPlugin = this.getVisualizer(visName);
        if (visPlugin == null) {
            displayMessage("Unknown visualizer \"" + visName + "\"", "This ANNIS instance does not know the given visualizer.");
            return;
        }
        URI uri = new URI(rawUri);
        // fetch content of the URI, preferring the client of the logged-in user
        Client client = null;
        AnnisUser user = Helper.getUser();
        if (user != null) {
            client = user.getClient();
        }
        if (client == null) {
            client = Helper.createRESTClient();
        }
        final WebResource saltRes = client.resource(uri);
        displayLoadingIndicator();
        // copy the arguments for using them later in the callback
        final Map<String, String[]> argsCopy = new LinkedHashMap<>(args);
        // NOTE(review): presumably errors of the background fetch are reported via
        // onFailure and not via the catch clauses below; confirm against
        // Background.runWithCallback
        Background.runWithCallback(new Callable<SaltProject>() {

            @Override
            public SaltProject call() throws Exception {
                return saltRes.get(SaltProject.class);
            }
        }, new FutureCallback<SaltProject>() {

            @Override
            public void onFailure(Throwable t) {
                displayMessage("Could not query the result.", t.getMessage());
            }

            @Override
            public void onSuccess(SaltProject p) {
                // TODO: allow to display several visualizers when there is more than one document
                SCorpusGraph firstCorpusGraph = null;
                SDocument doc = null;
                if (p.getCorpusGraphs() != null && !p.getCorpusGraphs().isEmpty()) {
                    firstCorpusGraph = p.getCorpusGraphs().get(0);
                    if (firstCorpusGraph.getDocuments() != null && !firstCorpusGraph.getDocuments().isEmpty()) {
                        doc = firstCorpusGraph.getDocuments().get(0);
                    }
                }
                if (doc == null) {
                    displayMessage("No documents found in provided URL.", "");
                    return;
                }
                // switch to the requested instance configuration; an argument without
                // a value is ignored like all other arguments instead of failing with
                // an index error inside this callback
                String[] instanceName = argsCopy.get(KEY_INSTANCE);
                if (instanceName != null && instanceName.length > 0) {
                    Map<String, InstanceConfig> allConfigs = loadInstanceConfig();
                    InstanceConfig newConfig = allConfigs.get(instanceName[0]);
                    if (newConfig != null) {
                        setInstanceConfig(newConfig);
                    }
                }
                // now it is time to load the actual defined instance fonts
                loadInstanceFonts();
                // generate the visualizer
                VisualizerInput visInput = new VisualizerInput();
                visInput.setDocument(doc);
                if (getInstanceConfig() != null && getInstanceConfig().getFont() != null) {
                    visInput.setFont(getInstanceFont());
                }
                // every argument except the Salt URL itself becomes a visualizer mapping
                Properties mappings = new Properties();
                for (Map.Entry<String, String[]> e : argsCopy.entrySet()) {
                    if (!KEY_SALT.equals(e.getKey()) && e.getValue().length > 0) {
                        mappings.put(e.getKey(), e.getValue()[0]);
                    }
                }
                visInput.setMappings(mappings);
                String[] namespace = argsCopy.get(KEY_NAMESPACE);
                if (namespace != null && namespace.length > 0) {
                    visInput.setNamespace(namespace[0]);
                } else {
                    visInput.setNamespace(null);
                }
                String baseText = null;
                if (argsCopy.containsKey(KEY_BASE_TEXT)) {
                    String[] value = argsCopy.get(KEY_BASE_TEXT);
                    if (value.length > 0) {
                        baseText = value[0];
                    }
                }
                // the segmentation nodes are determined before the match is added to the graph
                List<SNode> segNodes = CommonHelper.getSortedSegmentationNodes(baseText, doc.getDocumentGraph());
                if (argsCopy.containsKey(KEY_MATCH)) {
                    String[] rawMatch = argsCopy.get(KEY_MATCH);
                    if (rawMatch.length > 0) {
                        // enhance the graph with match information from the arguments
                        Match match = Match.parseFromString(rawMatch[0]);
                        Helper.addMatchToDocumentGraph(match, doc);
                    }
                }
                Map<String, String> markedColorMap = new HashMap<>();
                Map<String, String> exactMarkedMap = Helper.calculateColorsForMarkedExact(doc);
                Map<String, Long> markedAndCovered = Helper.calculateMarkedAndCoveredIDs(doc, segNodes, baseText);
                Helper.calulcateColorsForMarkedAndCovered(doc, markedAndCovered, markedColorMap);
                visInput.setMarkedAndCovered(markedAndCovered);
                visInput.setMarkableMap(markedColorMap);
                visInput.setMarkableExactMap(exactMarkedMap);
                visInput.setContextPath(Helper.getContext());
                String template = Helper.getContext() + "/Resource/" + visName + "/%s";
                visInput.setResourcePathTemplate(template);
                visInput.setSegmentationName(baseText);
                // TODO: which other thing do we have to provide?
                Component c = visPlugin.createComponent(visInput, null);
                // add the styles
                c.addStyleName("corpus-font");
                c.addStyleName("vis-content");
                // link back to the search interface, only shown when a target was given
                Link link = new Link();
                link.setCaption("Show in ANNIS search interface");
                link.setIcon(ANNISFontIcon.LOGO);
                link.setVisible(false);
                link.addStyleName("dontprint");
                link.setTargetName("_blank");
                if (argsCopy.containsKey(KEY_SEARCH_INTERFACE)) {
                    String[] interfaceLink = argsCopy.get(KEY_SEARCH_INTERFACE);
                    if (interfaceLink.length > 0) {
                        link.setResource(new ExternalResource(interfaceLink[0]));
                        link.setVisible(true);
                    }
                }
                VerticalLayout layout = new VerticalLayout(link, c);
                layout.setComponentAlignment(link, Alignment.TOP_LEFT);
                layout.setSpacing(true);
                layout.setMargin(true);
                setContent(layout);
                IDGenerator.assignID(link);
            }
        });
    } catch (URISyntaxException ex) {
        displayMessage("Invalid URL", "The provided URL is malformed:<br />" + ex.getMessage());
    } catch (LoginDataLostException ex) {
        displayMessage("LoginData Lost", "No login data available any longer in the session:<br /> " + ex.getMessage());
    } catch (UniformInterfaceException ex) {
        if (ex.getResponse().getStatus() == Response.Status.FORBIDDEN.getStatusCode()) {
            displayMessage("Corpus access forbidden", "You are not allowed to access this corpus. " + "Please login at the <a target=\"_blank\" href=\"" + Helper.getContext() + "\">main application</a> first and then reload this page.");
        } else {
            displayMessage("Service error", ex.getMessage());
        }
    } catch (ClientHandlerException ex) {
        displayMessage("Could not generate the visualization because the ANNIS service reported an error.", ex.getMessage());
    } catch (Throwable ex) {
        // last resort so that any unexpected problem is shown to the user
        displayMessage("Could not generate the visualization.", ex.getMessage() == null ? ("An unknown error of type " + ex.getClass().getSimpleName()) + " occured." : ex.getMessage());
    }
}
Also used : VisualizerPlugin(annis.libgui.visualizers.VisualizerPlugin) WebResource(com.sun.jersey.api.client.WebResource) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) URI(java.net.URI) LinkedHashMap(java.util.LinkedHashMap) SCorpusGraph(org.corpus_tools.salt.common.SCorpusGraph) Match(annis.service.objects.Match) InstanceConfig(annis.libgui.InstanceConfig) VerticalLayout(com.vaadin.ui.VerticalLayout) List(java.util.List) LinkedList(java.util.LinkedList) Client(com.sun.jersey.api.client.Client) Component(com.vaadin.ui.Component) ClientHandlerException(com.sun.jersey.api.client.ClientHandlerException) SDocument(org.corpus_tools.salt.common.SDocument) SaltProject(org.corpus_tools.salt.common.SaltProject) ExternalResource(com.vaadin.server.ExternalResource) AnnisUser(annis.libgui.AnnisUser) URISyntaxException(java.net.URISyntaxException) LoginDataLostException(annis.libgui.LoginDataLostException) UniformInterfaceException(com.sun.jersey.api.client.UniformInterfaceException) ClientHandlerException(com.sun.jersey.api.client.ClientHandlerException) UniformInterfaceException(com.sun.jersey.api.client.UniformInterfaceException) VisualizerInput(annis.libgui.visualizers.VisualizerInput) LoginDataLostException(annis.libgui.LoginDataLostException) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Link(com.vaadin.ui.Link)

Aggregations

SCorpusGraph (org.corpus_tools.salt.common.SCorpusGraph)13 SDocument (org.corpus_tools.salt.common.SDocument)12 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)7 SaltProject (org.corpus_tools.salt.common.SaltProject)5 SNode (org.corpus_tools.salt.core.SNode)5 Match (annis.service.objects.Match)3 LinkedList (java.util.LinkedList)3 SCorpus (org.corpus_tools.salt.common.SCorpus)3 SToken (org.corpus_tools.salt.common.SToken)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Random (java.util.Random)2 TreeSet (java.util.TreeSet)2 AnnisUser (annis.libgui.AnnisUser)1 InstanceConfig (annis.libgui.InstanceConfig)1 LoginDataLostException (annis.libgui.LoginDataLostException)1 ResolverProviderImpl (annis.libgui.ResolverProviderImpl)1 VisualizerInput (annis.libgui.visualizers.VisualizerInput)1 VisualizerPlugin (annis.libgui.visualizers.VisualizerPlugin)1 Annotation (annis.model.Annotation)1