Use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
In class QueryDaoImpl, the method exportCorpus:
@Override
@Transactional(readOnly = true)
public void exportCorpus(String toplevelCorpus, File outputDirectory) {
    // check if the corpus really exists
    mapCorpusNameToId(toplevelCorpus);
    SaltProject corpusProject = SaltFactory.createSaltProject();
    SCorpusGraph corpusGraph = SaltFactory.createSCorpusGraph();
    corpusGraph.setSaltProject(corpusProject);
    SCorpus rootCorpus = corpusGraph.createCorpus(null, toplevelCorpus);
    // add all root metadata
    for (Annotation metaAnno : listCorpusAnnotations(toplevelCorpus)) {
        rootCorpus.createMetaAnnotation(metaAnno.getNamespace(), metaAnno.getName(), metaAnno.getValue());
    }
    File documentRootDir = new File(outputDirectory, toplevelCorpus);
    if (!outputDirectory.exists()) {
        if (!outputDirectory.mkdirs()) {
            log.warn("Could not create output directory \"{}\" for exporting the corpus", outputDirectory.getAbsolutePath());
        }
    }
    List<Annotation> docs = listDocuments(toplevelCorpus);
    int i = 1;
    for (Annotation docAnno : docs) {
        log.info("Loading document {} from database ({}/{})", docAnno.getName(), i, docs.size());
        SaltProject docProject = retrieveAnnotationGraph(toplevelCorpus, docAnno.getName(), null);
        if (docProject != null && docProject.getCorpusGraphs() != null && !docProject.getCorpusGraphs().isEmpty()) {
            List<Annotation> docMetaData = listCorpusAnnotations(toplevelCorpus, docAnno.getName(), true);
            SCorpusGraph docCorpusGraph = docProject.getCorpusGraphs().get(0);
            // TODO: we could re-use the actual corpus structure instead of just adding a flat list of documents
            if (docCorpusGraph.getDocuments() != null) {
                for (SDocument doc : docCorpusGraph.getDocuments()) {
                    log.info("Removing SFeatures from {} ({}/{})", docAnno.getName(), i, docs.size());
                    // remove all ANNIS specific features that require a special Java class
                    SDocumentGraph graph = doc.getDocumentGraph();
                    if (graph != null) {
                        if (graph.getNodes() != null) {
                            for (SNode n : graph.getNodes()) {
                                n.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE);
                            }
                        }
                        if (graph.getRelations() != null) {
                            for (SRelation e : graph.getRelations()) {
                                e.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_EDGE);
                            }
                        }
                    }
                    log.info("Saving document {} ({}/{})", doc.getName(), i, docs.size());
                    SaltUtil.saveDocumentGraph(graph, URI.createFileURI(new File(documentRootDir, doc.getName() + "." + SaltUtil.FILE_ENDING_SALT_XML).getAbsolutePath()));
                    SDocument docCopy = corpusGraph.createDocument(rootCorpus, doc.getName());
                    log.info("Adding metadata to document {} ({}/{})", doc.getName(), i, docs.size());
                    for (Annotation metaAnno : docMetaData) {
                        docCopy.createMetaAnnotation(metaAnno.getNamespace(), metaAnno.getName(), metaAnno.getValue());
                    }
                }
            }
        }
        i++;
    }
    // end for each document
    // save the actual SaltProject
    log.info("Saving corpus structure");
    File projectFile = new File(outputDirectory, SaltUtil.FILE_SALT_PROJECT);
    SaltXML10Writer writer = new SaltXML10Writer(projectFile);
    writer.writeSaltProject(corpusProject);
}
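For orientation, here is a minimal, standalone sketch of the corpus-structure pattern used above: create a SaltProject with one SCorpusGraph, attach a root corpus and a document with metadata, and write only the project file. Corpus name, metadata values, and the output directory are made up; the import locations for SaltXML10Writer and the exact package layout are assumed from the Salt library and may differ in your version.

import java.io.File;

import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SCorpus;
import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.SaltProject;
import org.corpus_tools.salt.util.SaltUtil;
import org.corpus_tools.salt.util.internal.persistence.SaltXML10Writer;

public class MinimalCorpusStructureExport {

    public static void main(String[] args) {
        // an empty Salt project with a single corpus graph
        SaltProject project = SaltFactory.createSaltProject();
        SCorpusGraph corpusGraph = SaltFactory.createSCorpusGraph();
        corpusGraph.setSaltProject(project);

        // a root corpus with one document and some (made up) metadata
        SCorpus rootCorpus = corpusGraph.createCorpus(null, "exampleCorpus");
        rootCorpus.createMetaAnnotation(null, "language", "de");
        SDocument doc = corpusGraph.createDocument(rootCorpus, "doc1");
        doc.createMetaAnnotation(null, "author", "unknown");

        // persist only the corpus structure, like the project file written by exportCorpus()
        File outputDir = new File("export-example"); // hypothetical output directory
        outputDir.mkdirs();
        File projectFile = new File(outputDir, SaltUtil.FILE_SALT_PROJECT);
        new SaltXML10Writer(projectFile).writeSaltProject(project);
    }
}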
Use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
In class AutoSimpleRegexQuery, the method analyzingQuery:
@Override
public void analyzingQuery(SaltProject saltProject) {
    List<String> tokens = new ArrayList<>();
    for (SCorpusGraph g : saltProject.getCorpusGraphs()) {
        if (g != null) {
            for (SDocument doc : g.getDocuments()) {
                SDocumentGraph docGraph = doc.getDocumentGraph();
                List<SNode> sNodes = docGraph.getNodes();
                if (sNodes != null) {
                    for (SNode n : sNodes) {
                        if (n instanceof SToken) {
                            tokens.add(CommonHelper.getSpannedText((SToken) n));
                        }
                    }
                }
            }
        }
    }
    // try to find a word that occurs twice, once lowercase and once with a capital letter
    text = null;
    for (int i = 0; i < tokens.size(); i++) {
        for (int j = i + 1; j < tokens.size(); j++) {
            if (tokens.get(i).equalsIgnoreCase(tokens.get(j))) {
                if (tokens.get(i).length() > 1 && ((Character.isLowerCase(tokens.get(i).charAt(0)) && Character.isUpperCase(tokens.get(j).charAt(0))) || (Character.isLowerCase(tokens.get(j).charAt(0)) && Character.isUpperCase(tokens.get(i).charAt(0))))) {
                    text = tokens.get(i);
                    break;
                }
            }
        }
    }
    if (text != null) {
        Character upperLetter = Character.toUpperCase(text.charAt(0));
        Character lowerLetter = Character.toLowerCase(text.charAt(0));
        String rest = StringUtils.substring(text, -(text.length() - 1));
        finalAQL = "/[" + upperLetter + lowerLetter + "]" + rest + "/";
    } else {
        // select one random token from the result
        int tries = 10;
        int r = new Random().nextInt(tokens.size() - 1);
        text = tokens.get(r);
        while ("".equals(text) && tries > 0) {
            r = new Random().nextInt(tokens.size() - 1);
            text = tokens.get(r);
            tries--;
        }
        if (!"".equals(text) && text.length() > 1) {
            Character upperLetter = Character.toUpperCase(text.charAt(0));
            Character lowerLetter = Character.toLowerCase(text.charAt(0));
            String rest = StringUtils.substring(text, -(text.length() - 1));
            finalAQL = "/[" + upperLetter + lowerLetter + "]" + rest + "/";
        } else {
            finalAQL = "";
        }
    }
}
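To illustrate the regular expression that is built: for a token that appears both lowercase and capitalized, the first letter is turned into a two-character class and the rest of the word is appended. A small sketch with a made-up token value, assuming Apache Commons Lang 3 provides the StringUtils used above:

import org.apache.commons.lang3.StringUtils;

public class RegexBuildExample {

    public static void main(String[] args) {
        String text = "haus"; // hypothetical token that also occurs as "Haus"
        Character upperLetter = Character.toUpperCase(text.charAt(0)); // 'H'
        Character lowerLetter = Character.toLowerCase(text.charAt(0)); // 'h'
        // negative start index: keep everything after the first character ("aus")
        String rest = StringUtils.substring(text, -(text.length() - 1));
        String finalAQL = "/[" + upperLetter + lowerLetter + "]" + rest + "/";
        System.out.println(finalAQL); // prints "/[Hh]aus/"
    }
}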
Use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
In class SaltAnnotateExtractor, the method addMatchInformation:
/**
 * Sets additional match (global) information about the matched nodes and
 * annotations.
 *
 * This will add the {@link AnnisConstants#FEAT_MATCHEDIDS} feature to all
 * {@link SDocument} elements of the Salt project.
 *
 * @param p The Salt project to add the features to.
 * @param matchGroup A list of matches in the same order as the corpus graphs
 *                   of the Salt project.
 */
public static void addMatchInformation(SaltProject p, MatchGroup matchGroup) {
    int matchIndex = 0;
    for (Match m : matchGroup.getMatches()) {
        // get the corresponding SDocument of the Salt project
        SCorpusGraph corpusGraph = p.getCorpusGraphs().get(matchIndex);
        SDocument doc = corpusGraph.getDocuments().get(0);
        setMatchedIDs(doc.getDocumentGraph(), m);
        matchIndex++;
    }
}
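The method relies on the n-th corpus graph of the project belonging to the n-th match, with exactly one document per corpus graph. A hedged sketch of that precondition as a standalone check; the helper class is hypothetical and the ANNIS package for MatchGroup is assumed:

import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SaltProject;

import annis.service.objects.MatchGroup;

public final class MatchAlignmentCheck {

    // true if project and match group can be zipped index by index,
    // with exactly one document per corpus graph
    public static boolean isAligned(SaltProject p, MatchGroup matchGroup) {
        if (p.getCorpusGraphs() == null
                || p.getCorpusGraphs().size() != matchGroup.getMatches().size()) {
            return false;
        }
        for (SCorpusGraph g : p.getCorpusGraphs()) {
            if (g.getDocuments() == null || g.getDocuments().size() != 1) {
                return false;
            }
        }
        return true;
    }
}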
Use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
In class SaltBasedExporter, the method convertSaltProject:
/**
 * Iterates over all matches (modelled as corpus graphs) and calls
 * {@link #convertText(de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDocumentGraph, java.util.List, java.util.Map, int, java.io.Writer) }
 * for the single document graph.
 *
 * @param p
 * @param annoKeys
 * @param args
 * @param alignmc
 * @param offset
 * @param corpusConfigs
 * @param out
 * @param nodeCount
 */
// invokes createAdjacencyMatrix if nodeCount != null, otherwise outputText
private void convertSaltProject(SaltProject p, List<String> annoKeys, Map<String, String> args, boolean alignmc, int offset, Map<String, CorpusConfig> corpusConfigs, Writer out, Integer nodeCount) throws IOException, IllegalArgumentException {
    int recordNumber = offset;
    if (p != null && p.getCorpusGraphs() != null) {
        Map<String, String> spanAnno2order = null;
        boolean virtualTokenizationFromNamespace = false;
        Set<String> corpusNames = CommonHelper.getToplevelCorpusNames(p);
        if (!corpusNames.isEmpty()) {
            CorpusConfig config = corpusConfigs.get(corpusNames.iterator().next());
            if (config != null) {
                if ("true".equalsIgnoreCase(config.getConfig("virtual_tokenization_from_namespace"))) {
                    virtualTokenizationFromNamespace = true;
                } else {
                    String mappingRaw = config.getConfig("virtual_tokenization_mapping");
                    if (mappingRaw != null) {
                        spanAnno2order = new HashMap<>();
                        for (String singleMapping : Splitter.on(',').split(mappingRaw)) {
                            List<String> mappingParts = Splitter.on('=').splitToList(singleMapping);
                            if (mappingParts.size() >= 2) {
                                spanAnno2order.put(mappingParts.get(0), mappingParts.get(1));
                            }
                        }
                    }
                }
            }
        }
        for (SCorpusGraph corpusGraph : p.getCorpusGraphs()) {
            if (corpusGraph.getDocuments() != null) {
                for (SDocument doc : corpusGraph.getDocuments()) {
                    if (virtualTokenizationFromNamespace) {
                        TimelineReconstructor.removeVirtualTokenizationUsingNamespace(doc.getDocumentGraph());
                    } else if (spanAnno2order != null) {
                        // there is a definition of how to map the virtual tokenization to a real one
                        TimelineReconstructor.removeVirtualTokenization(doc.getDocumentGraph(), spanAnno2order);
                    }
                    if (nodeCount != null) {
                        createAdjacencyMatrix(doc.getDocumentGraph(), args, recordNumber++, nodeCount);
                    } else {
                        outputText(doc.getDocumentGraph(), alignmc, recordNumber++, out);
                    }
                }
            }
        }
    }
}
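The "virtual_tokenization_mapping" value is parsed as a comma-separated list of key=value pairs. A standalone sketch of the same Guava Splitter based parsing, with a made-up configuration value and a hypothetical class name:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.Splitter;

public class VirtualTokenizationMappingParser {

    // parses "spanAnnoA=orderA,spanAnnoB=orderB" into a map, ignoring malformed entries
    public static Map<String, String> parse(String mappingRaw) {
        Map<String, String> spanAnno2order = new HashMap<>();
        for (String singleMapping : Splitter.on(',').split(mappingRaw)) {
            List<String> parts = Splitter.on('=').splitToList(singleMapping);
            if (parts.size() >= 2) {
                spanAnno2order.put(parts.get(0), parts.get(1));
            }
        }
        return spanAnno2order;
    }

    public static void main(String[] args) {
        // hypothetical configuration value mapping span annotations to ordering names
        System.out.println(parse("dipl=dipl,norm=norm")); // {dipl=dipl, norm=norm}
    }
}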
Use of org.corpus_tools.salt.common.SCorpusGraph in project ANNIS by korpling.
In class EmbeddedVisUI, the method generateVisFromRemoteURL:
private void generateVisFromRemoteURL(final String visName, final String rawUri, Map<String, String[]> args) {
    try {
        // find the matching visualizer
        final VisualizerPlugin visPlugin = this.getVisualizer(visName);
        if (visPlugin == null) {
            displayMessage("Unknown visualizer \"" + visName + "\"", "This ANNIS instance does not know the given visualizer.");
            return;
        }
        URI uri = new URI(rawUri);
        // fetch content of the URI
        Client client = null;
        AnnisUser user = Helper.getUser();
        if (user != null) {
            client = user.getClient();
        }
        if (client == null) {
            client = Helper.createRESTClient();
        }
        final WebResource saltRes = client.resource(uri);
        displayLoadingIndicator();
        // copy the arguments for using them later in the callback
        final Map<String, String[]> argsCopy = new LinkedHashMap<>(args);
        Background.runWithCallback(new Callable<SaltProject>() {

            @Override
            public SaltProject call() throws Exception {
                return saltRes.get(SaltProject.class);
            }
        }, new FutureCallback<SaltProject>() {

            @Override
            public void onFailure(Throwable t) {
                displayMessage("Could not query the result.", t.getMessage());
            }

            @Override
            public void onSuccess(SaltProject p) {
                // TODO: allow to display several visualizers when there is more than one document
                SCorpusGraph firstCorpusGraph = null;
                SDocument doc = null;
                if (p.getCorpusGraphs() != null && !p.getCorpusGraphs().isEmpty()) {
                    firstCorpusGraph = p.getCorpusGraphs().get(0);
                    if (firstCorpusGraph.getDocuments() != null && !firstCorpusGraph.getDocuments().isEmpty()) {
                        doc = firstCorpusGraph.getDocuments().get(0);
                    }
                }
                if (doc == null) {
                    displayMessage("No documents found in provided URL.", "");
                    return;
                }
                if (argsCopy.containsKey(KEY_INSTANCE)) {
                    Map<String, InstanceConfig> allConfigs = loadInstanceConfig();
                    InstanceConfig newConfig = allConfigs.get(argsCopy.get(KEY_INSTANCE)[0]);
                    if (newConfig != null) {
                        setInstanceConfig(newConfig);
                    }
                }
                // now it is time to load the actual defined instance fonts
                loadInstanceFonts();
                // generate the visualizer
                VisualizerInput visInput = new VisualizerInput();
                visInput.setDocument(doc);
                if (getInstanceConfig() != null && getInstanceConfig().getFont() != null) {
                    visInput.setFont(getInstanceFont());
                }
                Properties mappings = new Properties();
                for (Map.Entry<String, String[]> e : argsCopy.entrySet()) {
                    if (!KEY_SALT.equals(e.getKey()) && e.getValue().length > 0) {
                        mappings.put(e.getKey(), e.getValue()[0]);
                    }
                }
                visInput.setMappings(mappings);
                String[] namespace = argsCopy.get(KEY_NAMESPACE);
                if (namespace != null && namespace.length > 0) {
                    visInput.setNamespace(namespace[0]);
                } else {
                    visInput.setNamespace(null);
                }
                String baseText = null;
                if (argsCopy.containsKey(KEY_BASE_TEXT)) {
                    String[] value = argsCopy.get(KEY_BASE_TEXT);
                    if (value.length > 0) {
                        baseText = value[0];
                    }
                }
                List<SNode> segNodes = CommonHelper.getSortedSegmentationNodes(baseText, doc.getDocumentGraph());
                if (argsCopy.containsKey(KEY_MATCH)) {
                    String[] rawMatch = argsCopy.get(KEY_MATCH);
                    if (rawMatch.length > 0) {
                        // enhance the graph with match information from the arguments
                        Match match = Match.parseFromString(rawMatch[0]);
                        Helper.addMatchToDocumentGraph(match, doc);
                    }
                }
                Map<String, String> markedColorMap = new HashMap<>();
                Map<String, String> exactMarkedMap = Helper.calculateColorsForMarkedExact(doc);
                Map<String, Long> markedAndCovered = Helper.calculateMarkedAndCoveredIDs(doc, segNodes, baseText);
                Helper.calulcateColorsForMarkedAndCovered(doc, markedAndCovered, markedColorMap);
                visInput.setMarkedAndCovered(markedAndCovered);
                visInput.setMarkableMap(markedColorMap);
                visInput.setMarkableExactMap(exactMarkedMap);
                visInput.setContextPath(Helper.getContext());
                String template = Helper.getContext() + "/Resource/" + visName + "/%s";
                visInput.setResourcePathTemplate(template);
                visInput.setSegmentationName(baseText);
                // TODO: which other thing do we have to provide?
                Component c = visPlugin.createComponent(visInput, null);
                // add the styles
                c.addStyleName("corpus-font");
                c.addStyleName("vis-content");
                Link link = new Link();
                link.setCaption("Show in ANNIS search interface");
                link.setIcon(ANNISFontIcon.LOGO);
                link.setVisible(false);
                link.addStyleName("dontprint");
                link.setTargetName("_blank");
                if (argsCopy.containsKey(KEY_SEARCH_INTERFACE)) {
                    String[] interfaceLink = argsCopy.get(KEY_SEARCH_INTERFACE);
                    if (interfaceLink.length > 0) {
                        link.setResource(new ExternalResource(interfaceLink[0]));
                        link.setVisible(true);
                    }
                }
                VerticalLayout layout = new VerticalLayout(link, c);
                layout.setComponentAlignment(link, Alignment.TOP_LEFT);
                layout.setSpacing(true);
                layout.setMargin(true);
                setContent(layout);
                IDGenerator.assignID(link);
            }
        });
    } catch (URISyntaxException ex) {
        displayMessage("Invalid URL", "The provided URL is malformed:<br />" + ex.getMessage());
    } catch (LoginDataLostException ex) {
        displayMessage("LoginData Lost", "No login data available any longer in the session:<br /> " + ex.getMessage());
    } catch (UniformInterfaceException ex) {
        if (ex.getResponse().getStatus() == Response.Status.FORBIDDEN.getStatusCode()) {
            displayMessage("Corpus access forbidden", "You are not allowed to access this corpus. " + "Please login at the <a target=\"_blank\" href=\"" + Helper.getContext() + "\">main application</a> first and then reload this page.");
        } else {
            displayMessage("Service error", ex.getMessage());
        }
    } catch (ClientHandlerException ex) {
        displayMessage("Could not generate the visualization because the ANNIS service reported an error.", ex.getMessage());
    } catch (Throwable ex) {
        displayMessage("Could not generate the visualization.", ex.getMessage() == null ? ("An unknown error of type " + ex.getClass().getSimpleName()) + " occured." : ex.getMessage());
    }
}
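The null-safe extraction of the first document from the fetched project is a small pattern worth isolating. A minimal sketch using only the SaltProject/SCorpusGraph accessors seen above; the helper class and method names are hypothetical:

import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.SaltProject;

public final class SaltProjects {

    // returns the first document of the first corpus graph, or null if the project is empty
    public static SDocument firstDocument(SaltProject p) {
        if (p == null || p.getCorpusGraphs() == null || p.getCorpusGraphs().isEmpty()) {
            return null;
        }
        SCorpusGraph firstGraph = p.getCorpusGraphs().get(0);
        if (firstGraph.getDocuments() == null || firstGraph.getDocuments().isEmpty()) {
            return null;
        }
        return firstGraph.getDocuments().get(0);
    }
}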