Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class AutoTokQuery, method analyzingQuery.
/**
 * Collects all tokens contained in the document graphs of the given project and
 * derives a quoted-text AQL query from one randomly chosen token.
 *
 * <p>If the spanned text of the chosen token is empty, up to 10 further random
 * picks are made. When the text is still empty afterwards {@code finalAql} is
 * set to {@code null}; otherwise it is set to the text wrapped in double quotes.
 * When the project contains no tokens at all, {@code finalAql} is left untouched.
 *
 * @param saltProject the project whose corpus graphs are searched for tokens
 */
@Override
public void analyzingQuery(SaltProject saltProject) {
  List<SToken> tokens = new ArrayList<>();
  for (SCorpusGraph g : saltProject.getCorpusGraphs()) {
    if (g == null) {
      continue;
    }
    for (SDocument doc : g.getDocuments()) {
      SDocumentGraph docGraph = doc.getDocumentGraph();
      if (docGraph == null) {
        // document without an attached graph: nothing to collect
        continue;
      }
      List<SNode> sNodes = docGraph.getNodes();
      if (sNodes == null) {
        continue;
      }
      for (SNode n : sNodes) {
        if (n instanceof SToken) {
          tokens.add((SToken) n);
        }
      }
    }
  }

  // select one random token from the result
  if (!tokens.isEmpty()) {
    Random random = new Random();
    int tries = 10;
    // The bound of nextInt() is exclusive, so tokens.size() covers every index.
    // Using size() - 1 would skip the last token and throw for a single token.
    String text = CommonHelper.getSpannedText(tokens.get(random.nextInt(tokens.size())));
    while ("".equals(text) && tries > 0) {
      text = CommonHelper.getSpannedText(tokens.get(random.nextInt(tokens.size())));
      tries--;
    }
    if ("".equals(text)) {
      finalAql = null;
    } else {
      finalAql = "\"" + text + "\"";
    }
  }
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class CommonHelper, method getTokenAnnotationLevelSet.
/**
 * Gathers the token annotation levels of every document graph in a project.
 *
 * <p>Each document graph is handed to the overload of this method that accepts
 * a single {@code SDocumentGraph}; the individual results are merged into one
 * sorted set.
 *
 * @param p the project whose corpus graphs and documents are visited
 * @return the union of the per-document results, in natural string order
 */
public static Set<String> getTokenAnnotationLevelSet(SaltProject p) {
  Set<String> levels = new TreeSet<String>();
  for (SCorpusGraph corpusGraph : p.getCorpusGraphs()) {
    for (SDocument document : corpusGraph.getDocuments()) {
      SDocumentGraph documentGraph = document.getDocumentGraph();
      Set<String> levelsOfDocument = getTokenAnnotationLevelSet(documentGraph);
      levels.addAll(levelsOfDocument);
    }
  }
  return levels;
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class SaltProjectProvider, method writeTo.
/**
 * Serializes all document graphs of a Salt project as one XMI document to the
 * given entity stream.
 *
 * <p>Before a document graph is written, every feature of its document that
 * lives in the ANNIS namespace is copied onto the graph. Documents without a
 * document graph are skipped. An {@link XMLStreamException} raised while
 * writing is logged and not propagated.
 *
 * @param project the project to serialize
 * @param type unused
 * @param genericType unused
 * @param annotations unused
 * @param mediaType only used for the debug log message
 * @param httpHeaders unused
 * @param entityStream the stream the UTF-8 encoded XML is written to
 * @throws IOException declared by the interface
 * @throws WebApplicationException declared by the interface
 */
@Override
public void writeTo(SaltProject project, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
  SaltXML10Writer writer = new SaltXML10Writer();
  try {
    XMLStreamWriter xml = outFactory.createXMLStreamWriter(entityStream, "UTF-8");
    xml.writeStartDocument("1.1");
    xml.writeCharacters("\n");
    long startTime = System.currentTimeMillis();
    // output XMI root element
    writer.writeXMIRootElement(xml);
    for (SCorpusGraph corpusGraph : project.getCorpusGraphs()) {
      for (SDocument doc : corpusGraph.getDocuments()) {
        SDocumentGraph docGraph = doc.getDocumentGraph();
        if (docGraph == null) {
          // nothing to serialize for a document that has no graph attached
          continue;
        }
        // make sure that any ANNIS feature on the document is copied to the document graph
        for (SFeature feat : doc.getFeatures()) {
          if (AnnisConstants.ANNIS_NS.equals(feat.getNamespace())) {
            SFeature newFeat = SaltFactory.createSFeature();
            feat.copy(newFeat);
            docGraph.addFeature(newFeat);
          }
        }
        writer.writeObjects(xml, docGraph);
      }
    }
    xml.writeEndDocument();
    // The stream writer may buffer internally; push everything to the entity
    // stream before returning so the response body is not truncated.
    xml.flush();
    long endTime = System.currentTimeMillis();
    log.debug("Saving XMI ({}) needed {} ms", mediaType, endTime - startTime);
  } catch (XMLStreamException ex) {
    log.error("exception when serializing SDocumentGraph", ex);
  }
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class SaltProjectProvider, method readFrom.
/**
 * Parses an XMI entity stream into a Salt project.
 *
 * <p>Every document graph delivered by the content handler gets its own corpus
 * graph. The corpus/document hierarchy is rebuilt from the path segments of the
 * first node of the graph, or from the path of the graph itself when it has no
 * nodes: every segment but the last becomes a (sub-)corpus, the last one the
 * document. Graphs for which no document could be created are dropped.
 *
 * <p>Parser configuration and SAX errors are logged and not propagated; the
 * project built so far is returned in that case.
 *
 * @param type unused
 * @param genericType unused
 * @param annotations unused
 * @param mediaType unused
 * @param httpHeaders unused
 * @param entityStream the stream to parse, read as UTF-8
 * @return the project built from the parsed document graphs
 * @throws IOException if reading from the stream fails
 * @throws WebApplicationException declared by the interface
 */
@Override
public SaltProject readFrom(Class<SaltProject> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
  SaltProject result = SaltFactory.createSaltProject();
  SAXParserFactory factory = SAXParserFactory.newInstance();
  // The entity stream comes from a remote peer: do not resolve external entities.
  disableExternalEntities(factory);
  MixedContentHandler handler = new MixedContentHandler();
  try {
    SAXParser parser = factory.newSAXParser();
    XMLReader xmlReader = parser.getXMLReader();
    xmlReader.setContentHandler(handler);
    InputSource source = new InputSource(entityStream);
    source.setEncoding("UTF-8");
    xmlReader.parse(source);
    for (SDocumentGraph g : handler.getDocGraphs()) {
      // create a separate corpus graph for each document
      SCorpusGraph corpusGraph = SaltFactory.createSCorpusGraph();
      SCorpus parentCorpus = null;
      SDocument doc = null;
      List<SNode> nodes = g.getNodes();
      Iterator<String> it;
      if (nodes != null && !nodes.isEmpty()) {
        // the path of each node ID is always the document/corpus path
        it = nodes.get(0).getPath().segmentsList().iterator();
      } else {
        // Old salt versions had a separate ID for the document graph
        // which was the document name with the suffix "_graph".
        // Thus this method of getting the corpus path is only the fallback.
        it = g.getPath().segmentsList().iterator();
      }
      while (it.hasNext()) {
        String name = it.next();
        if (it.hasNext()) {
          // this is a sub-corpus
          parentCorpus = corpusGraph.createCorpus(parentCorpus, name);
        } else {
          // no more path elements left, must be a document
          doc = corpusGraph.createDocument(parentCorpus, name);
        }
      }
      if (doc != null) {
        result.addCorpusGraph(corpusGraph);
        doc.setDocumentGraph(g);
      }
    }
  } catch (ParserConfigurationException | SAXException ex) {
    log.error("Error when parsing XMI", ex);
  }
  return result;
}

/**
 * Turns off resolution of external entities and external DTDs on the factory.
 * A feature the underlying parser does not know is logged and skipped, so that
 * parsing still works with such a parser.
 */
private void disableExternalEntities(SAXParserFactory factory) {
  String[] features = {
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
  };
  for (String feature : features) {
    try {
      factory.setFeature(feature, false);
    } catch (ParserConfigurationException | SAXException ex) {
      log.warn("Could not disable XML parser feature {}", feature, ex);
    }
  }
}
Aggregations