use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class FstLinkingEngineTest method setup.
@BeforeClass
public static void setup() throws Exception {
    // get the working directory
    // use property substitution to test this feature!
    String prefix = System.getProperty("basedir") == null ? "." : "${basedir}";
    String solrServerDir = prefix + TEST_INDEX_REL_PATH;
    log.info("Test Solr Server Directory: {}", solrServerDir);
    System.setProperty(ManagedSolrServer.MANAGED_SOLR_DIR_PROPERTY, solrServerDir);
    SolrYardConfig config = new SolrYardConfig(TEST_YARD_ID, TEST_SOLR_CORE_NAME);
    config.setAllowInitialisation(false);
    //the dbpedia default data
    config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION);
    //init from datafile provider
    config.setAllowInitialisation(true);
    config.setName("DBpedia.org default data");
    config.setDescription("Data used for FstLinkingEngine tests");
    // create the Yard used for the tests
    IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation());
    SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance()
            .getSolrServer(solrIndexRef, config.getIndexConfigurationName());
    Assert.assertNotNull("Unable to initialise SolrServer for testing", server);
    core = ((EmbeddedSolrServer) server).getCoreContainer().getCore(solrIndexRef.getIndex());
    Assert.assertNotNull("Unable to get SolrCore '" + config.getIndexConfigurationName()
            + "' from SolrServer " + server, core);
    yard = new SolrYard(server, config, null);
    //setup the index configuration
    LanguageConfiguration langConf = new LanguageConfiguration("not.used",
            new String[] { "en;field=dbpedia-ont:surfaceForm;generate=true" });
    fstConfig = new IndexConfiguration(langConf, core, FieldEncodingEnum.SolrYard, "");
    fstConfig.setExecutorService(Executors.newFixedThreadPool(1));
    fstConfig.setTypeField("rdf:type");
    fstConfig.setRankingField("entityhub:entityRank");
    //fstConfig.setEntityCacheManager(new FastLRUCacheManager(2048));
    fstConfig.setOrigin(new PlainLiteralImpl(TEST_ORIGIN));
    //activate this configuration
    fstConfig.activate();
    //validate that the index contains the expected entities
    validateTestIndex();
    //now create the FST models
    List<Future<?>> creationTasks = new ArrayList<Future<?>>();
    for (CorpusInfo corpus : fstConfig.getCorpora()) {
        Assert.assertTrue("Failure in UnitTest - all FST models need to be generate=true",
                corpus.allowCreation);
        if (!corpus.isFstFile()) {
            //create a task on the FST corpus creation service
            creationTasks.add(fstConfig.getExecutorService()
                    .submit(new CorpusCreationTask(fstConfig, corpus)));
        }
    }
    //wait until all models are built (should only take a few seconds on typical hardware)
    for (Future<?> future : creationTasks) {
        try {
            future.get(FST_CREATION_WAIT_TIME, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            // we assert on future.isDone instead
        }
        Assert.assertTrue("FST Model creation not finished after " + FST_CREATION_WAIT_TIME
                + " seconds", future.isDone());
    }
}
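For reference, a minimal standalone sketch of the two PlainLiteralImpl constructors these examples exercise; the class and variable names in the sketch are illustrative, not taken from the Stanbol sources:

import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;

public class PlainLiteralDemo {
    public static void main(String[] args) {
        // plain literal without a language tag (as used for the origin above)
        Literal plain = new PlainLiteralImpl("example origin value");
        // literal with an explicit language tag
        Literal tagged = new PlainLiteralImpl("example text", new Language("en"));
        System.out.println(plain.getLexicalForm() + " / " + tagged.getLanguage());
    }
}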
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class Nif20MetadataEngine method writeSpan.
/**
 * Writes basic information of the parsed span by using NIF 2.0 including the
 * {@link SsoOntology} Sentence/Phrase/Word type based on
 * the {@link Span#getType()}<p>
 * As {@link AnalysedText} is based on the plain text version of the ContentItem
 * this uses the {@link StringOntology#OffsetBasedString} notation.<p>
 * <i>NOTE:</i> This DOES NOT write string relations, lemma, pos ... information
 * that might be stored as {@link Annotation} with the parsed {@link Span}.
 * @param graph the graph to add the triples to
 * @param base the base URI
 * @param text the {@link AnalysedText}
 * @param language the {@link Language} or <code>null</code> if not known
 * @param span the {@link Span} to write.
 * @return the {@link IRI} representing the parsed {@link Span} in the
 * graph
 */
public IRI writeSpan(Graph graph, IRI base, AnalysedText text, Language language, Span span) {
    IRI segment = Nif20Helper.getNifRFC5147URI(base, span.getStart(),
            span.getType() == SpanTypeEnum.Text ? -1 : span.getEnd());
    if (!contextOnlyUriScheme || span.getType() == SpanTypeEnum.Text) {
        graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.RFC5147String.getUri()));
    }
    if (writeSelectors) {
        if (span.getEnd() - span.getStart() < 100) {
            graph.add(new TripleImpl(segment, Nif20.anchorOf.getUri(),
                    new PlainLiteralImpl(span.getSpan(), language)));
        } else {
            graph.add(new TripleImpl(segment, Nif20.head.getUri(),
                    new PlainLiteralImpl(span.getSpan().substring(0, 10), language)));
        }
        graph.add(new TripleImpl(segment, Nif20.beginIndex.getUri(),
                lf.createTypedLiteral(span.getStart())));
        graph.add(new TripleImpl(segment, Nif20.endIndex.getUri(),
                lf.createTypedLiteral(span.getEnd())));
        String content = text.getSpan();
        if (span.getType() != SpanTypeEnum.Text) {
            //prefix and suffix
            int prefixStart = Math.max(0, span.getStart() - DEFAULT_PREFIX_SUFFIX_LENGTH);
            graph.add(new TripleImpl(segment, Nif20.before.getUri(),
                    new PlainLiteralImpl(content.substring(prefixStart, span.getStart()), language)));
            int suffixEnd = Math.min(span.getEnd() + DEFAULT_PREFIX_SUFFIX_LENGTH, text.getEnd());
            graph.add(new TripleImpl(segment, Nif20.after.getUri(),
                    new PlainLiteralImpl(content.substring(span.getEnd(), suffixEnd), language)));
        }
    }
    if (writeStringType) {
        graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.String.getUri()));
    }
    switch (span.getType()) {
        case Token:
            graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.Word.getUri()));
            break;
        case Chunk:
            graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.Phrase.getUri()));
            break;
        case Sentence:
            graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.Sentence.getUri()));
            break;
        case Text:
            graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.Context.getUri()));
            break;
        default:
            if (!writeStringType) {
                graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.String.getUri()));
            }
    }
    return segment;
}
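A sketch of how a caller might drive writeSpan over the spans of a content item. The engine instance engine, the ContentItem ci, its AnalysedText at and the detected language are assumed context, and the sketch relies on AnalysedText itself being a Span of type Text:

// write the nif:Context for the whole text, then all enclosed spans
Graph graph = ci.getMetadata();
IRI base = ci.getUri();
engine.writeSpan(graph, base, at, language, at); // AnalysedText is itself the Text span
Iterator<Span> spans = at.getEnclosed(EnumSet.of(
        SpanTypeEnum.Sentence, SpanTypeEnum.Chunk, SpanTypeEnum.Token));
while (spans.hasNext()) {
    engine.writeSpan(graph, base, at, language, spans.next());
}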
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class MultipartRequestTest method addTagAsTextAnnotation.
/**
 * Utility that creates a {@link TechnicalClasses#ENHANCER_TEXTANNOTATION TextAnnotation}
 * for the parsed content item, free text tag and user.
 * @param graph the graph to add the information to
 * @param contentItem the {@link ContentItem#getUri() uri} of the {@link ContentItem}
 * @param tag the free text tag for the document
 * @param tagType the type of the tag. Typically Stanbol supports: <ul>
 * <li>{@link OntologicalClasses#DBPEDIA_PERSON}
 * <li>{@link OntologicalClasses#DBPEDIA_ORGANISATION}
 * <li>{@link OntologicalClasses#DBPEDIA_PLACE}
 * </ul>
 * But specific {@link EnhancementEngine}s might also process other types
 * or even TextAnnotations without a type
 * @param user the user that created the tag
 * @return the uri of the created annotation
 */
private static final IRI addTagAsTextAnnotation(Graph graph, IRI contentItem, String tag, IRI tagType, RDFTerm user) {
    IRI ta = new IRI("urn:user-annotation:" + EnhancementEngineHelper.randomUUID());
    graph.add(new TripleImpl(ta, RDF.type, TechnicalClasses.ENHANCER_TEXTANNOTATION));
    graph.add(new TripleImpl(ta, Properties.ENHANCER_EXTRACTED_FROM, contentItem));
    if (tagType != null) {
        graph.add(new TripleImpl(ta, Properties.DC_TYPE, tagType));
    }
    graph.add(new TripleImpl(ta, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag)));
    graph.add(new TripleImpl(ta, RDF.type, TechnicalClasses.ENHANCER_ENHANCEMENT));
    if (user != null) {
        graph.add(new TripleImpl(ta, Properties.DC_CREATOR, user));
    }
    return ta;
}
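A minimal sketch of a call to this utility; the graph, content item URI and user value are illustrative, and a PlainLiteralImpl is used for the user to match the RDFTerm parameter type:

Graph graph = new SimpleGraph();
IRI contentItem = new IRI("http://www.example.org/content/test.txt"); // hypothetical
RDFTerm user = new PlainLiteralImpl("Test User"); // hypothetical
IRI ta = addTagAsTextAnnotation(graph, contentItem, "Paris",
        OntologicalClasses.DBPEDIA_PLACE, user);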
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class IndexedGraphTest method createGraph.
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    //randoms are in the range [0..3]
    double l = 1.0; //literal
    double i = l / 3; //int
    double d = l * 2 / 3; //double
    double b = 2.0; //bNode
    double nb = b - (l * 2 / 3); //create new bNode
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) { //literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                Literal text;
                //rescale random from (d..l] to (0..1] to choose the language;
                //comparing against i and d again would always hit the last branch
                double langRnd = (random - d) / (l - d);
                if (langRnd <= 1.0 / 3) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (langRnd <= 2.0 / 3) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) { //bNode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else { //>nb <=b: reuse an existing bNode
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else { //IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
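A sketch of invoking the generator: a plain ArrayList satisfies the Collection&lt;Triple&gt; parameter (the test itself passes its IndexedGraph instances here), and a fixed seed makes the generated data reproducible across runs:

Collection<Triple> tc = new ArrayList<Triple>();
createGraph(tc, 1000, 42L); // 1000 pseudo-random triples, reproducible via seed 42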
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class UIMAToTriples method computeEnhancements.
public void computeEnhancements(ContentItem ci) throws EngineException {
    FeatureStructureListHolder holder;
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    try {
        IRI uimaIRI = new IRI(uimaUri);
        logger.info("Trying to load holder for ref: {}", uimaUri);
        holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
        for (String source : sourceNames) {
            logger.info("Processing UIMA source: {}", source);
            List<FeatureStructure> sourceList = holder.getFeatureStructureList(source);
            if (sourceList != null) {
                logger.info("UIMA source: {} contains {} annotations.", source, sourceList.size());
            } else {
                logger.info("Source list is null: {}", source);
                continue;
            }
            for (FeatureStructure fs : sourceList) {
                String typeName = fs.getTypeName();
                logger.debug("Checking {}", typeName);
                if (tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
                    logger.debug("Adding {}", typeName);
                    IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    Graph metadata = ci.getMetadata();
                    String uriRefStr = uimaUri + ":" + typeName;
                    if (mappings.containsKey(typeName)) {
                        uriRefStr = mappings.get(typeName);
                    }
                    metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(uriRefStr)));
                    if (fs.getFeature("begin") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
                                literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
                    }
                    if (fs.getFeature("end") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
                                literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
                    }
                    if (fs.getCoveredText() != null && !fs.getCoveredText().isEmpty()) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                                new PlainLiteralImpl(fs.getCoveredText())));
                    }
                    for (Feature f : fs.getFeatures()) {
                        if (!f.getName().equals("begin") && !f.getName().equals("end")
                                && tnfs.checkFeatureToConvert(typeName, f)) {
                            String predRefStr = uimaUri + ":" + f.getName();
                            if (mappings.containsKey(f.getName())) {
                                predRefStr = mappings.get(f.getName());
                            }
                            IRI predicate = new IRI(predRefStr);
                            metadata.add(new TripleImpl(textAnnotation, predicate,
                                    new PlainLiteralImpl(f.getValueAsString())));
                        }
                    }
                }
            }
        }
    } catch (NoSuchPartException e) {
        logger.error("No UIMA results found with ref: {}", uimaUri, e);
    }
}
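UIMA type and feature names above are mapped to RDF URIs by first consulting the configured mappings and falling back to uimaUri + ":" + name. A standalone sketch of that lookup, with a hypothetical method name:

// Resolve the RDF URI for a UIMA type or feature name: prefer a configured
// mapping, otherwise fall back to <uimaUri>:<name>.
static IRI resolveUri(Map<String, String> mappings, String uimaUri, String name) {
    return new IRI(mappings.containsKey(name) ? mappings.get(name) : uimaUri + ":" + name);
}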