Search in sources :

Example 26 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

From the class FstLinkingEngineTest, the method setup():

@BeforeClass
public static void setup() throws Exception {
    // get the working directory; use property substitution to also test
    // that feature of the managed Solr server configuration!
    String prefix = System.getProperty("basedir") == null ? "." : "${basedir}";
    String solrServerDir = prefix + TEST_INDEX_REL_PATH;
    log.info("Test Solr Server Directory: {}", solrServerDir);
    System.setProperty(ManagedSolrServer.MANAGED_SOLR_DIR_PROPERTY, solrServerDir);
    SolrYardConfig config = new SolrYardConfig(TEST_YARD_ID, TEST_SOLR_CORE_NAME);
    //the dbpedia default data
    config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION);
    //init from datafile provider (a redundant setAllowInitialisation(false)
    //call that was immediately overridden here was removed)
    config.setAllowInitialisation(true);
    config.setName("DBpedia.org default data");
    config.setDescription("Data used for FstLinkingEngine tests");
    // create the Yard used for the tests
    IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation());
    SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance()
            .getSolrServer(solrIndexRef, config.getIndexConfigurationName());
    Assert.assertNotNull("Unable to initialise SolrServer for testing", server);
    core = ((EmbeddedSolrServer) server).getCoreContainer().getCore(solrIndexRef.getIndex());
    Assert.assertNotNull("Unable to get SolrCore '" + config.getIndexConfigurationName()
            + "' from SolrServer " + server, core);
    yard = new SolrYard(server, config, null);
    //setup the index configuration: a single language (en) using the
    //dbpedia-ont:surfaceForm field with FST model generation enabled
    LanguageConfiguration langConf = new LanguageConfiguration("not.used",
            new String[] { "en;field=dbpedia-ont:surfaceForm;generate=true" });
    fstConfig = new IndexConfiguration(langConf, core, FieldEncodingEnum.SolrYard, "");
    fstConfig.setExecutorService(Executors.newFixedThreadPool(1));
    fstConfig.setTypeField("rdf:type");
    fstConfig.setRankingField("entityhub:entityRank");
    //fstConfig.setEntityCacheManager(new FastLRUCacheManager(2048));
    fstConfig.setOrigin(new PlainLiteralImpl(TEST_ORIGIN));
    //activate this configuration
    fstConfig.activate();
    //validate that the index contains the expected entities
    validateTestIndex();
    //now create the FST models
    List<Future<?>> creationTasks = new ArrayList<Future<?>>();
    for (CorpusInfo corpus : fstConfig.getCorpora()) {
        Assert.assertTrue("Failure in UnitTest - all FST models need to be generate=true",
                corpus.allowCreation);
        if (!corpus.isFstFile()) {
            //create a task on the FST corpus creation service
            creationTasks.add(fstConfig.getExecutorService().submit(
                    new CorpusCreationTask(fstConfig, corpus)));
        }
    }
    //wait for the creation tasks; the time limit assumes typical hardware
    for (Future<?> future : creationTasks) {
        try {
            future.get(FST_CREATION_WAIT_TIME, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            // ignored: we assert on future.isDone() instead
        }
        Assert.assertTrue("FST Model creation not finished after " + FST_CREATION_WAIT_TIME
                + " seconds", future.isDone());
    }
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrYard(org.apache.stanbol.entityhub.yard.solr.impl.SolrYard) SolrYardConfig(org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig) IndexConfiguration(org.apache.stanbol.enhancer.engines.lucenefstlinking.IndexConfiguration) CorpusInfo(org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusInfo) Future(java.util.concurrent.Future) CorpusCreationTask(org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusCreationTask) LanguageConfiguration(org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration) IndexReference(org.apache.stanbol.commons.solr.IndexReference) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) TimeoutException(java.util.concurrent.TimeoutException) BeforeClass(org.junit.BeforeClass)

Example 27 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

From the class Nif20MetadataEngine, the method writeSpan():

/**
 * Writes the basic triples for the parsed span using NIF 2.0, including the
 * Word/Phrase/Sentence/Context type derived from {@link Span#getType()}.<p>
 * As {@link AnalysedText} operates on the plain text version of the
 * ContentItem, RFC 5147 offset based URIs are used for the spans.<p>
 * <i>NOTE:</i> This DOES NOT write string relations, lemma, pos ... information
 * that might be stored as {@link Annotation} with the parsed {@link Span}.
 * @param graph the graph to add the triples to
 * @param base the base URI
 * @param text the {@link AnalysedText}
 * @param language the {@link Language} or <code>null</code> if not known
 * @param span the {@link Span} to write
 * @return the {@link IRI} representing the parsed {@link Span} in the graph
 */
public IRI writeSpan(Graph graph, IRI base, AnalysedText text, Language language, Span span) {
    boolean isContext = span.getType() == SpanTypeEnum.Text;
    //the whole text (context) span uses -1 as its end offset
    IRI spanUri = Nif20Helper.getNifRFC5147URI(base, span.getStart(),
            isContext ? -1 : span.getEnd());
    if (!contextOnlyUriScheme || isContext) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.RFC5147String.getUri()));
    }
    if (writeSelectors) {
        //short spans anchor the full text, long ones only a 10 char head
        if (span.getEnd() - span.getStart() < 100) {
            graph.add(new TripleImpl(spanUri, Nif20.anchorOf.getUri(),
                    new PlainLiteralImpl(span.getSpan(), language)));
        } else {
            graph.add(new TripleImpl(spanUri, Nif20.head.getUri(),
                    new PlainLiteralImpl(span.getSpan().substring(0, 10), language)));
        }
        graph.add(new TripleImpl(spanUri, Nif20.beginIndex.getUri(),
                lf.createTypedLiteral(span.getStart())));
        graph.add(new TripleImpl(spanUri, Nif20.endIndex.getUri(),
                lf.createTypedLiteral(span.getEnd())));
        String plainText = text.getSpan();
        if (!isContext) {
            //also write the text immediately before and after the span
            int beforeStart = Math.max(0, span.getStart() - DEFAULT_PREFIX_SUFFIX_LENGTH);
            graph.add(new TripleImpl(spanUri, Nif20.before.getUri(),
                    new PlainLiteralImpl(plainText.substring(beforeStart, span.getStart()), language)));
            int afterEnd = Math.min(span.getEnd() + DEFAULT_PREFIX_SUFFIX_LENGTH, text.getEnd());
            graph.add(new TripleImpl(spanUri, Nif20.after.getUri(),
                    new PlainLiteralImpl(plainText.substring(span.getEnd(), afterEnd), language)));
        }
    }
    if (writeStringType) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.String.getUri()));
    }
    //map the span type to the corresponding NIF 2.0 class
    SpanTypeEnum spanType = span.getType();
    if (spanType == SpanTypeEnum.Token) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.Word.getUri()));
    } else if (spanType == SpanTypeEnum.Chunk) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.Phrase.getUri()));
    } else if (spanType == SpanTypeEnum.Sentence) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.Sentence.getUri()));
    } else if (spanType == SpanTypeEnum.Text) {
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.Context.getUri()));
    } else if (!writeStringType) {
        //unknown span types are at least typed as nif:String
        graph.add(new TripleImpl(spanUri, RDF_TYPE, Nif20.String.getUri()));
    }
    return spanUri;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 28 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

From the class MultipartRequestTest, the method addTagAsTextAnnotation():

/**
 * Utility that creates a {@link TechnicalClasses#ENHANCER_TEXTANNOTATION TextAnnotation}
 * for the parsed content item, free-text tag and user.
 * @param graph the graph to add the information to
 * @param contentItem the {@link ContentItem#getUri() uri} of the {@link ContentItem}
 * @param tag the free-text tag for the document
 * @param tagType the type of the tag. Typically Stanbol supports: <ul>
 * <li>{@link OntologicalClasses#DBPEDIA_PERSON}
 * <li>{@link OntologicalClasses#DBPEDIA_ORGANISATION}
 * <li>{@link OntologicalClasses#DBPEDIA_PLACE}
 * </ul>
 * But specific {@link EnhancementEngine}s might also process other types
 * or even TextAnnotations without a type
 * @param user the user that created the tag, or <code>null</code>
 * @return the uri of the created annotation
 */
private static final IRI addTagAsTextAnnotation(Graph graph, IRI contentItem, String tag, IRI tagType, RDFTerm user) {
    //mint a fresh URI for the user annotation
    IRI annotation = new IRI("urn:user-annotation:" + EnhancementEngineHelper.randomUUID());
    graph.add(new TripleImpl(annotation, RDF.type, TechnicalClasses.ENHANCER_TEXTANNOTATION));
    graph.add(new TripleImpl(annotation, Properties.ENHANCER_EXTRACTED_FROM, contentItem));
    if (tagType != null) {
        //the type is optional
        graph.add(new TripleImpl(annotation, Properties.DC_TYPE, tagType));
    }
    graph.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag)));
    graph.add(new TripleImpl(annotation, RDF.type, TechnicalClasses.ENHANCER_ENHANCEMENT));
    if (user != null) {
        //attribute the annotation to its creator
        graph.add(new TripleImpl(annotation, Properties.DC_CREATOR, user));
    }
    return annotation;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 29 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

From the class IndexedGraphTest, the method createGraph():

/**
 * Fills the parsed collection with randomly generated test triples until it
 * contains at least the requested number of triples.
 * @param tc the collection to add the generated triples to
 * @param triples the minimum number of triples to generate
 * @param seed optional random seed for reproducible graphs, or <code>null</code>
 */
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    //randoms are in the range [0..3]:
    //  [0..l] .. literal object ([0..i] int, (i..d] double, (d..l] text)
    //  (l..b] .. bNode object ((l..nb] new bNode, (nb..b] existing bNode)
    //  (b..3] .. IRI object
    double l = 1.0;
    //int
    double i = l / 3;
    //double
    double d = l * 2 / 3;
    //bNode
    double b = 2.0;
    //create new bNode
    double nb = b - (l * 2 / 3);
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        //select a new subject for ~1/6 of the triples (always on the first)
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        //select a new predicate for ~1/3 of the triples (always on the first)
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) {
            //literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                //text literal: split the remaining (d..l] range into three
                //equal parts for plain / English / German literals.
                //BUGFIX: the previous code re-compared random against i and d,
                //which random always exceeds here, so only German literals
                //were ever generated.
                double langRnd = (random - d) / (l - d);
                Literal text;
                if (langRnd <= 1.0 / 3) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (langRnd <= 2.0 / 3) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) {
            //bnode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                //>nb <b : reuse an existing bNode
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else {
            //IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Random(java.util.Random) Language(org.apache.clerezza.commons.rdf.Language) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 30 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

From the class UIMAToTriples, the method computeEnhancements():

/**
 * Converts the UIMA {@link FeatureStructure}s stored as a part of the parsed
 * {@link ContentItem} to Stanbol TextAnnotations in the content item metadata.
 * @param ci the content item to enhance
 * @throws EngineException declared by the interface; not thrown here, as a
 * missing UIMA part is only logged
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    FeatureStructureListHolder holder;
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    try {
        IRI uimaIRI = new IRI(uimaUri);
        //use parameterized SLF4J logging instead of eagerly building the
        //messages with StringBuilder (messages are identical, but are only
        //constructed when the log level is enabled)
        logger.info("Trying to load holder for ref:{}", uimaUri);
        holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
        for (String source : sourceNames) {
            logger.info("Processing UIMA source:{}", source);
            List<FeatureStructure> sourceList = holder.getFeatureStructureList(source);
            if (sourceList != null) {
                logger.info("UIMA source:{} contains {} annotations.", source, sourceList.size());
            } else {
                logger.info("Source list is null:{}", source);
                continue;
            }
            for (FeatureStructure fs : sourceList) {
                String typeName = fs.getTypeName();
                logger.debug("Checking {}", typeName);
                if (tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
                    logger.debug("Adding {}", typeName);
                    IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    Graph metadata = ci.getMetadata();
                    //the dc:type defaults to "<uimaUri>:<typeName>" unless mapped
                    String uriRefStr = uimaUri + ":" + typeName;
                    if (mappings.containsKey(typeName)) {
                        uriRefStr = mappings.get(typeName);
                    }
                    metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(uriRefStr)));
                    //"begin"/"end" features map to the enhancer start/end offsets
                    if (fs.getFeature("begin") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
                                literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
                    }
                    if (fs.getFeature("end") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
                                literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
                    }
                    if (fs.getCoveredText() != null && !fs.getCoveredText().isEmpty()) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                                new PlainLiteralImpl(fs.getCoveredText())));
                    }
                    //all remaining (configured) features become plain literal properties
                    for (Feature f : fs.getFeatures()) {
                        if (!f.getName().equals("begin") && !f.getName().equals("end")
                                && tnfs.checkFeatureToConvert(typeName, f)) {
                            String predRefStr = uimaUri + ":" + f.getName();
                            if (mappings.containsKey(f.getName())) {
                                predRefStr = mappings.get(f.getName());
                            }
                            IRI predicate = new IRI(predRefStr);
                            metadata.add(new TripleImpl(textAnnotation, predicate,
                                    new PlainLiteralImpl(f.getValueAsString())));
                        }
                    }
                }
            }
        }
    } catch (NoSuchPartException e) {
        //best effort: a content item without UIMA results is only logged
        logger.error("No UIMA results found with ref:{}", uimaUri, e);
    }
}
Also used : FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Feature(org.apache.stanbol.commons.caslight.Feature) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Aggregations

PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)82 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)69 IRI (org.apache.clerezza.commons.rdf.IRI)58 Graph (org.apache.clerezza.commons.rdf.Graph)34 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)20 Language (org.apache.clerezza.commons.rdf.Language)19 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)18 Literal (org.apache.clerezza.commons.rdf.Literal)16 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)16 IOException (java.io.IOException)14 HashMap (java.util.HashMap)13 Triple (org.apache.clerezza.commons.rdf.Triple)12 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)12 ArrayList (java.util.ArrayList)11 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)11 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)10 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)10 Test (org.junit.Test)10 HashSet (java.util.HashSet)8 SOAPException (javax.xml.soap.SOAPException)6