Example 36 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class UIMALocal method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    JCas jcas;
    try {
        logger.info("Processing text with UIMA AE...");
        jcas = processText(text);
    } catch (ResourceInitializationException ex) {
        logger.error("Error initializing UIMA AE", ex);
        throw new EngineException("Error initializing UIMA AE", ex);
    } catch (AnalysisEngineProcessException ex) {
        logger.error("Error running UIMA AE", ex);
        throw new EngineException("Error running UIMA AE", ex);
    }
    // just to be sure
    if (jcas == null) {
        return;
    }
    for (String typeName : uimaTypeNames) {
        List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
        IRI uimaIRI = new IRI(uimaUri);
        FeatureStructureListHolder holder;
        ci.getLock().writeLock().lock();
        try {
            holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
        } catch (NoSuchPartException e) {
            holder = new FeatureStructureListHolder();
            logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
            ci.addPart(uimaIRI, holder);
            logger.info(uimaUri + " content part added.");
        } finally {
            ci.getLock().writeLock().unlock();
        }
        ci.getLock().writeLock().lock();
        try {
            holder.addFeatureStructureList(uimaSourceName, featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) JCas(org.apache.uima.jcas.JCas) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder)
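
The IllegalStateException thrown above relies on the EnhancementJobManager calling canEnhance before computeEnhancements. A minimal sketch of such a check, assuming the SUPPORTED_MIMETYPES constant from the snippet and the CANNOT_ENHANCE/ENHANCE_ASYNC constants of the EnhancementEngine interface (the actual UIMALocal implementation may differ):

@Override
public int canEnhance(ContentItem ci) throws EngineException {
    // accept the ContentItem only if it exposes a Blob with a supported mime type
    return ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) == null
            ? CANNOT_ENHANCE : ENHANCE_ASYNC;
}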

Example 37 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class ZemantaEnhancementEngineTest method tesetBioText.

@Test
public void tesetBioText() throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(BIO_DOMAIN_TEXT);
    try {
        zemantaEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    JenaSerializerProvider serializer = new JenaSerializerProvider();
    serializer.serialize(System.out, ci.getMetadata(), TURTLE);
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(zemantaEngine.getClass().getName()));
    // do not require fise:confidence values for fise:TextAnnotations, because
    // the one used to group the TopicAnnotations does not have a confidence value
    int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), BIO_DOMAIN_TEXT, expectedValues);
    log.info(textAnnoNum + " TextAnnotations found ...");
    // adding null as expected for confidence makes it a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    log.info(entityAnnoNum + " EntityAnnotations found ...");
    int topicAnnoNum = EnhancementStructureHelper.validateAllTopicAnnotations(ci.getMetadata(), expectedValues);
    log.info(topicAnnoNum + " TopicAnnotations found ...");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) JenaSerializerProvider(org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)
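
The test uses a wrapAsContentItem(..) helper that is not shown here. A plausible sketch using Stanbol's in-memory ContentItemFactory, assuming the InMemoryContentItemFactory and StringSource classes of the Stanbol enhancer (the actual test base class may build the ContentItem differently):

private static ContentItem wrapAsContentItem(String text) throws IOException {
    ContentItemFactory cif = InMemoryContentItemFactory.getInstance();
    // a StringSource defaults to the text/plain mime type
    return cif.createContentItem(new StringSource(text));
}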

Example 38 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class EntityDereferenceEngine method computeEnhancements.

@Override
public final void computeEnhancements(ContentItem ci) throws EngineException {
    if (offline && !dereferencer.supportsOfflineMode()) {
        // the entity dereferencer no longer supports offline mode
        return;
    }
    log.debug("> dereference Entities for ContentItem {}", ci.getUri());
    long start = System.nanoTime();
    Map<String, Object> enhancementProps = EnhancementEngineHelper.getEnhancementProperties(this, ci);
    final DereferenceContext derefContext;
    final Graph metadata = ci.getMetadata();
    Set<IRI> referencedEntities = new HashSet<IRI>();
    ci.getLock().readLock().lock();
    try {
        // (1) Create the DereferenceContext
        if (filterContentLanguages) {
            // parse the languages detected for the content
            Set<String> contentLanguages = new HashSet<String>();
            for (BlankNodeOrIRI langAnno : EnhancementEngineHelper.getLanguageAnnotations(metadata)) {
                contentLanguages.add(EnhancementEngineHelper.getString(metadata, langAnno, DC_LANGUAGE));
            }
            enhancementProps.put(DereferenceContext.INTERNAL_CONTENT_LANGUAGES, contentLanguages);
        }
        // create the dereference context and handle possible configuration exceptions
        try {
            derefContext = contextFactory.createContext(this, enhancementProps);
            derefContext.setOfflineMode(offline);
        } catch (DereferenceConfigurationException e) {
            StringBuilder message = new StringBuilder("Unsupported Derefernece Configuarion ");
            if (e.getProperty() != null) {
                message.append("for property '").append(e.getProperty()).append("' ");
            }
            message.append(" parsed via the EnhancementProperties of this request!");
            throw new EnhancementPropertyException(this, ci, e.getProperty(), message.toString(), e);
        }
        // parse the referenced entities from the graph
        // (2) read all Entities we need to dereference from the parsed contentItem
        Set<IRI> checked = new HashSet<IRI>();
        // since STANBOL-1334 the list of properties that refer to entities can be configured
        for (IRI referenceProperty : derefContext.getEntityReferences()) {
            Iterator<Triple> entityReferences = metadata.filter(null, referenceProperty, null);
            while (entityReferences.hasNext()) {
                Triple triple = entityReferences.next();
                RDFTerm entityReference = triple.getObject();
                if (// only URIs
                (entityReference instanceof IRI) && // do not check a URI twice
                checked.add((IRI) entityReference) && // fallback mode
                chekcFallbackMode((IRI) entityReference, metadata) && checkURI((IRI) entityReference)) {
                    // URI prefixes and patterns
                    boolean added = referencedEntities.add((IRI) entityReference);
                    if (added && log.isTraceEnabled()) {
                        log.trace("  ... schedule Entity {} (referenced-by: {})", entityReference, referenceProperty);
                    }
                } else if (log.isTraceEnabled()) {
                    log.trace(" ... ignore Entity {} (referenced-by: {})", entityReference, referenceProperty);
                }
            }
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    long schedule = System.nanoTime();
    final Lock writeLock = ci.getLock().writeLock();
    log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
    // (2) dereference the Entities
    ExecutorService executor = dereferencer.getExecutor();
    Set<IRI> failedEntities = new HashSet<IRI>();
    int dereferencedCount = 0;
    List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
    if (executor != null && !executor.isShutdown()) {
        // schedule all entities to dereference
        for (final IRI entity : referencedEntities) {
            DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock, derefContext);
            dereferenceJob.setFuture(executor.submit(dereferenceJob));
            dereferenceJobs.add(dereferenceJob);
        }
        // wait for all entities to be dereferenced
        for (DereferenceJob dereferenceJob : dereferenceJobs) {
            try {
                if (dereferenceJob.await()) {
                    dereferencedCount++;
                }
            } catch (InterruptedException e) {
                // Restore the interrupted status
                Thread.currentThread().interrupt();
                throw new EngineException(this, ci, "Interupted while waiting for dereferencing Entities", e);
            } catch (ExecutionException e) {
                if (e.getCause() instanceof DereferenceException) {
                    failedEntities.add(dereferenceJob.entity);
                    log.debug(" ... error while dereferencing " + dereferenceJob.entity + "!", e);
                } else {
                    // unknown error
                    throw new EngineException(this, ci, "Unchecked Error while " + "dereferencing Entity " + dereferenceJob.entity + "!", e);
                }
            }
        }
    } else {
        // dereference using the current thread
        for (IRI entity : referencedEntities) {
            try {
                log.trace("  ... dereference {}", entity);
                if (dereferencer.dereference(entity, metadata, writeLock, derefContext)) {
                    dereferencedCount++;
                    log.trace("    + success");
                } else {
                    log.trace("    - not found");
                }
            } catch (DereferenceException e) {
                log.debug(" ... error while dereferencing " + entity + "!", e);
                failedEntities.add(entity);
            }
        }
    }
    long end = System.nanoTime();
    // convert nanoseconds to milliseconds, keeping two decimal places
    float scheduleDuration = ((schedule - start) / 10000) / 100f;
    float dereferenceDuration = ((end - schedule) / 10000) / 100f;
    float duration = ((end - start) / 10000) / 100f;
    if (!failedEntities.isEmpty()) {
        log.warn(" - unable to dereference {} of {} for ContentItem {}", new Object[] { failedEntities.size(), referencedEntities.size(), ci.getUri() });
    }
    if (log.isDebugEnabled() && dereferencedCount > 0) {
        log.debug(" - dereferenced {} of {} Entities in {}ms | schedule:{}ms | " + " dereference: {}ms ({}ms/entity)", new Object[] { dereferencedCount, referencedEntities.size(), duration, sheduleDuration, dereferenceDuration, dereferenceDuration / dereferencedCount });
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ArrayList(java.util.ArrayList) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ExecutionException(java.util.concurrent.ExecutionException) HashSet(java.util.HashSet) EnhancementPropertyException(org.apache.stanbol.enhancer.servicesapi.EnhancementPropertyException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Lock(java.util.concurrent.locks.Lock) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) ExecutorService(java.util.concurrent.ExecutorService)
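
The engine submits instances of a DereferenceJob inner class that is not shown above. A minimal sketch of what it might look like, assuming java.util.concurrent.Callable and Future plus the dereferencer field of the enclosing engine (the real inner class may carry additional state):

class DereferenceJob implements Callable<Boolean> {

    final IRI entity;
    private final Graph metadata;
    private final Lock writeLock;
    private final DereferenceContext derefContext;
    private Future<Boolean> future;

    DereferenceJob(IRI entity, Graph metadata, Lock writeLock, DereferenceContext derefContext) {
        this.entity = entity;
        this.metadata = metadata;
        this.writeLock = writeLock;
        this.derefContext = derefContext;
    }

    void setFuture(Future<Boolean> future) {
        this.future = future;
    }

    @Override
    public Boolean call() throws DereferenceException {
        // delegate to the engine's EntityDereferencer; returns true if the
        // entity could be dereferenced
        return dereferencer.dereference(entity, metadata, writeLock, derefContext);
    }

    boolean await() throws InterruptedException, ExecutionException {
        // blocks until the submitted job has completed
        return future.get();
    }
}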

Example 39 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class DBPSpotlightSpotEnhancementEngine method doPostRequest.

/**
 * Sends a POST request to the DBpediaSpotlight url.
 *
 * @param text
 *            a <code>String</code> with the text to be analyzed
 * @param contentItemUri
 *            the URI of the ContentItem (only used for logging)
 * @return a <code>String</code> with the server response
 * @throws EngineException
 *             if the request cannot be sent
 */
protected Collection<SurfaceForm> doPostRequest(String text, IRI contentItemUri) throws EngineException {
    // rwesten: reimplemented this so that the request data
    // is directly written to the request stream instead of
    // being stored in an in-memory StringBuilder
    HttpURLConnection connection = null;
    BufferedWriter wr = null;
    try {
        connection = (HttpURLConnection) spotlightUrl.openConnection();
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.setRequestProperty("Accept", "text/xml");
        // set ConnectionTimeout (if configured)
        if (connectionTimeout > 0) {
            connection.setConnectTimeout(connectionTimeout * 1000);
            connection.setReadTimeout(connectionTimeout * 1000);
        }
        connection.setUseCaches(false);
        connection.setDoInput(true);
        connection.setDoOutput(true);
        // Send request
        wr = new BufferedWriter(new OutputStreamWriter(connection.getOutputStream(), UTF8));
    } catch (IOException e) {
        IOUtils.closeQuietly(wr);
        throw new EngineException("Unable to open connection to " + spotlightUrl, e);
    }
    try {
        if (spotlightSpotter != null && !spotlightSpotter.isEmpty()) {
            wr.write("spotter=");
            wr.write(URLEncoder.encode(spotlightSpotter, UTF8.name()));
            wr.write('&');
        }
        wr.write("text=");
        // now append the URL encoded text
        // TODO: This will load the URLEncoded variant in-memory.
        // One could avoid that by encoding the data in smaller
        // pieces, but using URLEncoding for big data is anyway
        // very inefficient. So instead of fixing this issue here
        // DBpedia Spotlight should support "multipart/form-data"
        // instead.
        // As soon as this is supported this should be re-implemented
        // to support streaming.
        wr.write(URLEncoder.encode(text, UTF8.name()));
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("The platform does not support encoding " + UTF8.name(), e);
    } catch (IOException e) {
        throw new EngineException("Unable to write 'plain/text' content " + "for ContentItem " + contentItemUri + " to " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(wr);
    }
    // rwesten: reimplemented this to read the XML
    // Document directly from the response
    InputStream is = null;
    Document xmlDoc;
    try {
        // Get Response
        is = connection.getInputStream();
        xmlDoc = loadXMLFromInputStream(is);
    } catch (IOException e) {
        throw new EngineException("Unable to spot Entities with" + "Dbpedia Spotlight Spot RESTful Serice running at " + spotlightUrl, e);
    } catch (SAXException e) {
        throw new EngineException("Unable to parse Response from " + "Dbpedia Spotlight Spot RESTful Serice running at " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(is);
    }
    // connection.disconnect();
    return SurfaceForm.parseSurfaceForm(xmlDoc);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) XMLParser.loadXMLFromInputStream(org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream) InputStream(java.io.InputStream) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) Document(org.w3c.dom.Document) BufferedWriter(java.io.BufferedWriter) SAXException(org.xml.sax.SAXException)
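
A hypothetical caller sketch showing how computeEnhancements(..) might feed the plain text of a ContentItem into doPostRequest(..); the contentPart and log names are illustrative only:

String text = ContentItemHelper.getText(contentPart.getValue());
Collection<SurfaceForm> surfaceForms = doPostRequest(text, ci.getUri());
for (SurfaceForm surfaceForm : surfaceForms) {
    log.debug("spotted surface form: {}", surfaceForm);
}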

Example 40 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class KuromojiNlpEngine method computeEnhancements.

/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are expected to be stored in the metadata of the content item.
 * <p/>
 * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
 * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
 * <p/>
 * This method creates a new POSContentPart using {@link org.apache.stanbol.enhancer.engines.pos.api.POSTaggerHelper#createContentPart} from a text/plain part and
 * stores it as a new part in the content item. The metadata is not changed.
 *
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 *          if the underlying process failed to work as
 *          expected
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final AnalysedText at = initAnalysedText(this, analysedTextFactory, ci);
    String language = getLanguage(this, ci, false);
    if (!("ja".equals(language) || (language != null && language.startsWith("ja-")))) {
        throw new IllegalStateException("The detected language is NOT 'ja'! " + "As this is also checked within the #canEnhance(..) method this " + "indicates an Bug in the used EnhancementJobManager implementation. " + "Please report this on the dev@apache.stanbol.org or create an " + "JIRA issue about this.");
    }
    // start with the Tokenizer
    TokenStream tokenStream = tokenizerFactory.create(new CharSequenceReader(at.getText()));
    // build the analyzing chain by adding all TokenFilters
    for (TokenFilterFactory filterFactory : filterFactories) {
        tokenStream = filterFactory.create(tokenStream);
    }
    // Try to extract sentences based on POS tags ...
    int sentStartOffset = -1;
    // NER data
    List<NerData> nerList = new ArrayList<NerData>();
    // the next index where the NerData.context needs to be set
    int nerSentIndex = 0;
    NerData ner = null;
    OffsetAttribute offset = null;
    try {
        // required with Solr 4
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            offset = tokenStream.addAttribute(OffsetAttribute.class);
            Token token = at.addToken(offset.startOffset(), offset.endOffset());
            // Get the POS attribute and init the PosTag
            PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
            PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
            if (posTag == null) {
                posTag = adhocTags.get(posAttr.getPartOfSpeech());
                if (posTag == null) {
                    posTag = new PosTag(posAttr.getPartOfSpeech());
                    adhocTags.put(posAttr.getPartOfSpeech(), posTag);
                    log.warn(" ... missing PosTag mapping for {}", posAttr.getPartOfSpeech());
                }
            }
            // Sentence detection by POS tag
            if (sentStartOffset < 0) {
                // the last token was a sentence ending
                sentStartOffset = offset.startOffset();
            }
            if (posTag.hasPos(Pos.Point)) {
                Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
                // add the sentence as context to the NerData instances
                while (nerSentIndex < nerList.size()) {
                    nerList.get(nerSentIndex).context = sent.getSpan();
                    nerSentIndex++;
                }
                sentStartOffset = -1;
            }
            // POS
            token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
            // NER
            NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
            if (ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))) {
                // write NER annotation
                Chunk chunk = at.addChunk(ner.start, ner.end);
                chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
                // NOTE that the fise:TextAnnotation are written later based on the nerList
                // clean up
                ner = null;
            }
            if (nerTag != null) {
                if (ner == null) {
                    ner = new NerData(nerTag, offset.startOffset());
                    nerList.add(ner);
                }
                ner.end = offset.endOffset();
            }
            BaseFormAttribute baseFormAttr = tokenStream.addAttribute(BaseFormAttribute.class);
            MorphoFeatures morpho = null;
            if (baseFormAttr != null && baseFormAttr.getBaseForm() != null) {
                morpho = new MorphoFeatures(baseFormAttr.getBaseForm());
                // and add the posTag
                morpho.addPos(posTag);
            }
            InflectionAttribute inflectionAttr = tokenStream.addAttribute(InflectionAttribute.class);
            inflectionAttr.getInflectionForm();
            inflectionAttr.getInflectionType();
            if (morpho != null) {
                // if present add the morpho
                token.addAnnotation(MORPHO_ANNOTATION, Value.value(morpho));
            }
        }
        // we still need to write the last sentence
        Sentence lastSent = null;
        if (offset != null && sentStartOffset >= 0 && offset.endOffset() > sentStartOffset) {
            lastSent = at.addSentence(sentStartOffset, offset.endOffset());
        }
        // and set the context of the remaining named entities
        while (nerSentIndex < nerList.size()) {
            if (lastSent != null) {
                nerList.get(nerSentIndex).context = lastSent.getSpan();
            } else {
                // no sentence detected
                nerList.get(nerSentIndex).context = at.getSpan();
            }
            nerSentIndex++;
        }
    } catch (IOException e) {
        throw new EngineException(this, ci, "Exception while reading from " + "AnalyzedText contentpart", e);
    } finally {
        try {
            tokenStream.close();
        } catch (IOException e) {
        /* ignore */
        }
    }
    // finally write the NER annotations to the metadata of the ContentItem
    final Graph metadata = ci.getMetadata();
    ci.getLock().writeLock().lock();
    try {
        Language lang = new Language("ja");
        for (NerData nerData : nerList) {
            IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
            metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(at.getSpan().substring(nerData.start, nerData.end), lang)));
            metadata.add(new TripleImpl(ta, DC_TYPE, nerData.tag.getType()));
            metadata.add(new TripleImpl(ta, ENHANCER_START, lf.createTypedLiteral(nerData.start)));
            metadata.add(new TripleImpl(ta, ENHANCER_END, lf.createTypedLiteral(nerData.end)));
            metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(nerData.context, lang)));
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : NerTag(org.apache.stanbol.enhancer.nlp.ner.NerTag) IRI(org.apache.clerezza.commons.rdf.IRI) TokenStream(org.apache.lucene.analysis.TokenStream) ArrayList(java.util.ArrayList) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Token(org.apache.stanbol.enhancer.nlp.model.Token) NlpEngineHelper.initAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.initAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) CharSequenceReader(org.apache.commons.io.input.CharSequenceReader) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) BaseFormAttribute(org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence) InflectionAttribute(org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) PartOfSpeechAttribute(org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute) IOException(java.io.IOException) Chunk(org.apache.stanbol.enhancer.nlp.model.Chunk) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) Graph(org.apache.clerezza.commons.rdf.Graph) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)
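
The loop above collects instances of a NerData holder that is not shown. A minimal sketch of such a class, inferred from the fields accessed in the snippet (the actual class in the Kuromoji engine bundle may differ):

class NerData {

    final NerTag tag;
    final int start;
    int end;
    // the sentence the named entity occurs in; set once the sentence
    // boundaries are known
    String context;

    NerData(NerTag tag, int start) {
        this.tag = tag;
        this.start = start;
    }
}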

Aggregations

EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 55
IRI (org.apache.clerezza.commons.rdf.IRI): 37
IOException (java.io.IOException): 33
Graph (org.apache.clerezza.commons.rdf.Graph): 24
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 23
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 20
AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText): 15
Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 15
HashMap (java.util.HashMap): 13
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 13
ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 12
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 10
InvalidContentException (org.apache.stanbol.enhancer.servicesapi.InvalidContentException): 10
Test (org.junit.Test): 10
Triple (org.apache.clerezza.commons.rdf.Triple): 9
InputStream (java.io.InputStream): 8
SOAPException (javax.xml.soap.SOAPException): 8
Token (org.apache.stanbol.enhancer.nlp.model.Token): 8
Language (org.apache.clerezza.commons.rdf.Language): 7
LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory): 7