Search in sources:

Example 46 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

The class NgramUnitTest, method runFeatureExtractor.

/**
 * Builds a {@code WordNGram} feature-extractor resource and runs it over the
 * {@code text3.txt} test resource in a reader / segmenter / unit-annotator /
 * feature-extractor-connector pipeline.
 *
 * @param luceneFolder folder whose path is passed as both the extractor's source
 *                     location and the meta collector's target location
 * @return the newly created folder whose absolute path was handed to the
 *         feature-extractor connector as its output location
 * @throws Exception if the folder cannot be created, a description cannot be built,
 *                   or the pipeline fails
 */
private File runFeatureExtractor(File luceneFolder) throws Exception {
    File outputPath = folder.newFolder();
    String lucenePath = luceneFolder.toString();

    // Name/value pairs configuring the n-gram extractor (uni-grams only, top-1).
    Object[] extractorParams = {
        WordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME,
        WordNGram.PARAM_NGRAM_USE_TOP_K, "1",
        WordNGram.PARAM_SOURCE_LOCATION, lucenePath,
        WordNGramMC.PARAM_TARGET_LOCATION, lucenePath,
        WordNGram.PARAM_NGRAM_MIN_N, "1",
        WordNGram.PARAM_NGRAM_MAX_N, "1"
    };

    List<ExternalResourceDescription> extractors = new ArrayList<>();
    extractors.add(ExternalResourceFactory.createExternalResourceDescription(WordNGram.class, extractorParams));

    CollectionReaderDescription docReader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabelDocumentReader.class,
            TestReaderSingleLabelDocumentReader.PARAM_LANGUAGE, "en",
            TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION, "src/test/resources/ngrams/text3.txt",
            TestReaderSingleLabelDocumentReader.PARAM_SUPPRESS_DOCUMENT_ANNOTATION, true);
    AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription unitAnnotator = AnalysisEngineFactory.createEngineDescription(EachTokenAsUnitAnnotator.class);

    // Single-label, unit-mode connector writing through JsonDataWriter.
    AnalysisEngineDescription connector = TaskUtils.getFeatureExtractorConnector(
            outputPath.getAbsolutePath(),
            JsonDataWriter.class.getName(),
            Constants.LM_SINGLE_LABEL,
            Constants.FM_UNIT,
            false, false, false, false,
            Collections.emptyList(),
            extractors,
            new String[] {});

    SimplePipeline.runPipeline(docReader, tokenizer, unitAnnotator, connector);
    return outputPath;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) JsonDataWriter(org.dkpro.tc.core.io.JsonDataWriter) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription)

Example 47 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

The class NgramUnitTest, method runMetaCollection.

/**
 * Runs the {@code WordNGramMC} meta collector over the {@code text3.txt} test
 * resource in a reader / segmenter / meta-collector pipeline.
 *
 * @param luceneFolder folder whose path is passed as both the source location and
 *                     the meta collector's target location
 * @throws Exception if a description cannot be built or the pipeline fails
 */
private void runMetaCollection(File luceneFolder) throws Exception {
    String lucenePath = luceneFolder.toString();

    // Name/value pairs for the meta collector; handed to the varargs factory as-is,
    // so no intermediate list copy is needed.
    Object[] parameters = {
        WordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME,
        WordNGram.PARAM_NGRAM_USE_TOP_K, 1,
        WordNGram.PARAM_SOURCE_LOCATION, lucenePath,
        WordNGramMC.PARAM_TARGET_LOCATION, lucenePath,
        WordNGram.PARAM_NGRAM_MIN_N, 1,
        WordNGram.PARAM_NGRAM_MAX_N, 1
    };

    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabelDocumentReader.class,
            TestReaderSingleLabelDocumentReader.PARAM_LANGUAGE, "en",
            TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION, "src/test/resources/ngrams/text3.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector = AnalysisEngineFactory.createEngineDescription(WordNGramMC.class, parameters);

    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 48 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

The class PosNGramTest, method runFeatureExtractor.

/**
 * Runs the given feature extractor at the end of a reader / segmenter /
 * POS-tagger pipeline.
 *
 * @param luceneFolder     not used by this method
 * @param featureExtractor engine description appended as the last pipeline step
 * @throws Exception if a description cannot be built or the pipeline fails
 */
protected void runFeatureExtractor(File luceneFolder, AnalysisEngineDescription featureExtractor) throws Exception {
    CollectionReaderDescription featureReader = getFeatureReader();
    SimplePipeline.runPipeline(
            featureReader,
            // tokenization
            AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
            // English part-of-speech tagging
            AnalysisEngineFactory.createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"),
            featureExtractor);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 49 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

The class TokenLenTest, method prepareFeatureExtractor.

/**
 * Wraps the given feature-extractor class in a feature-extractor connector
 * configured for single-label, unit-mode extraction with {@code JsonDataWriter}.
 *
 * @param outputPath folder whose absolute path is passed to the connector as output location
 * @param class1     feature-extractor resource class
 * @param parameters configuration passed to {@code makeResource} together with {@code class1}
 * @return the connector description built by {@code TaskUtils.getFeatureExtractorConnector}
 * @throws ResourceInitializationException if the resource or connector description cannot be created
 */
@Override
protected AnalysisEngineDescription prepareFeatureExtractor(File outputPath, Class<? extends Resource_ImplBase> class1, Object[] parameters) throws ResourceInitializationException {
    List<ExternalResourceDescription> extractorResources = makeResource(class1, parameters);
    String outputDir = outputPath.getAbsolutePath();
    String writerName = JsonDataWriter.class.getName();
    return TaskUtils.getFeatureExtractorConnector(
            outputDir,
            writerName,
            Constants.LM_SINGLE_LABEL,
            Constants.FM_UNIT,
            false, false, false, false,
            Collections.emptyList(),
            extractorResources,
            new String[] {});
}
Also used : JsonDataWriter(org.dkpro.tc.core.io.JsonDataWriter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription)

Example 50 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

The class WordNGramMetaCollectorTest, method emptyDocumentTest.

/**
 * Runs the {@code WordNGramMC} meta collector over the files matching
 * {@code empty*.txt} under {@code src/test/resources/empty/}. The test contains
 * no assertions; it passes as long as nothing throws.
 *
 * @throws Exception if the folder cannot be created, a description cannot be built,
 *                   or iterating the pipeline fails
 */
@SuppressWarnings("unused")
@Test
public void emptyDocumentTest() throws Exception {
    File targetDir = folder.newFolder();

    CollectionReaderDescription textReader = CollectionReaderFactory.createReaderDescription(
            TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/empty/",
            TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, "empty*.txt");
    AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription documentMode = AnalysisEngineFactory.createEngineDescription(
            DocumentModeAnnotator.class,
            DocumentModeAnnotator.PARAM_FEATURE_MODE, Constants.FM_DOCUMENT);
    AnalysisEngineDescription collector = AnalysisEngineFactory.createEngineDescription(
            WordNGramMC.class,
            WordNGramMC.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            WordNGramMC.PARAM_TARGET_LOCATION, targetDir);

    JCasIterable pipeline = new JCasIterable(textReader, tokenizer, documentMode, collector);
    for (JCas cas : pipeline) {
        // Intentionally empty: the loop only walks through every document;
        // the per-document CAS itself is not inspected.
    }
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) JCasIterable(org.apache.uima.fit.pipeline.JCasIterable) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) JCas(org.apache.uima.jcas.JCas) File(java.io.File) Test(org.junit.Test)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription) 62; Test (org.junit.Test) 32; File (java.io.File) 27; CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription) 25; ArrayList (java.util.ArrayList) 22; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine) 18; JCas (org.apache.uima.jcas.JCas) 16; Feature (org.dkpro.tc.api.features.Feature) 13; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature) 11; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription) 10; AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder) 8; ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException) 8; JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter) 8; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget) 7; Gson (com.google.gson.Gson) 6; IOException (java.io.IOException) 6; Instance (org.dkpro.tc.api.features.Instance) 6; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) 4; CAS (org.apache.uima.cas.CAS) 4