Search in sources :

Example 6 with AggregateBuilder

use of org.apache.uima.fit.factory.AggregateBuilder in project dkpro-tc by dkpro.

the class InitTask method getAnalysisEngineDescription.

/**
 * Assembles the aggregate analysis engine that is executed by this task.
 *
 * <p>The stages are chained in this order: document-mode annotation, CAS id assignment,
 * pre-validity check, empty-problem check, user preprocessing, post-validity check,
 * outcome collection and finally the binary CAS writer.
 *
 * <p>Note: when the feature mode is {@code FM_PAIR}, or when {@code operativeViews} is set,
 * the {@code preprocessing} field is replaced by an aggregate that maps the default sofa
 * onto the respective views.
 *
 * @param aContext task context used to resolve the output folder
 * @return description of the complete aggregate engine
 * @throws ResourceInitializationException if one of the engine descriptions cannot be created
 * @throws IOException declared by the overridden method
 */
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
    final String outputKey;
    if (isTesting) {
        outputKey = OUTPUT_KEY_TEST;
    } else {
        outputKey = OUTPUT_KEY_TRAIN;
    }

    // Final stage of the pipeline: writes each CAS to disk in binary format "6+".
    String casTargetLocation = aContext.getFolder(outputKey, AccessMode.READWRITE).getPath();
    AnalysisEngineDescription casWriter = createEngineDescription(BinaryCasWriter.class, BinaryCasWriter.PARAM_TARGET_LOCATION, casTargetLocation, BinaryCasWriter.PARAM_FORMAT, "6+");

    // Special connector that only checks whether there are no instances and, if so, emits a
    // meaningful error message. It has to run before the preprocessing.
    AnalysisEngineDescription emptyProblemChecker = createEngineDescription(PreprocessConnector.class);

    // Run the preprocessing once per view by mapping the default sofa onto each view name.
    boolean pairMode = featureMode.equals(FM_PAIR);
    if (pairMode || operativeViews != null) {
        AggregateBuilder perView = new AggregateBuilder();
        if (pairMode) {
            // pair mode works on the two fixed views PART_ONE and PART_TWO
            perView.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, PART_ONE);
            perView.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, PART_TWO);
        } else {
            for (String view : operativeViews) {
                perView.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, view);
            }
        }
        preprocessing = perView.createAggregateDescription();
    }

    AnalysisEngineDescription documentMode = createEngineDescription(DocumentModeAnnotator.class, DocumentModeAnnotator.PARAM_FEATURE_MODE, featureMode);
    // assigns each CAS a unique id
    AnalysisEngineDescription idAssigner = createEngineDescription(AssignIdConnector.class);
    // tc pre validity check
    AnalysisEngineDescription preValidityCheck = getPreValidityCheckEngine();
    // tc post validity check
    AnalysisEngineDescription postValidityCheck = getPostValidityCheckEngine();
    // collects the outcomes
    AnalysisEngineDescription outcomeCollector = createEngineDescription(OutcomeCollector.class, OutcomeCollector.PARAM_TARGET_FOLDER, aContext.getFolder(outputKey, AccessMode.READWRITE));

    return createEngineDescription(documentMode, idAssigner, preValidityCheck, emptyProblemChecker, preprocessing, postValidityCheck, outcomeCollector, casWriter);
}
Also used : DocumentModeAnnotator(org.dkpro.tc.core.task.uima.DocumentModeAnnotator) AssignIdConnector(org.dkpro.tc.core.task.uima.AssignIdConnector) AggregateBuilder(org.apache.uima.fit.factory.AggregateBuilder) OutcomeCollector(org.dkpro.tc.core.task.uima.OutcomeCollector) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 7 with AggregateBuilder

use of org.apache.uima.fit.factory.AggregateBuilder in project dkpro-tc by dkpro.

the class MetaInfoTask method getAnalysisEngineDescription.

/**
 * Builds the aggregate of meta collectors required by the configured feature extractors.
 *
 * <p>If {@code recordContext} is set, a context meta collector is added first. Afterwards every
 * feature extractor whose class implements {@link MetaDependent} is instantiated through its
 * no-argument constructor and asked for its meta collector configurations; each of those is
 * given a storage location and added to the aggregate. When {@code operativeViews} is set,
 * every meta collector is added once per view with the default sofa mapped to that view.
 *
 * @param aContext task context used to resolve storage folders
 * @return aggregate description containing all meta collectors (possibly none)
 * @throws ResourceInitializationException if no feature extractors were configured, or if a
 *         feature extractor class cannot be resolved or instantiated reflectively (including a
 *         failure inside its constructor)
 * @throws IOException declared by the overridden method
 * @throws NullPointerException if the context meta collector could not be initialized
 */
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
    featureExtractorNames = new HashSet<>();
    // check for error conditions
    if (featureExtractors == null) {
        throw new ResourceInitializationException(new TextClassificationException("No feature extractors have been added to the experiment."));
    }
    List<AnalysisEngineDescription> metaCollectors = new ArrayList<>();
    if (recordContext) {
        AnalysisEngineDescription aed = injectContextMetaCollector(aContext);
        if (aed == null) {
            throw new NullPointerException("Initializing a ContextMetaCollector returned an AnalysisEngineDescription which was [NULL]");
        }
        metaCollectors.add(aed);
    }
    try {
        // Configure the meta collectors for each feature extractor individually
        for (TcFeature feClosure : featureExtractors) {
            ExternalResourceDescription feDesc = feClosure.getActualValue();
            Class<?> feClass = getClass(feDesc);
            // Skip feature extractors that are not dependent on meta collectors
            if (!MetaDependent.class.isAssignableFrom(feClass)) {
                continue;
            }
            // Class.newInstance() is deprecated because it rethrows checked exceptions of the
            // constructor without declaring them; going through the Constructor object wraps
            // them in an InvocationTargetException that is handled by the catch below.
            MetaDependent feInstance = (MetaDependent) feClass.getDeclaredConstructor().newInstance();
            Map<String, Object> parameterSettings = ConfigurationParameterFactory.getParameterSettings(feDesc.getResourceSpecifier());
            validateUniqueFeatureExtractorNames(parameterSettings);
            // Tell the meta collectors where to store their data
            for (MetaCollectorConfiguration conf : feInstance.getMetaCollectorClasses(parameterSettings)) {
                configureStorageLocations(aContext, conf.descriptor, (String) feClosure.getId(), conf.collectorOverrides, AccessMode.READWRITE);
                metaCollectors.add(conf.descriptor);
            }
        }
    } catch (ReflectiveOperationException e) {
        // covers ClassNotFoundException, InstantiationException, IllegalAccessException,
        // NoSuchMethodException and InvocationTargetException; the cause is preserved
        throw new ResourceInitializationException(e);
    }
    // make sure that the meta key import can be resolved (even when no meta features have been
    // extracted, as in the regression demo)
    aContext.getFolder(META_KEY, AccessMode.READONLY);
    AggregateBuilder builder = new AggregateBuilder();
    for (AnalysisEngineDescription metaCollector : metaCollectors) {
        if (operativeViews != null) {
            // run the collector once per view, mapping the default sofa to that view
            for (String viewName : operativeViews) {
                builder.add(metaCollector, CAS.NAME_DEFAULT_SOFA, viewName);
            }
        } else {
            builder.add(metaCollector);
        }
    }
    return builder.createAggregateDescription();
}
Also used : TcFeature(org.dkpro.tc.api.features.TcFeature) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) MetaDependent(org.dkpro.tc.api.features.meta.MetaDependent) AggregateBuilder(org.apache.uima.fit.factory.AggregateBuilder) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) MetaCollectorConfiguration(org.dkpro.tc.api.features.meta.MetaCollectorConfiguration) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription)

Example 8 with AggregateBuilder

use of org.apache.uima.fit.factory.AggregateBuilder in project dkpro-tc by dkpro.

the class PreparationTask method getAnalysisEngineDescription.

/**
 * Builds the aggregate engine of the preparation step.
 *
 * <p>Without integer vectorization only a {@code VocabularyOutcomeCollector} is added. With
 * integer vectorization a {@code MappingAnnotator} is added, followed by one annotator per
 * dictionary entry. {@code dictionaryLists} is read in pairs: the element at an even index is
 * passed as dictionary path, the following element is resolved to the annotator class via
 * {@code castName}. In both cases the engine returned by {@code getMaximumLengthDeterminer}
 * is appended last.
 *
 * @param aContext task context used to resolve the target folder
 * @return description of the assembled aggregate
 * @throws ResourceInitializationException if an engine description cannot be created
 * @throws IOException declared by the overridden method
 */
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
    File targetDir = aContext.getFolder(OUTPUT_KEY, AccessMode.READONLY);
    AggregateBuilder aggregate = new AggregateBuilder();

    if (!integerVectorization) {
        AnalysisEngineDescription vocabularyCollector = createEngineDescription(VocabularyOutcomeCollector.class, VocabularyOutcomeCollector.PARAM_TARGET_DIRECTORY, targetDir);
        aggregate.add(vocabularyCollector);
    } else {
        // instance indices start at the adapter's lowest index, outcome indices at zero
        AnalysisEngineDescription mapper = createEngineDescription(MappingAnnotator.class, MappingAnnotator.PARAM_TARGET_DIRECTORY, targetDir, MappingAnnotator.PARAM_START_INDEX_INSTANCES, mlDeepLearningAdapter.lowestIndex(), MappingAnnotator.PARAM_START_INDEX_OUTCOMES, 0);
        aggregate.add(mapper);

        boolean hasDictionaries = dictionaryLists != null && !dictionaryLists.isEmpty();
        if (hasDictionaries) {
            sanityCheckDictionaries(dictionaryLists);
            // entries come in (path, annotator name) pairs
            int pathIdx = 0;
            while (pathIdx < dictionaryLists.size()) {
                Class<? extends AnalysisComponent> annotatorClass = castName(dictionaryLists.get(pathIdx + 1));
                AnalysisEngineDescription lookup = createEngineDescription(annotatorClass, LookupResourceAnnotator.PARAM_DICTIONARY_PATH, dictionaryLists.get(pathIdx), LookupResourceAnnotator.PARAM_TARGET_DIRECTORY, targetDir);
                aggregate.add(lookup);
                pathIdx += 2;
            }
        }
    }

    aggregate.add(getMaximumLengthDeterminer(targetDir));
    return aggregate.createAggregateDescription();
}
Also used : AggregateBuilder(org.apache.uima.fit.factory.AggregateBuilder) VocabularyOutcomeCollector(org.dkpro.tc.core.task.deep.anno.VocabularyOutcomeCollector) MappingAnnotator(org.dkpro.tc.core.task.deep.anno.MappingAnnotator) File(java.io.File)

Example 9 with AggregateBuilder

use of org.apache.uima.fit.factory.AggregateBuilder in project dkpro-tc by dkpro.

the class InitTaskDeep method getAnalysisEngineDescription.

/**
 * Assembles the aggregate analysis engine that is executed by this task.
 *
 * <p>Stage order: optional vocabulary filter (only if {@code dropVocabWithoutEmbedding} is
 * set), CAS id assignment, empty-problem check, user preprocessing, binary CAS writer.
 *
 * <p>Note: when {@code operativeViews} is set, the {@code preprocessing} field is replaced by
 * an aggregate that maps the default sofa onto each of those views.
 *
 * @param aContext task context used to resolve the output folder
 * @return description of the complete aggregate engine
 * @throws ResourceInitializationException if one of the engine descriptions cannot be created
 * @throws IOException declared by the overridden method
 */
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
    final String outputKey;
    if (isTesting) {
        outputKey = OUTPUT_KEY_TEST;
    } else {
        outputKey = OUTPUT_KEY_TRAIN;
    }
    File targetDir = aContext.getFolder(outputKey, AccessMode.READWRITE);

    // Final stage of the pipeline: writes each CAS to disk in binary format "6+".
    AnalysisEngineDescription casWriter = createEngineDescription(BinaryCasWriter.class, BinaryCasWriter.PARAM_TARGET_LOCATION, targetDir.getPath(), BinaryCasWriter.PARAM_FORMAT, "6+");

    // Special connector that only checks whether there are no instances and, if so, emits a
    // meaningful error message. It has to run before the preprocessing.
    AnalysisEngineDescription emptyProblemChecker = createEngineDescription(PreprocessConnector.class);

    if (operativeViews != null) {
        // run the preprocessing once per view by mapping the default sofa onto the view
        AggregateBuilder perView = new AggregateBuilder();
        for (String view : operativeViews) {
            perView.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, view);
        }
        preprocessing = perView.createAggregateDescription();
    }

    AggregateBuilder pipeline = new AggregateBuilder();
    if (dropVocabWithoutEmbedding) {
        AnalysisEngineDescription vocabularyFilter = createEngineDescription(FilterVocabularyByEmbeddingAnnotator.class, FilterVocabularyByEmbeddingAnnotator.PARAM_EMBEDDING, embedding);
        pipeline.add(vocabularyFilter);
    }
    AnalysisEngineDescription idAssigner = createEngineDescription(AssignIdConnector.class);
    pipeline.add(idAssigner);
    pipeline.add(emptyProblemChecker);
    pipeline.add(preprocessing);
    pipeline.add(casWriter);
    return pipeline.createAggregateDescription();
}
Also used : AssignIdConnector(org.dkpro.tc.core.task.uima.AssignIdConnector) AggregateBuilder(org.apache.uima.fit.factory.AggregateBuilder) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) FilterVocabularyByEmbeddingAnnotator(org.dkpro.tc.core.task.deep.anno.FilterVocabularyByEmbeddingAnnotator) File(java.io.File)

Aggregations

AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder): 9; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 8; File (java.io.File): 6; CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription): 4; ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException): 4; ArrayList (java.util.ArrayList): 3; Gson (com.google.gson.Gson): 2; Fields (org.apache.lucene.index.Fields): 2; IndexReader (org.apache.lucene.index.IndexReader): 2; MultiFields (org.apache.lucene.index.MultiFields): 2; Terms (org.apache.lucene.index.Terms): 2; TermsEnum (org.apache.lucene.index.TermsEnum): 2; BytesRef (org.apache.lucene.util.BytesRef): 2; JCasIterable (org.apache.uima.fit.pipeline.JCasIterable): 2; JCas (org.apache.uima.jcas.JCas): 2; Instance (org.dkpro.tc.api.features.Instance): 2; AssignIdConnector (org.dkpro.tc.core.task.uima.AssignIdConnector): 2; Test (org.junit.Test): 2; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription): 1; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 1