Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.
Class FeatureResourceLoader, method configureOverrides:
/**
 * Rewrites the given parameters of an external resource so that they point to files below the
 * model location.
 * <p>
 * Only two kinds of resource specifiers are accepted: primitive analysis engine descriptions
 * (meta collectors) and custom resource specifiers (feature extractors). Aggregate engines are
 * rejected because their delegates would have to be processed recursively.
 *
 * @param tcModelLocation
 *            folder against which every override value is resolved
 * @param exRes
 *            external resource whose specifier receives the new parameter values
 * @param overrides
 *            maps a parameter name to a path relative to {@code tcModelLocation}
 * @throws IOException
 *             declared by the signature; not thrown by any statement visible in this method
 * @throws IllegalArgumentException
 *             if the specifier is an aggregate analysis engine or of an unsupported type
 */
private void configureOverrides(File tcModelLocation, ExternalResourceDescription exRes, Map<String, String> overrides) throws IOException {
    ResourceSpecifier specifier = exRes.getResourceSpecifier();
    boolean isAnalysisEngine = specifier instanceof AnalysisEngineDescription;

    // Meta collectors: accepted only when primitive.
    if (isAnalysisEngine && !((AnalysisEngineDescription) specifier).isPrimitive()) {
        throw new IllegalArgumentException("Only primitive meta collectors currently supported.");
    }

    // Anything that is neither an analysis engine nor a feature extractor is rejected.
    if (!isAnalysisEngine && !(specifier instanceof CustomResourceSpecifier_impl)) {
        throw new IllegalArgumentException("Descriptors of type " + specifier.getClass() + " not supported.");
    }

    // Each override value is a relative storage location; it is turned into an absolute path
    // below the model folder and set as the value of the parameter named by the key.
    for (Entry<String, String> override : overrides.entrySet()) {
        File storageLocation = new File(tcModelLocation, override.getValue());
        ConfigurationParameterFactory.setParameter(specifier, override.getKey(), storageLocation.getAbsolutePath());
    }
}
Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.
Class FoldUtil, method createMinimalSplit:
/**
 * Splits the available CAS into additional, smaller CAS so that enough of them exist to run a
 * cross-validation with the requested number of folds.
 * <p>
 * A rule-of-thumb split factor {@code ceil(numFolds / numAvailableJCas)} is computed and handed
 * to a {@code FoldClassificationUnitCasMultiplier}, which processes every {@code *.bin} CAS read
 * from the input folder. The results are written in binary format {@code "6+"} to the
 * sub-folder {@code output} of the input folder. Finally, the number of created CAS is compared
 * to the requested number of folds and an exception is thrown if too few CAS were created.
 *
 * @param inputFolder
 *            the folder containing the binary CAS files to split
 * @param numFolds
 *            number of folds to create
 * @param numAvailableJCas
 *            number of CAS currently available in the input folder; must be positive
 * @param isSequence
 *            whether a sequence model is used
 * @return the folder into which the split CAS were written
 * @throws IllegalArgumentException
 *             if {@code numAvailableJCas} is zero or negative
 * @throws Exception
 *             if not enough data is available for creating the required number of folds
 */
public static File createMinimalSplit(String inputFolder, int numFolds, int numAvailableJCas, boolean isSequence) throws Exception {
    // Dividing by zero in floating point yields Infinity, which the int cast would silently
    // turn into Integer.MAX_VALUE; a negative count yields a non-positive split factor.
    // Neither is a usable split factor, so fail early with a clear message instead.
    if (numAvailableJCas <= 0) {
        throw new IllegalArgumentException("Cannot create [" + numFolds + "] folds: the number of available CAS must be positive but was [" + numAvailableJCas + "]");
    }

    File outputFolder = new File(inputFolder, "output");

    // Rule of thumb: split every CAS into this many parts, rounding up.
    int splitNum = (int) Math.ceil(numFolds / (double) numAvailableJCas);

    CollectionReaderDescription createReader = CollectionReaderFactory.createReaderDescription(BinaryCasReader.class, BinaryCasReader.PARAM_SOURCE_LOCATION, inputFolder, BinaryCasReader.PARAM_PATTERNS, "*.bin", BinaryCasReader.PARAM_ADD_DOCUMENT_METADATA, false);
    AnalysisEngineDescription multiplier = AnalysisEngineFactory.createEngineDescription(FoldClassificationUnitCasMultiplier.class, FoldClassificationUnitCasMultiplier.PARAM_REQUESTED_SPLITS, splitNum, FoldClassificationUnitCasMultiplier.PARAM_USE_SEQUENCES, isSequence);
    AnalysisEngineDescription binaryCasWriter = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class, BinaryCasWriter.PARAM_TARGET_LOCATION, outputFolder.getAbsolutePath(), BinaryCasWriter.PARAM_FORMAT, "6+");
    AnalysisEngineDescription both = AnalysisEngineFactory.createEngineDescription(multiplier, binaryCasWriter);
    SimplePipeline.runPipeline(createReader, both);

    // final check - do we have at least as many folds as requested by "numFolds"?
    isNumberOfCasCreatedLargerEqualNumFolds(outputFolder, numFolds);
    return outputFolder;
}
Aggregations