use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
the class LinearSVMModelTrainer method trainModel.
/**
 * Trains a liblinear logistic-regression classifier from the events in the
 * corpus event stream.
 * <p>
 * If {@code oneVsRest} is set, one binary model is trained per atomic
 * outcome and the result is a {@code LinearSVMOneVsRestModel}; outcomes
 * containing a tab character are treated as several atomic outcomes joined
 * by tabs. Otherwise a single multi-class {@code LinearSVMModel} is trained.
 * <p>
 * If {@code cutoff > 1}, features whose count in the feature count map is
 * below the cutoff are removed from every event before training.
 *
 * @param corpusEventStream
 *            the source of training events; its attributes are copied into
 *            the model attributes
 * @param descriptors
 *            the descriptors handed to the model constructor
 * @return the trained model
 * @throws JolicielException
 *             if the configured solver is not a logistic regression solver
 */
@Override
public ClassificationModel trainModel(ClassificationEventStream corpusEventStream, Map<String, List<String>> descriptors) {
    // Note: since we want a probabilistic classifier, our options here
    // are limited to logistic regression:
    // L2R_LR: L2-regularized logistic regression (primal)
    // L1R_LR: L1-regularized logistic regression
    // L2R_LR_DUAL: L2-regularized logistic regression (dual)
    SolverType solver = SolverType.valueOf(this.solverType.name());
    if (!solver.isLogisticRegressionSolver())
        throw new JolicielException("To get a probability distribution of outcomes, only logistic regression solvers are supported.");

    TObjectIntMap<String> featureIndexMap = new TObjectIntHashMap<String>(1000, 0.75f, -1);
    TObjectIntMap<String> outcomeIndexMap = new TObjectIntHashMap<String>(100, 0.75f, -1);
    TIntList outcomeList = new TIntArrayList();
    TIntIntMap featureCountMap = new TIntIntHashMap();
    CountingInfo countingInfo = new CountingInfo();
    Feature[][] featureMatrix = this.getFeatureMatrix(corpusEventStream, featureIndexMap, outcomeIndexMap, outcomeList, featureCountMap, countingInfo);

    // apply the cutoff: keep only features seen at least "cutoff" times
    if (cutoff > 1) {
        LOG.debug("Feature count (after cutoff): " + countingInfo.featureCountOverCutoff);
        for (int i = 0; i < featureMatrix.length; i++) {
            Feature[] featureArray = featureMatrix[i];
            List<Feature> featureList = new ArrayList<Feature>(featureArray.length);
            for (Feature feature : featureArray) {
                if (featureCountMap.get(feature.getIndex()) >= cutoff)
                    featureList.add(feature);
            }
            // replacing the row drops the only reference to the unfiltered array
            featureMatrix[i] = featureList.toArray(new Feature[featureList.size()]);
        }
    }

    // list the outcomes in the order of their index
    final String[] outcomeArray2 = new String[outcomeIndexMap.size()];
    outcomeIndexMap.forEachEntry(new TObjectIntProcedure<String>() {
        @Override
        public boolean execute(String key, int value) {
            outcomeArray2[value] = key;
            return true;
        }
    });
    List<String> outcomes = new ArrayList<String>(outcomeIndexMap.size());
    for (String outcome : outcomeArray2)
        outcomes.add(outcome);

    if (oneVsRest) {
        // find outcomes representing multiple classes
        TIntSet multiClassOutcomes = new TIntHashSet();
        TIntObjectMap<TIntSet> outcomeComponentMap = new TIntObjectHashMap<TIntSet>();
        List<String> atomicOutcomes = new ArrayList<String>();
        TObjectIntMap<String> atomicOutcomeIndexes = new TObjectIntHashMap<String>();
        TIntIntMap oldIndexNewIndexMap = new TIntIntHashMap();

        // first pass: store all atomic outcomes (those without a tab), and
        // map their original index to their index among atomic outcomes
        for (int j = 0; j < outcomes.size(); j++) {
            String outcome = outcomes.get(j);
            if (outcome.indexOf('\t') < 0) {
                int newIndex = atomicOutcomes.size();
                atomicOutcomeIndexes.put(outcome, newIndex);
                oldIndexNewIndexMap.put(j, newIndex);
                atomicOutcomes.add(outcome);
            }
        }

        // second pass: split each multi-class outcome into its atomic
        // components, registering any component not yet seen on its own
        for (int j = 0; j < outcomes.size(); j++) {
            String outcome = outcomes.get(j);
            if (outcome.indexOf('\t') >= 0) {
                multiClassOutcomes.add(j);
                TIntSet myComponentOutcomes = new TIntHashSet();
                outcomeComponentMap.put(j, myComponentOutcomes);
                String[] parts = outcome.split("\t", -1);
                for (String part : parts) {
                    int outcomeIndex = outcomeIndexMap.get(part);
                    int newIndex = 0;
                    if (outcomeIndex < 0) {
                        outcomeIndex = countingInfo.currentOutcomeIndex++;
                        outcomeIndexMap.put(part, outcomeIndex);
                        newIndex = atomicOutcomes.size();
                        atomicOutcomeIndexes.put(part, newIndex);
                        oldIndexNewIndexMap.put(outcomeIndex, newIndex);
                        atomicOutcomes.add(part);
                    } else {
                        newIndex = oldIndexNewIndexMap.get(outcomeIndex);
                    }
                    myComponentOutcomes.add(newIndex);
                }
            }
        }

        LinearSVMOneVsRestModel linearSVMModel = new LinearSVMOneVsRestModel(config, descriptors);
        linearSVMModel.setFeatureIndexMap(featureIndexMap);
        linearSVMModel.setOutcomes(atomicOutcomes);
        linearSVMModel.addModelAttribute("solver", this.getSolverType().name());
        linearSVMModel.addModelAttribute("cutoff", "" + this.getCutoff());
        linearSVMModel.addModelAttribute("c", "" + this.getConstraintViolationCost());
        linearSVMModel.addModelAttribute("eps", "" + this.getEpsilon());
        linearSVMModel.addModelAttribute("oneVsRest", "" + this.isOneVsRest());
        linearSVMModel.getModelAttributes().putAll(corpusEventStream.getAttributes());

        // build one 1-vs-All model per outcome
        for (int j = 0; j < atomicOutcomes.size(); j++) {
            String outcome = atomicOutcomes.get(j);
            LOG.info("Building model for outcome: " + outcome);

            // create an outcome array with 1 for the current outcome
            // and 0 for all others
            double[] outcomeArray = new double[countingInfo.numEvents];
            int i = 0;
            int myOutcomeCount = 0;
            TIntIterator outcomeIterator = outcomeList.iterator();
            while (outcomeIterator.hasNext()) {
                int originalOutcomeIndex = outcomeIterator.next();
                boolean isMyOutcome;
                if (multiClassOutcomes.contains(originalOutcomeIndex)) {
                    isMyOutcome = outcomeComponentMap.get(originalOutcomeIndex).contains(j);
                } else {
                    isMyOutcome = oldIndexNewIndexMap.get(originalOutcomeIndex) == j;
                }
                if (isMyOutcome)
                    myOutcomeCount++;
                outcomeArray[i++] = isMyOutcome ? 1 : 0;
            }
            LOG.debug("Found " + myOutcomeCount + " out of " + countingInfo.numEvents + " outcomes of type: " + outcome);

            double[] myOutcomeArray = outcomeArray;
            Feature[][] myFeatureMatrix = featureMatrix;
            // the count check avoids a division by zero for an outcome
            // without any positive event
            if (balanceEventCounts && myOutcomeCount > 0) {
                // we start with the truncated proportion of false
                // events to true events
                // we want these approximately balanced
                // we only balance up, never balance down
                int otherCount = countingInfo.numEvents - myOutcomeCount;
                int proportion = otherCount / myOutcomeCount;
                if (proportion > 1) {
                    LOG.debug("Balancing events for " + outcome + " by " + proportion);
                    int newSize = otherCount + myOutcomeCount * proportion;
                    myOutcomeArray = new double[newSize];
                    myFeatureMatrix = new Feature[newSize][];
                    int l = 0;
                    for (int k = 0; k < outcomeArray.length; k++) {
                        double myOutcome = outcomeArray[k];
                        Feature[] myFeatures = featureMatrix[k];
                        // negative events are copied once, positive events
                        // "proportion" times
                        int copies = (myOutcome == 0) ? 1 : proportion;
                        for (int m = 0; m < copies; m++) {
                            myOutcomeArray[l] = myOutcome;
                            myFeatureMatrix[l] = myFeatures;
                            l++;
                        }
                    }
                }
            }

            Problem problem = new Problem();
            // number of training examples: must match the arrays actually
            // handed to liblinear, which are longer than numEvents when the
            // events were balanced above
            problem.l = myOutcomeArray.length;
            // number of features
            problem.n = countingInfo.currentFeatureIndex;
            // feature nodes - note: must be ordered by index
            problem.x = myFeatureMatrix;
            // target values
            problem.y = myOutcomeArray;

            Parameter parameter = new Parameter(solver, this.constraintViolationCost, this.epsilon);
            Model model = Linear.train(problem, parameter);
            linearSVMModel.addModel(model);
        }
        return linearSVMModel;
    } else {
        double[] outcomeArray = new double[countingInfo.numEvents];
        int i = 0;
        TIntIterator outcomeIterator = outcomeList.iterator();
        while (outcomeIterator.hasNext())
            outcomeArray[i++] = outcomeIterator.next();

        Problem problem = new Problem();
        // number of training examples
        problem.l = countingInfo.numEvents;
        // number of features
        problem.n = countingInfo.currentFeatureIndex;
        // feature nodes - note: must be ordered by index
        problem.x = featureMatrix;
        // target values
        problem.y = outcomeArray;

        Parameter parameter = new Parameter(solver, this.constraintViolationCost, this.epsilon);
        Model model = Linear.train(problem, parameter);

        LinearSVMModel linearSVMModel = new LinearSVMModel(model, config, descriptors);
        linearSVMModel.setFeatureIndexMap(featureIndexMap);
        linearSVMModel.setOutcomes(outcomes);
        linearSVMModel.addModelAttribute("solver", this.getSolverType());
        linearSVMModel.addModelAttribute("cutoff", this.getCutoff());
        linearSVMModel.addModelAttribute("cost", this.getConstraintViolationCost());
        linearSVMModel.addModelAttribute("epsilon", this.getEpsilon());
        linearSVMModel.addModelAttribute("oneVsRest", this.isOneVsRest());
        linearSVMModel.getModelAttributes().putAll(corpusEventStream.getAttributes());
        return linearSVMModel;
    }
}
use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
the class AbstractFeatureParser method getFeatures.
/**
 * Get the features corresponding to a particular descriptor by performing
 * reflection on the corresponding feature class to be instantiated.
 * <p>
 * The method proceeds in three stages:
 * <ol>
 * <li>The descriptor's arguments are turned into one or more argument lists:
 * String, Boolean, Double and Integer literals are wrapped in the matching
 * literal feature, and function arguments are parsed via
 * {@code parseInternal}; when a function argument yields several features,
 * the argument list is forked once per feature.</li>
 * <li>For each argument list, array arguments are expanded into one
 * constructor call per array element, and a constructor of
 * {@code featureClass} is looked up: first by exact argument types, then
 * among constructors whose last parameter is an array, then among
 * constructors reachable by converting arguments.</li>
 * <li>The constructor is invoked for each set of arguments; each new feature
 * gets its dependencies injected, its external resource finder set when
 * applicable, its arguments registered, and is passed through
 * {@code convertFeature}.</li>
 * </ol>
 *
 * @param descriptor
 *            the descriptor whose arguments supply the constructor arguments
 * @param featureClass
 *            the feature class to instantiate
 * @param topLevelDescriptor
 *            handed to {@code parseInternal} and to the exceptions thrown
 *            here
 * @return the converted features, one per constructor invocation
 * @throws FeatureSyntaxException
 *             if {@code featureClass} is null, or if the trailing arguments
 *             collected for an array parameter do not share a type
 * @throws NoConstructorFoundException
 *             if no constructor matches the arguments
 * @throws JolicielException
 *             if a feature needs an external resource finder and none is set
 */
final List<Feature<T, ?>> getFeatures(FunctionDescriptor descriptor, @SuppressWarnings("rawtypes") Class<? extends Feature> featureClass, FunctionDescriptor topLevelDescriptor) {
if (featureClass == null)
throw new FeatureSyntaxException("No class provided for", descriptor, topLevelDescriptor);
List<Feature<T, ?>> features = new ArrayList<Feature<T, ?>>();
int i = 0;
// start with a single empty argument list, and extend it one argument
// descriptor at a time
List<List<Object>> argumentLists = new ArrayList<List<Object>>();
List<Object> initialArguments = new ArrayList<Object>();
argumentLists.add(initialArguments);
for (FunctionDescriptor argumentDescriptor : descriptor.getArguments()) {
List<List<Object>> newArgumentLists = new ArrayList<List<Object>>();
for (List<Object> arguments : argumentLists) {
if (!argumentDescriptor.isFunction()) {
// a literal: wrap it in the literal feature matching its type
Object literal = argumentDescriptor.getObject();
Object convertedObject = literal;
if (literal instanceof String) {
StringLiteralFeature<T> stringLiteralFeature = new StringLiteralFeature<T>((String) literal);
convertedObject = stringLiteralFeature;
} else if (literal instanceof Boolean) {
BooleanLiteralFeature<T> booleanLiteralFeature = new BooleanLiteralFeature<T>((Boolean) literal);
convertedObject = booleanLiteralFeature;
} else if (literal instanceof Double) {
DoubleLiteralFeature<T> doubleLiteralFeature = new DoubleLiteralFeature<T>((Double) literal);
convertedObject = doubleLiteralFeature;
} else if (literal instanceof Integer) {
IntegerLiteralFeature<T> integerLiteralFeature = new IntegerLiteralFeature<T>((Integer) literal);
convertedObject = integerLiteralFeature;
} else {
// do nothing - this was some sort of other object
// added by getModifiedDescriptors that should
// be handled as is.
}
// a literal extends the existing argument list in place
arguments.add(convertedObject);
newArgumentLists.add(arguments);
} else {
List<Feature<T, ?>> featureArguments = this.parseInternal(argumentDescriptor, topLevelDescriptor);
// a function argument may parse into several features: fork
// the argument list, one copy per feature
for (Feature<T, ?> featureArgument : featureArguments) {
List<Object> newArguments = new ArrayList<Object>(arguments);
newArguments.add(featureArgument);
newArgumentLists.add(newArguments);
}
}
// function or object?
}
// next argument list (under construction from original
// arguments)
argumentLists = newArgumentLists;
}
for (List<Object> originalArgumentList : argumentLists) {
// add the argument types (i.e. classes)
// and convert arrays to multiple constructor calls
List<Object[]> argumentsList = new ArrayList<Object[]>();
argumentsList.add(new Object[originalArgumentList.size()]);
Class<?>[] argumentTypes = new Class<?>[originalArgumentList.size()];
List<Object[]> newArgumentsList = new ArrayList<Object[]>();
for (i = 0; i < originalArgumentList.size(); i++) {
Object arg = originalArgumentList.get(i);
if (arg.getClass().isArray()) {
// arrays represent multiple constructor calls
Object[] argArray = (Object[]) arg;
for (Object oneArg : argArray) {
for (Object[] arguments : argumentsList) {
Object[] newArguments = arguments.clone();
newArguments[i] = oneArg;
newArgumentsList.add(newArguments);
}
}
// the constructor is matched against the element type, not the
// array type
argumentTypes[i] = arg.getClass().getComponentType();
} else {
// a single value: set position i of every argument array in place
for (Object[] myArguments : argumentsList) {
newArgumentsList.add(myArguments);
myArguments[i] = arg;
}
argumentTypes[i] = arg.getClass();
}
argumentsList = newArgumentsList;
newArgumentsList = new ArrayList<Object[]>();
}
// next argument
// first attempt: a constructor matching the argument types directly
@SuppressWarnings("rawtypes") Constructor<? extends Feature> constructor = this.getMatchingAccessibleConstructor(featureClass, argumentTypes);
if (constructor == null) {
@SuppressWarnings("rawtypes") Constructor<? extends Feature>[] constructors = this.featureConstructors.get(featureClass);
// check if there's a variable argument constructor
for (Constructor<?> oneConstructor : constructors) {
Class<?>[] parameterTypes = oneConstructor.getParameterTypes();
// only considered when there is a single set of arguments, with at
// least as many arguments as the constructor has parameters
if (parameterTypes.length >= 1 && argumentsList.size() == 1 && argumentsList.get(0).length >= parameterTypes.length) {
Object[] arguments = argumentsList.get(0);
Class<?> parameterType = parameterTypes[parameterTypes.length - 1];
if (parameterType.isArray()) {
// assume it's a variable-argument
// constructor
// build the argument for this constructor
// find a common type for all of the
// arguments.
// the array element type is chosen from the first trailing argument
Object argument = arguments[parameterTypes.length - 1];
Class<?> clazz = null;
if (argument instanceof StringFeature)
clazz = StringFeature.class;
else if (argument instanceof BooleanFeature)
clazz = BooleanFeature.class;
else if (argument instanceof DoubleFeature)
clazz = DoubleFeature.class;
else if (argument instanceof IntegerFeature)
clazz = IntegerFeature.class;
else if (argument instanceof StringCollectionFeature)
clazz = StringFeature.class;
else {
// not one of the supported feature types: try the next
// constructor
continue;
}
// gather all trailing arguments into a single array of that type
Object[] argumentArray = (Object[]) Array.newInstance(clazz, (arguments.length - parameterTypes.length) + 1);
int j = 0;
for (int k = parameterTypes.length - 1; k < arguments.length; k++) {
Object oneArgument = arguments[k];
if (oneArgument instanceof StringCollectionFeature) {
// string collections are stored in the array via a proxy
@SuppressWarnings("unchecked") StringCollectionFeature<T> stringCollectionFeature = (StringCollectionFeature<T>) oneArgument;
StringCollectionFeatureProxy<T> proxy = new StringCollectionFeatureProxy<T>(stringCollectionFeature);
oneArgument = proxy;
}
if (!clazz.isAssignableFrom(oneArgument.getClass())) {
throw new FeatureSyntaxException("Mismatched array types: " + clazz.getSimpleName() + ", " + oneArgument.getClass().getSimpleName(), descriptor, topLevelDescriptor);
}
argumentArray[j++] = oneArgument;
}
// next argument
// look the constructor up again, with the trailing arguments replaced
// by the array type
Class<?>[] argumentTypesWithArray = new Class<?>[parameterTypes.length];
for (int k = 0; k < parameterTypes.length - 1; k++) {
Object oneArgument = arguments[k];
argumentTypesWithArray[k] = oneArgument.getClass();
}
argumentTypesWithArray[argumentTypesWithArray.length - 1] = argumentArray.getClass();
constructor = this.getMatchingAccessibleConstructor(featureClass, argumentTypesWithArray);
if (constructor != null) {
// found: replace the argument sets by a single set ending with the array
argumentsList = new ArrayList<Object[]>();
Object[] argumentsWithArray = new Object[parameterTypes.length];
for (int k = 0; k < parameterTypes.length - 1; k++) {
Object oneArgument = arguments[k];
argumentsWithArray[k] = oneArgument;
}
argumentsWithArray[parameterTypes.length - 1] = argumentArray;
argumentsList.add(argumentsWithArray);
break;
}
}
// end if: last parameter of the constructor is an array
}
// end if: argument count compatible with this constructor
}
if (constructor == null) {
// still no match: look for a constructor whose parameters can be
// reached by converting arguments (IntegerFeature to DoubleFeature,
// StringCollectionFeature to StringFeature, or a custom conversion)
for (Constructor<?> oneConstructor : constructors) {
Class<?>[] parameterTypes = oneConstructor.getParameterTypes();
boolean isMatchingConstructor = false;
// indexes of the arguments needing each kind of conversion
List<Integer> intParametersToConvert = new ArrayList<Integer>();
List<Integer> stringCollectionParametersToConvert = new ArrayList<Integer>();
List<Integer> customParametersToConvert = new ArrayList<Integer>();
if (parameterTypes.length == argumentTypes.length) {
int j = 0;
isMatchingConstructor = true;
for (Class<?> parameterType : parameterTypes) {
if (parameterType.isAssignableFrom(argumentTypes[j]) && !StringCollectionFeature.class.isAssignableFrom(argumentTypes[j])) {
// nothing to do here
} else if (parameterType.equals(DoubleFeature.class) && IntegerFeature.class.isAssignableFrom(argumentTypes[j])) {
intParametersToConvert.add(j);
} else if ((parameterType.equals(StringFeature.class) || parameterType.equals(Feature.class)) && StringCollectionFeature.class.isAssignableFrom(argumentTypes[j])) {
stringCollectionParametersToConvert.add(j);
} else if (this.canConvert(parameterType, argumentTypes[j])) {
customParametersToConvert.add(j);
} else {
isMatchingConstructor = false;
break;
}
j++;
}
}
if (isMatchingConstructor) {
@SuppressWarnings({ "rawtypes", "unchecked" }) Constructor<? extends Feature> matchingConstructor = (Constructor<? extends Feature>) oneConstructor;
constructor = matchingConstructor;
// apply the recorded conversions to every set of arguments, in place
for (Object[] myArguments : argumentsList) {
for (int indexToConvert : intParametersToConvert) {
@SuppressWarnings("unchecked") IntegerFeature<T> integerFeature = (IntegerFeature<T>) myArguments[indexToConvert];
IntegerToDoubleFeature<T> intToDoubleFeature = new IntegerToDoubleFeature<T>(integerFeature);
myArguments[indexToConvert] = intToDoubleFeature;
}
for (int indexToConvert : stringCollectionParametersToConvert) {
@SuppressWarnings("unchecked") StringCollectionFeature<T> stringCollectionFeature = (StringCollectionFeature<T>) myArguments[indexToConvert];
StringCollectionFeatureProxy<T> proxy = new StringCollectionFeatureProxy<T>(stringCollectionFeature);
myArguments[indexToConvert] = proxy;
}
for (int indexToConvert : customParametersToConvert) {
@SuppressWarnings("unchecked") Feature<T, ?> argumentToConvert = (Feature<T, ?>) myArguments[indexToConvert];
Feature<T, ?> customArgument = this.convertArgument(parameterTypes[indexToConvert], argumentToConvert);
myArguments[indexToConvert] = customArgument;
// the original argument is registered as an argument of its replacement
customArgument.addArgument(argumentToConvert);
}
}
break;
}
// found a matching constructor
}
// next possible constructor
}
// still haven't found a constructor, what next?
}
if (constructor == null)
throw new NoConstructorFoundException("No constructor found for " + descriptor.getFunctionName() + " (" + featureClass.getName() + ") matching the arguments provided", descriptor, topLevelDescriptor);
// instantiate one feature per set of arguments
for (Object[] myArguments : argumentsList) {
@SuppressWarnings("rawtypes") Feature feature;
try {
feature = constructor.newInstance(myArguments);
} catch (IllegalArgumentException e) {
throw new RuntimeException(e);
} catch (InstantiationException e) {
throw new RuntimeException(e);
} catch (IllegalAccessException e) {
throw new RuntimeException(e);
} catch (InvocationTargetException e) {
throw new RuntimeException(e);
}
@SuppressWarnings("unchecked") Feature<T, ?> genericFeature = feature;
this.injectDependencies(feature);
// features relying on external resources need the resource finder,
// and cannot be built without one
if (genericFeature instanceof ExternalResourceFeature) {
if (this.getExternalResourceFinder() == null) {
throw new JolicielException("No external resource finder set.");
}
@SuppressWarnings("unchecked") ExternalResourceFeature<T> externalResourceFeature = (ExternalResourceFeature<T>) genericFeature;
externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
} else if (genericFeature instanceof ExternalResourceDoubleFeature) {
if (this.getExternalResourceFinder() == null) {
throw new JolicielException("No external resource finder set.");
}
@SuppressWarnings("unchecked") ExternalResourceDoubleFeature<T> externalResourceFeature = (ExternalResourceDoubleFeature<T>) genericFeature;
externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
} else if (genericFeature instanceof MultivaluedExternalResourceFeature) {
if (this.getExternalResourceFinder() == null) {
throw new JolicielException("No external resource finder set.");
}
@SuppressWarnings("unchecked") MultivaluedExternalResourceFeature<T> externalResourceFeature = (MultivaluedExternalResourceFeature<T>) genericFeature;
externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
}
// add this feature's arguments
for (Object argument : myArguments) {
if (argument instanceof Feature[]) {
// an array argument: register each of its elements
@SuppressWarnings("unchecked") Feature<T, ?>[] featureArray = (Feature<T, ?>[]) argument;
for (Feature<T, ?> oneFeature : featureArray) {
genericFeature.addArgument(oneFeature);
}
} else {
@SuppressWarnings("unchecked") Feature<T, ?> featureArgument = (Feature<T, ?>) argument;
genericFeature.addArgument(featureArgument);
}
}
Feature<T, ?> convertedFeature = this.convertFeature(genericFeature);
features.add(convertedFeature);
}
// next internal argument list
}
// next argument list
return features;
}
use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
the class StringCollectionFeatureWrapper method check.
/**
 * Checks the wrapped feature once for every combination of outcomes of the
 * enclosed collection features.
 * <p>
 * Each combination takes one outcome from every collection feature that
 * returned a non-null result. Before the wrapped feature is checked, each
 * chosen outcome is stored in the runtime environment under the name of its
 * collection feature. The weight of a combination is the product of the
 * weights of its outcomes.
 *
 * @return the weighted outcomes gathered over all combinations, or null if
 *         no collection feature returned a result or if the wrapped feature
 *         returned null for every combination
 * @throws JolicielException
 *             if the wrapped feature is not a string, boolean, double or
 *             integer feature
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> check(T context, RuntimeEnvironment env) throws TalismaneException {
    // evaluate each enclosed collection, keeping only those with a result
    List<FeatureResult<List<WeightedOutcome<String>>>> availableResults = new ArrayList<FeatureResult<List<WeightedOutcome<String>>>>();
    for (StringCollectionFeature<T> collectionFeature : collectionFeatures) {
        FeatureResult<List<WeightedOutcome<String>>> oneResult = collectionFeature.check(context, env);
        if (oneResult != null)
            availableResults.add(oneResult);
    }
    if (availableResults.isEmpty())
        return null;

    // cross product of the outcomes of all enclosed collections, grown one
    // collection at a time
    List<List<CollectionFeatureResult>> combinations = new ArrayList<List<CollectionFeatureResult>>();
    combinations.add(new ArrayList<CollectionFeatureResult>());
    for (FeatureResult<List<WeightedOutcome<String>>> oneResult : availableResults) {
        String featureName = oneResult.getFeature().getName();
        List<List<CollectionFeatureResult>> extended = new ArrayList<List<CollectionFeatureResult>>();
        for (WeightedOutcome<String> weightedOutcome : oneResult.getOutcome()) {
            for (List<CollectionFeatureResult> shorter : combinations) {
                CollectionFeatureResult element = new CollectionFeatureResult();
                element.featureName = featureName;
                element.outcome = weightedOutcome.getOutcome();
                element.weight = weightedOutcome.getWeight();

                List<CollectionFeatureResult> longer = new ArrayList<CollectionFeatureResult>(shorter);
                longer.add(element);
                extended.add(longer);
            }
        }
        combinations = extended;
    }

    // check the wrapped feature once per combination
    List<WeightedOutcome<String>> weightedOutcomes = new ArrayList<WeightedOutcome<String>>();
    for (List<CollectionFeatureResult> combination : combinations) {
        StringBuilder prefixBuilder = new StringBuilder();
        double combinedWeight = 1.0;
        for (CollectionFeatureResult element : combination) {
            env.setValue(element.featureName, element.outcome);
            prefixBuilder.append(element.outcome).append("|");
            combinedWeight *= element.weight;
        }
        String prefix = prefixBuilder.toString();

        FeatureResult<?> wrappedResult = wrappedFeature.check(context, env);
        if (wrappedResult == null)
            continue;

        Class<?> wrappedType = wrappedFeature.getFeatureType();
        if (wrappedType.equals(StringFeature.class)) {
            // string outcomes are used as is, without the collection prefix
            String outcome = (String) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(outcome, combinedWeight));
        } else if (wrappedType.equals(BooleanFeature.class)) {
            Boolean outcome = (Boolean) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix + outcome.toString(), combinedWeight));
        } else if (wrappedType.equals(DoubleFeature.class)) {
            // numeric outcomes scale the weight instead of extending the name
            Double outcome = (Double) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix, combinedWeight * outcome.doubleValue()));
        } else if (wrappedType.equals(IntegerFeature.class)) {
            Integer outcome = (Integer) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix, combinedWeight * outcome.doubleValue()));
        } else {
            throw new JolicielException("Cannot include collections in a top-level feature of type: " + wrappedFeature.getFeatureType().getSimpleName());
        }
    }

    if (weightedOutcomes.isEmpty())
        return null;
    return this.generateResult(weightedOutcomes);
}
use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
the class MachineLearningModelFactory method getMachineLearningModel.
/**
 * Reads a machine learning model from a zip stream.
 * <p>
 * The first zip entry must be {@code algorithm.txt}, and its first line must
 * be the name of a {@code MachineLearningAlgorithm}. A model of the matching
 * type is created, every remaining zip entry is handed to it, and
 * {@code onLoadComplete} is called once all entries have been read. The
 * stream is always closed before this method returns.
 *
 * @param zis
 *            the zip stream to read the model from; closed by this method
 * @return the loaded model
 * @throws JolicielException
 *             if the zip has no entries, if its first entry is not
 *             {@code algorithm.txt}, or if the algorithm is missing, unknown
 *             or not supported
 * @throws RuntimeException
 *             wrapping any {@code IOException} raised while reading the zip
 */
public MachineLearningModel getMachineLearningModel(ZipInputStream zis) throws ClassNotFoundException {
    try {
        ZipEntry ze = zis.getNextEntry();
        // getNextEntry returns null when there are no entries at all
        if (ze == null) {
            throw new JolicielException("Expected algorithm.txt as first entry in zip, but the zip contains no entries.");
        }
        if (!ze.getName().equals("algorithm.txt")) {
            throw new JolicielException("Expected algorithm.txt as first entry in zip. Was: " + ze.getName());
        }

        // note: assuming the model type will always be the first entry
        // the scanner is deliberately left open: closing it would close the
        // zip stream before the remaining entries are read
        @SuppressWarnings("resource") Scanner typeScanner = new Scanner(zis, "UTF-8");
        if (!typeScanner.hasNextLine()) {
            throw new JolicielException("Cannot find algorithm in zip file");
        }
        String algorithmString = typeScanner.nextLine();
        MachineLearningAlgorithm algorithm;
        try {
            algorithm = MachineLearningAlgorithm.valueOf(algorithmString);
        } catch (IllegalArgumentException iae) {
            LogUtils.logError(LOG, iae);
            throw new JolicielException("Unknown algorithm: " + algorithmString);
        }

        MachineLearningModel machineLearningModel = null;
        switch (algorithm) {
        case MaxEnt:
            machineLearningModel = new MaximumEntropyModel();
            break;
        case LinearSVM:
            machineLearningModel = new LinearSVMModel();
            break;
        case LinearSVMOneVsRest:
            machineLearningModel = new LinearSVMOneVsRestModel();
            break;
        case Perceptron:
            machineLearningModel = new PerceptronClassificationModel();
            break;
        default:
            throw new JolicielException("Machine learning algorithm not yet supported: " + algorithm);
        }

        // let the model read each of the remaining zip entries
        while ((ze = zis.getNextEntry()) != null) {
            LOG.debug(ze.getName());
            machineLearningModel.loadZipEntry(zis, ze);
        }
        machineLearningModel.onLoadComplete();
        return machineLearningModel;
    } catch (IOException ioe) {
        LogUtils.logError(LOG, ioe);
        throw new RuntimeException(ioe);
    } finally {
        try {
            zis.close();
        } catch (IOException ioe) {
            // a failure to close is logged only, so that it does not mask
            // the result or an earlier exception
            LogUtils.logError(LOG, ioe);
        }
    }
}
use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
the class WordListFinder method addWordList.
/**
 * Add an external word list located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must start with {@code "Type: "},
 * followed by the type of the content. The only type handled here is
 * {@code WordList}, in which case the rest of the scanner is read into a
 * {@code WordList} which is added to this finder.
 *
 * @param fileName
 *            the name of the file being read, used for the word list and in
 *            error messages
 * @param scanner
 *            the scanner positioned at the start of the file
 * @throws TalismaneException
 *             if unknown file type
 * @throws JolicielException
 *             if the scanner is empty or its first line does not start with
 *             {@code "Type: "}
 */
public void addWordList(String fileName, Scanner scanner) throws TalismaneException {
    LOG.debug("Reading " + fileName);
    // an empty source is reported like a missing header, rather than
    // failing with a NoSuchElementException from nextLine()
    String typeLine = scanner.hasNextLine() ? scanner.nextLine() : "";
    if (!typeLine.startsWith("Type: "))
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");

    String type = typeLine.substring("Type: ".length());
    if (!"WordList".equals(type))
        throw new TalismaneException("Unexpected type in file: " + fileName + ": " + type);

    WordList textFileWordList = new WordList(fileName, scanner);
    this.addWordList(textFileWordList);
}
Aggregations