Search in sources :

Example 1 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class LinearSVMModelTrainer method trainModel.

/**
 * Trains a liblinear classification model from the events in the given
 * corpus event stream.
 * <p>
 * Only logistic regression solvers are accepted, since a probability
 * distribution over outcomes is required. Features whose count is below the
 * cutoff are removed from the feature matrix before training (only when the
 * cutoff is greater than 1).
 * <p>
 * When one-vs-rest training is enabled, one binary model is trained per
 * atomic outcome. An outcome containing a tab character is treated as a
 * combination of several atomic outcomes: an event carrying such an outcome
 * counts as a positive example for each of its tab-separated parts. If event
 * balancing is enabled, positive events are duplicated so that positive and
 * negative events are approximately balanced (balancing up only).
 * Otherwise a single multi-class model is trained.
 *
 * @param corpusEventStream
 *            the events to train on; its attributes are copied into the
 *            model attributes
 * @param descriptors
 *            descriptors handed over to the resulting model
 * @return a {@link LinearSVMOneVsRestModel} when one-vs-rest training is
 *         enabled, otherwise a {@link LinearSVMModel}
 * @throws JolicielException
 *             if the configured solver is not a logistic regression solver
 */
@Override
public ClassificationModel trainModel(ClassificationEventStream corpusEventStream, Map<String, List<String>> descriptors) {
    // Note: since we want a probabilistic classifier, our options here
    // are limited to logistic regression:
    // L2R_LR: L2-regularized logistic regression (primal)
    // L1R_LR: L1-regularized logistic regression
    // L2R_LR_DUAL: L2-regularized logistic regression (dual)
    SolverType solver = SolverType.valueOf(this.solverType.name());
    if (!solver.isLogisticRegressionSolver())
        throw new JolicielException("To get a probability distribution of outcomes, only logistic regression solvers are supported.");
    TObjectIntMap<String> featureIndexMap = new TObjectIntHashMap<String>(1000, 0.75f, -1);
    TObjectIntMap<String> outcomeIndexMap = new TObjectIntHashMap<String>(100, 0.75f, -1);
    TIntList outcomeList = new TIntArrayList();
    TIntIntMap featureCountMap = new TIntIntHashMap();
    CountingInfo countingInfo = new CountingInfo();
    Feature[][] featureMatrix = this.getFeatureMatrix(corpusEventStream, featureIndexMap, outcomeIndexMap, outcomeList, featureCountMap, countingInfo);
    // apply the cutoff: keep only features seen at least "cutoff" times
    if (cutoff > 1) {
        LOG.debug("Feature count (after cutoff): " + countingInfo.featureCountOverCutoff);
        for (int i = 0; i < featureMatrix.length; i++) {
            Feature[] featureArray = featureMatrix[i];
            List<Feature> featureList = new ArrayList<Feature>(featureArray.length);
            for (int j = 0; j < featureArray.length; j++) {
                Feature feature = featureArray[j];
                int featureCount = featureCountMap.get(feature.getIndex());
                if (featureCount >= cutoff)
                    featureList.add(feature);
            }
            // replace the row by its filtered version; the old row becomes
            // unreachable and can be garbage collected
            featureMatrix[i] = featureList.toArray(new Feature[featureList.size()]);
        }
    }
    // invert the outcome index map: position = outcome index, value = outcome
    final String[] outcomeArray2 = new String[outcomeIndexMap.size()];
    outcomeIndexMap.forEachEntry(new TObjectIntProcedure<String>() {

        @Override
        public boolean execute(String key, int value) {
            outcomeArray2[value] = key;
            return true;
        }
    });
    List<String> outcomes = new ArrayList<String>(outcomeIndexMap.size());
    for (String outcome : outcomeArray2) {
        outcomes.add(outcome);
    }
    if (oneVsRest) {
        // find outcomes representing multiple classes
        TIntSet multiClassOutcomes = new TIntHashSet();
        // multi-class outcome index -> set of (new) atomic outcome indexes
        TIntObjectMap<TIntSet> outcomeComponentMap = new TIntObjectHashMap<TIntSet>();
        List<String> atomicOutcomes = new ArrayList<String>();
        TObjectIntMap<String> atomicOutcomeIndexes = new TObjectIntHashMap<String>();
        // original outcome index -> index in atomicOutcomes
        TIntIntMap oldIndexNewIndexMap = new TIntIntHashMap();
        // first pass: store all atomic outcomes (no tab) in the atomic
        // data structures
        for (int j = 0; j < outcomes.size(); j++) {
            String outcome = outcomes.get(j);
            if (outcome.indexOf('\t') < 0) {
                int newIndex = atomicOutcomes.size();
                atomicOutcomeIndexes.put(outcome, newIndex);
                oldIndexNewIndexMap.put(j, newIndex);
                atomicOutcomes.add(outcome);
            }
        }
        // second pass: split multi-class outcomes (containing tabs) into
        // their components, registering any component not yet known
        for (int j = 0; j < outcomes.size(); j++) {
            String outcome = outcomes.get(j);
            if (outcome.indexOf('\t') >= 0) {
                multiClassOutcomes.add(j);
                TIntSet myComponentOutcomes = new TIntHashSet();
                outcomeComponentMap.put(j, myComponentOutcomes);
                String[] parts = outcome.split("\t", -1);
                for (String part : parts) {
                    // -1 (the map's no-entry value) means the part was
                    // never seen as an outcome on its own
                    int outcomeIndex = outcomeIndexMap.get(part);
                    int newIndex = 0;
                    if (outcomeIndex < 0) {
                        outcomeIndex = countingInfo.currentOutcomeIndex++;
                        outcomeIndexMap.put(part, outcomeIndex);
                        newIndex = atomicOutcomes.size();
                        atomicOutcomeIndexes.put(part, newIndex);
                        oldIndexNewIndexMap.put(outcomeIndex, newIndex);
                        atomicOutcomes.add(part);
                    } else {
                        newIndex = oldIndexNewIndexMap.get(outcomeIndex);
                    }
                    myComponentOutcomes.add(newIndex);
                }
            }
        }
        LinearSVMOneVsRestModel linearSVMModel = new LinearSVMOneVsRestModel(config, descriptors);
        linearSVMModel.setFeatureIndexMap(featureIndexMap);
        linearSVMModel.setOutcomes(atomicOutcomes);
        // NOTE(review): the attribute keys "c" and "eps" differ from the
        // "cost" and "epsilon" keys used in the single-model branch below -
        // confirm whether readers of these attributes expect this.
        linearSVMModel.addModelAttribute("solver", this.getSolverType().name());
        linearSVMModel.addModelAttribute("cutoff", "" + this.getCutoff());
        linearSVMModel.addModelAttribute("c", "" + this.getConstraintViolationCost());
        linearSVMModel.addModelAttribute("eps", "" + this.getEpsilon());
        linearSVMModel.addModelAttribute("oneVsRest", "" + this.isOneVsRest());
        linearSVMModel.getModelAttributes().putAll(corpusEventStream.getAttributes());
        // build one 1-vs-All model per outcome
        for (int j = 0; j < atomicOutcomes.size(); j++) {
            String outcome = atomicOutcomes.get(j);
            LOG.info("Building model for outcome: " + outcome);
            // create an outcome array with 1 for the current outcome
            // and 0 for all others
            double[] outcomeArray = new double[countingInfo.numEvents];
            int i = 0;
            TIntIterator outcomeIterator = outcomeList.iterator();
            int myOutcomeCount = 0;
            while (outcomeIterator.hasNext()) {
                boolean isMyOutcome = false;
                int originalOutcomeIndex = outcomeIterator.next();
                if (multiClassOutcomes.contains(originalOutcomeIndex)) {
                    if (outcomeComponentMap.get(originalOutcomeIndex).contains(j))
                        isMyOutcome = true;
                } else {
                    if (oldIndexNewIndexMap.get(originalOutcomeIndex) == j)
                        isMyOutcome = true;
                }
                int myOutcome = (isMyOutcome ? 1 : 0);
                if (myOutcome == 1)
                    myOutcomeCount++;
                outcomeArray[i++] = myOutcome;
            }
            LOG.debug("Found " + myOutcomeCount + " out of " + countingInfo.numEvents + " outcomes of type: " + outcome);
            double[] myOutcomeArray = outcomeArray;
            Feature[][] myFeatureMatrix = featureMatrix;
            if (balanceEventCounts) {
                // we start with the truncated proportion of false
                // events to true events
                // we want these approximately balanced
                // we only balance up, never balance down
                int otherCount = countingInfo.numEvents - myOutcomeCount;
                // guard against division by zero when the outcome has no
                // positive events: nothing to balance in that case
                int proportion = myOutcomeCount > 0 ? otherCount / myOutcomeCount : 0;
                if (proportion > 1) {
                    LOG.debug("Balancing events for " + outcome + " by " + proportion);
                    int newSize = otherCount + myOutcomeCount * proportion;
                    myOutcomeArray = new double[newSize];
                    myFeatureMatrix = new Feature[newSize][];
                    int l = 0;
                    for (int k = 0; k < outcomeArray.length; k++) {
                        double myOutcome = outcomeArray[k];
                        Feature[] myFeatures = featureMatrix[k];
                        if (myOutcome == 0) {
                            // negative event: copied once
                            myOutcomeArray[l] = myOutcome;
                            myFeatureMatrix[l] = myFeatures;
                            l++;
                        } else {
                            // positive event: repeated "proportion" times
                            for (int m = 0; m < proportion; m++) {
                                myOutcomeArray[l] = myOutcome;
                                myFeatureMatrix[l] = myFeatures;
                                l++;
                            }
                        }
                    }
                }
            }
            Problem problem = new Problem();
            // number of training examples: must match the (possibly
            // balanced) arrays handed to liblinear, not the original event
            // count, otherwise the duplicated events are ignored
            problem.l = myOutcomeArray.length;
            // number of features
            problem.n = countingInfo.currentFeatureIndex;
            // feature nodes - note: must be ordered by index
            problem.x = myFeatureMatrix;
            // target values
            problem.y = myOutcomeArray;
            Parameter parameter = new Parameter(solver, this.constraintViolationCost, this.epsilon);
            Model model = Linear.train(problem, parameter);
            linearSVMModel.addModel(model);
        }
        return linearSVMModel;
    } else {
        // single multi-class model: target values are the outcome indexes
        double[] outcomeArray = new double[countingInfo.numEvents];
        int i = 0;
        TIntIterator outcomeIterator = outcomeList.iterator();
        while (outcomeIterator.hasNext()) {
            outcomeArray[i++] = outcomeIterator.next();
        }
        Problem problem = new Problem();
        // number of training examples
        problem.l = countingInfo.numEvents;
        // number of features
        problem.n = countingInfo.currentFeatureIndex;
        // feature nodes - note: must be ordered by index
        problem.x = featureMatrix;
        // target values
        problem.y = outcomeArray;
        Parameter parameter = new Parameter(solver, this.constraintViolationCost, this.epsilon);
        Model model = Linear.train(problem, parameter);
        LinearSVMModel linearSVMModel = new LinearSVMModel(model, config, descriptors);
        linearSVMModel.setFeatureIndexMap(featureIndexMap);
        linearSVMModel.setOutcomes(outcomes);
        linearSVMModel.addModelAttribute("solver", this.getSolverType());
        linearSVMModel.addModelAttribute("cutoff", this.getCutoff());
        linearSVMModel.addModelAttribute("cost", this.getConstraintViolationCost());
        linearSVMModel.addModelAttribute("epsilon", this.getEpsilon());
        linearSVMModel.addModelAttribute("oneVsRest", this.isOneVsRest());
        linearSVMModel.getModelAttributes().putAll(corpusEventStream.getAttributes());
        return linearSVMModel;
    }
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException) TIntSet(gnu.trove.set.TIntSet) TIntArrayList(gnu.trove.list.array.TIntArrayList) ArrayList(java.util.ArrayList) Feature(de.bwaldvogel.liblinear.Feature) TIntIntMap(gnu.trove.map.TIntIntMap) TIntHashSet(gnu.trove.set.hash.TIntHashSet) TObjectIntHashMap(gnu.trove.map.hash.TObjectIntHashMap) TIntIntHashMap(gnu.trove.map.hash.TIntIntHashMap) TIntIterator(gnu.trove.iterator.TIntIterator) SolverType(de.bwaldvogel.liblinear.SolverType) TIntArrayList(gnu.trove.list.array.TIntArrayList) TIntObjectHashMap(gnu.trove.map.hash.TIntObjectHashMap) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) Model(de.bwaldvogel.liblinear.Model) MachineLearningModel(com.joliciel.talismane.machineLearning.MachineLearningModel) Parameter(de.bwaldvogel.liblinear.Parameter) Problem(de.bwaldvogel.liblinear.Problem) TIntList(gnu.trove.list.TIntList)

Example 2 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class AbstractFeatureParser method getFeatures.

/**
 * Get the features corresponding to a particular descriptor by performing
 * reflection on the corresponding feature class to be instantiated.
 * <p>
 * Outline of the steps performed:
 * <ol>
 * <li>Build the argument lists: literal arguments (String, Boolean, Double,
 * Integer) are wrapped in the matching literal feature; function arguments
 * are parsed via {@code parseInternal}, and since each may yield several
 * features, every combination produces its own argument list.</li>
 * <li>For each argument list, arguments that are arrays are expanded into
 * one constructor call per array element.</li>
 * <li>Find a constructor: first an exact match on the argument types, then
 * a constructor whose last parameter is an array (treated as variable
 * arguments), then a constructor that matches once arguments have been
 * converted (integer to double, string collection to string proxy, or a
 * custom conversion).</li>
 * <li>Instantiate the feature for each set of arguments, inject its
 * dependencies, set the external resource finder where applicable, register
 * the arguments on the feature, and pass it through
 * {@code convertFeature}.</li>
 * </ol>
 *
 * @param descriptor
 *            the descriptor whose arguments are used to build the features
 * @param featureClass
 *            the feature class to instantiate
 * @param topLevelDescriptor
 *            the enclosing descriptor, passed on to nested parsing and to
 *            the exceptions thrown
 * @return the list of features built, one per successful constructor call
 * @throws FeatureSyntaxException
 *             if no feature class is provided, or if the arguments gathered
 *             for an array parameter have mismatched types
 * @throws NoConstructorFoundException
 *             if no constructor matches the arguments provided
 * @throws JolicielException
 *             if a feature requires an external resource finder and none
 *             is set
 * @throws RuntimeException
 *             wrapping any reflection failure during instantiation
 */
final List<Feature<T, ?>> getFeatures(FunctionDescriptor descriptor, @SuppressWarnings("rawtypes") Class<? extends Feature> featureClass, FunctionDescriptor topLevelDescriptor) {
    if (featureClass == null)
        throw new FeatureSyntaxException("No class provided for", descriptor, topLevelDescriptor);
    List<Feature<T, ?>> features = new ArrayList<Feature<T, ?>>();
    int i = 0;
    // start with a single empty argument list; each descriptor argument
    // extends (and possibly multiplies) the lists built so far
    List<List<Object>> argumentLists = new ArrayList<List<Object>>();
    List<Object> initialArguments = new ArrayList<Object>();
    argumentLists.add(initialArguments);
    for (FunctionDescriptor argumentDescriptor : descriptor.getArguments()) {
        List<List<Object>> newArgumentLists = new ArrayList<List<Object>>();
        for (List<Object> arguments : argumentLists) {
            if (!argumentDescriptor.isFunction()) {
                // literal argument: wrap known literal types in a feature
                Object literal = argumentDescriptor.getObject();
                Object convertedObject = literal;
                if (literal instanceof String) {
                    StringLiteralFeature<T> stringLiteralFeature = new StringLiteralFeature<T>((String) literal);
                    convertedObject = stringLiteralFeature;
                } else if (literal instanceof Boolean) {
                    BooleanLiteralFeature<T> booleanLiteralFeature = new BooleanLiteralFeature<T>((Boolean) literal);
                    convertedObject = booleanLiteralFeature;
                } else if (literal instanceof Double) {
                    DoubleLiteralFeature<T> doubleLiteralFeature = new DoubleLiteralFeature<T>((Double) literal);
                    convertedObject = doubleLiteralFeature;
                } else if (literal instanceof Integer) {
                    IntegerLiteralFeature<T> integerLiteralFeature = new IntegerLiteralFeature<T>((Integer) literal);
                    convertedObject = integerLiteralFeature;
                } else {
                // do nothing - this was some sort of other object
                // added by getModifiedDescriptors that should
                // be handled as is.
                }
                // the existing list is extended in place and carried over
                arguments.add(convertedObject);
                newArgumentLists.add(arguments);
            } else {
                List<Feature<T, ?>> featureArguments = this.parseInternal(argumentDescriptor, topLevelDescriptor);
                // a function argument may resolve to several features:
                // fan out into one copy of the argument list per feature
                for (Feature<T, ?> featureArgument : featureArguments) {
                    List<Object> newArguments = new ArrayList<Object>(arguments);
                    newArguments.add(featureArgument);
                    newArgumentLists.add(newArguments);
                }
            }
        // end of: function or literal object?
        }
        // end of loop over the argument lists under construction;
        // continue with the extended lists for the next argument
        argumentLists = newArgumentLists;
    }
    for (List<Object> originalArgumentList : argumentLists) {
        // add the argument types (i.e. classes)
        // and convert arrays to multiple constructor calls
        List<Object[]> argumentsList = new ArrayList<Object[]>();
        argumentsList.add(new Object[originalArgumentList.size()]);
        Class<?>[] argumentTypes = new Class<?>[originalArgumentList.size()];
        List<Object[]> newArgumentsList = new ArrayList<Object[]>();
        for (i = 0; i < originalArgumentList.size(); i++) {
            Object arg = originalArgumentList.get(i);
            if (arg.getClass().isArray()) {
                // arrays represent multiple constructor calls:
                // clone every argument array once per array element
                Object[] argArray = (Object[]) arg;
                for (Object oneArg : argArray) {
                    for (Object[] arguments : argumentsList) {
                        Object[] newArguments = arguments.clone();
                        newArguments[i] = oneArg;
                        newArgumentsList.add(newArguments);
                    }
                }
                argumentTypes[i] = arg.getClass().getComponentType();
            } else {
                // plain argument: set it in place in each existing array
                for (Object[] myArguments : argumentsList) {
                    newArgumentsList.add(myArguments);
                    myArguments[i] = arg;
                }
                argumentTypes[i] = arg.getClass();
            }
            argumentsList = newArgumentsList;
            newArgumentsList = new ArrayList<Object[]>();
        }
        // all arguments processed: first try an exact constructor match
        @SuppressWarnings("rawtypes") Constructor<? extends Feature> constructor = this.getMatchingAccessibleConstructor(featureClass, argumentTypes);
        if (constructor == null) {
            @SuppressWarnings("rawtypes") Constructor<? extends Feature>[] constructors = this.featureConstructors.get(featureClass);
            // check if there's a variable argument constructor
            // (only attempted when there is a single set of arguments with
            // at least as many arguments as the constructor has parameters)
            for (Constructor<?> oneConstructor : constructors) {
                Class<?>[] parameterTypes = oneConstructor.getParameterTypes();
                if (parameterTypes.length >= 1 && argumentsList.size() == 1 && argumentsList.get(0).length >= parameterTypes.length) {
                    Object[] arguments = argumentsList.get(0);
                    Class<?> parameterType = parameterTypes[parameterTypes.length - 1];
                    if (parameterType.isArray()) {
                        // assume it's a variable-argument
                        // constructor
                        // build the argument for this constructor
                        // find a common type for all of the
                        // arguments, based on the first argument that
                        // falls into the array parameter.
                        Object argument = arguments[parameterTypes.length - 1];
                        Class<?> clazz = null;
                        if (argument instanceof StringFeature)
                            clazz = StringFeature.class;
                        else if (argument instanceof BooleanFeature)
                            clazz = BooleanFeature.class;
                        else if (argument instanceof DoubleFeature)
                            clazz = DoubleFeature.class;
                        else if (argument instanceof IntegerFeature)
                            clazz = IntegerFeature.class;
                        else if (argument instanceof StringCollectionFeature)
                            clazz = StringFeature.class;
                        else {
                            // unsupported argument type for an array
                            // parameter: try the next constructor
                            continue;
                        }
                        Object[] argumentArray = (Object[]) Array.newInstance(clazz, (arguments.length - parameterTypes.length) + 1);
                        int j = 0;
                        for (int k = parameterTypes.length - 1; k < arguments.length; k++) {
                            Object oneArgument = arguments[k];
                            if (oneArgument instanceof StringCollectionFeature) {
                                // string collections are wrapped in a proxy
                                // before being placed in the array
                                @SuppressWarnings("unchecked") StringCollectionFeature<T> stringCollectionFeature = (StringCollectionFeature<T>) oneArgument;
                                StringCollectionFeatureProxy<T> proxy = new StringCollectionFeatureProxy<T>(stringCollectionFeature);
                                oneArgument = proxy;
                            }
                            if (!clazz.isAssignableFrom(oneArgument.getClass())) {
                                throw new FeatureSyntaxException("Mismatched array types: " + clazz.getSimpleName() + ", " + oneArgument.getClass().getSimpleName(), descriptor, topLevelDescriptor);
                            }
                            argumentArray[j++] = oneArgument;
                        }
                        // all trailing arguments gathered into the array:
                        // build the parameter types (leading arguments as
                        // is, followed by the array type) and look again
                        Class<?>[] argumentTypesWithArray = new Class<?>[parameterTypes.length];
                        for (int k = 0; k < parameterTypes.length - 1; k++) {
                            Object oneArgument = arguments[k];
                            argumentTypesWithArray[k] = oneArgument.getClass();
                        }
                        argumentTypesWithArray[argumentTypesWithArray.length - 1] = argumentArray.getClass();
                        constructor = this.getMatchingAccessibleConstructor(featureClass, argumentTypesWithArray);
                        if (constructor != null) {
                            // replace the arguments by a single set ending
                            // with the array
                            argumentsList = new ArrayList<Object[]>();
                            Object[] argumentsWithArray = new Object[parameterTypes.length];
                            for (int k = 0; k < parameterTypes.length - 1; k++) {
                                Object oneArgument = arguments[k];
                                argumentsWithArray[k] = oneArgument;
                            }
                            argumentsWithArray[parameterTypes.length - 1] = argumentArray;
                            argumentsList.add(argumentsWithArray);
                            break;
                        }
                    }
                // end of: last constructor parameter is an array
                }
            // end of: constructor is a candidate for variable arguments
            }
            if (constructor == null) {
                // look for a constructor that matches once arguments are
                // converted, e.g. IntegerFeature to DoubleFeature or
                // StringCollectionFeature to StringFeature
                for (Constructor<?> oneConstructor : constructors) {
                    Class<?>[] parameterTypes = oneConstructor.getParameterTypes();
                    boolean isMatchingConstructor = false;
                    // positions of the arguments requiring each conversion
                    List<Integer> intParametersToConvert = new ArrayList<Integer>();
                    List<Integer> stringCollectionParametersToConvert = new ArrayList<Integer>();
                    List<Integer> customParametersToConvert = new ArrayList<Integer>();
                    if (parameterTypes.length == argumentTypes.length) {
                        int j = 0;
                        isMatchingConstructor = true;
                        for (Class<?> parameterType : parameterTypes) {
                            if (parameterType.isAssignableFrom(argumentTypes[j]) && !StringCollectionFeature.class.isAssignableFrom(argumentTypes[j])) {
                            // nothing to do here
                            } else if (parameterType.equals(DoubleFeature.class) && IntegerFeature.class.isAssignableFrom(argumentTypes[j])) {
                                intParametersToConvert.add(j);
                            } else if ((parameterType.equals(StringFeature.class) || parameterType.equals(Feature.class)) && StringCollectionFeature.class.isAssignableFrom(argumentTypes[j])) {
                                stringCollectionParametersToConvert.add(j);
                            } else if (this.canConvert(parameterType, argumentTypes[j])) {
                                customParametersToConvert.add(j);
                            } else {
                                // this parameter cannot be satisfied:
                                // give up on this constructor
                                isMatchingConstructor = false;
                                break;
                            }
                            j++;
                        }
                    }
                    if (isMatchingConstructor) {
                        @SuppressWarnings({ "rawtypes", "unchecked" }) Constructor<? extends Feature> matchingConstructor = (Constructor<? extends Feature>) oneConstructor;
                        constructor = matchingConstructor;
                        // apply the recorded conversions in place to every
                        // set of arguments
                        for (Object[] myArguments : argumentsList) {
                            for (int indexToConvert : intParametersToConvert) {
                                @SuppressWarnings("unchecked") IntegerFeature<T> integerFeature = (IntegerFeature<T>) myArguments[indexToConvert];
                                IntegerToDoubleFeature<T> intToDoubleFeature = new IntegerToDoubleFeature<T>(integerFeature);
                                myArguments[indexToConvert] = intToDoubleFeature;
                            }
                            for (int indexToConvert : stringCollectionParametersToConvert) {
                                @SuppressWarnings("unchecked") StringCollectionFeature<T> stringCollectionFeature = (StringCollectionFeature<T>) myArguments[indexToConvert];
                                StringCollectionFeatureProxy<T> proxy = new StringCollectionFeatureProxy<T>(stringCollectionFeature);
                                myArguments[indexToConvert] = proxy;
                            }
                            for (int indexToConvert : customParametersToConvert) {
                                @SuppressWarnings("unchecked") Feature<T, ?> argumentToConvert = (Feature<T, ?>) myArguments[indexToConvert];
                                Feature<T, ?> customArgument = this.convertArgument(parameterTypes[indexToConvert], argumentToConvert);
                                myArguments[indexToConvert] = customArgument;
                                // the original argument becomes an argument
                                // of its converted replacement
                                customArgument.addArgument(argumentToConvert);
                            }
                        }
                        break;
                    }
                // end of: found a matching constructor
                }
            // end of loop over the possible constructors
            }
        // end of: no constructor found via variable arguments
        }
        if (constructor == null)
            throw new NoConstructorFoundException("No constructor found for " + descriptor.getFunctionName() + " (" + featureClass.getName() + ") matching the arguments provided", descriptor, topLevelDescriptor);
        // instantiate one feature per set of arguments
        for (Object[] myArguments : argumentsList) {
            @SuppressWarnings("rawtypes") Feature feature;
            try {
                feature = constructor.newInstance(myArguments);
            } catch (IllegalArgumentException e) {
                throw new RuntimeException(e);
            } catch (InstantiationException e) {
                throw new RuntimeException(e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException(e);
            } catch (InvocationTargetException e) {
                throw new RuntimeException(e);
            }
            @SuppressWarnings("unchecked") Feature<T, ?> genericFeature = feature;
            this.injectDependencies(feature);
            // features relying on external resources need the finder
            if (genericFeature instanceof ExternalResourceFeature) {
                if (this.getExternalResourceFinder() == null) {
                    throw new JolicielException("No external resource finder set.");
                }
                @SuppressWarnings("unchecked") ExternalResourceFeature<T> externalResourceFeature = (ExternalResourceFeature<T>) genericFeature;
                externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
            } else if (genericFeature instanceof ExternalResourceDoubleFeature) {
                if (this.getExternalResourceFinder() == null) {
                    throw new JolicielException("No external resource finder set.");
                }
                @SuppressWarnings("unchecked") ExternalResourceDoubleFeature<T> externalResourceFeature = (ExternalResourceDoubleFeature<T>) genericFeature;
                externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
            } else if (genericFeature instanceof MultivaluedExternalResourceFeature) {
                if (this.getExternalResourceFinder() == null) {
                    throw new JolicielException("No external resource finder set.");
                }
                @SuppressWarnings("unchecked") MultivaluedExternalResourceFeature<T> externalResourceFeature = (MultivaluedExternalResourceFeature<T>) genericFeature;
                externalResourceFeature.setExternalResourceFinder(this.getExternalResourceFinder());
            }
            // add this feature's arguments
            for (Object argument : myArguments) {
                if (argument instanceof Feature[]) {
                    @SuppressWarnings("unchecked") Feature<T, ?>[] featureArray = (Feature<T, ?>[]) argument;
                    for (Feature<T, ?> oneFeature : featureArray) {
                        genericFeature.addArgument(oneFeature);
                    }
                } else {
                    // NOTE(review): this cast assumes every non-array
                    // argument is a Feature; the "other object" literals
                    // kept as is above would presumably fail here - confirm
                    // they never reach this point.
                    @SuppressWarnings("unchecked") Feature<T, ?> featureArgument = (Feature<T, ?>) argument;
                    genericFeature.addArgument(featureArgument);
                }
            }
            Feature<T, ?> convertedFeature = this.convertFeature(genericFeature);
            features.add(convertedFeature);
        }
    // end of loop over the sets of constructor arguments
    }
    // end of loop over the argument lists
    return features;
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) InvocationTargetException(java.lang.reflect.InvocationTargetException) Constructor(java.lang.reflect.Constructor)

Example 3 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class StringCollectionFeatureWrapper method check.

/**
 * Evaluates the wrapped feature once for every combination of outcomes
 * produced by the enclosed string collection features.
 * <p>
 * Each enclosed collection feature is checked first; those returning a null
 * result are ignored. The cross product of the remaining outcome lists is
 * then built. For each combination, the outcomes are stored in the runtime
 * environment under their feature names, the wrapped feature is checked,
 * and a weighted outcome is added according to the wrapped feature's type.
 *
 * @param context
 *            the context to check
 * @param env
 *            the runtime environment, updated with the collection outcomes
 *            of each combination before the wrapped feature is checked
 * @return the generated result, or null if no enclosed collection returned
 *         a result or no weighted outcome was produced
 * @throws TalismaneException
 *             declared by the method signature for failures in the enclosed
 *             checks
 * @throws JolicielException
 *             if the wrapped feature's type is not string, boolean, double
 *             or integer
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> check(T context, RuntimeEnvironment env) throws TalismaneException {
    List<WeightedOutcome<String>> weightedOutcomes = new ArrayList<WeightedOutcome<String>>();

    // check each enclosed collection, keeping only the non-null results
    List<FeatureResult<List<WeightedOutcome<String>>>> availableResults = new ArrayList<FeatureResult<List<WeightedOutcome<String>>>>();
    for (StringCollectionFeature<T> enclosedFeature : collectionFeatures) {
        FeatureResult<List<WeightedOutcome<String>>> enclosedResult = enclosedFeature.check(context, env);
        if (enclosedResult == null) {
            continue;
        }
        availableResults.add(enclosedResult);
    }
    if (availableResults.isEmpty()) {
        return null;
    }

    // cross product of the outcomes of all enclosed collections, seeded
    // with a single empty combination
    List<List<CollectionFeatureResult>> combinations = new ArrayList<List<CollectionFeatureResult>>();
    combinations.add(new ArrayList<CollectionFeatureResult>());
    for (FeatureResult<List<WeightedOutcome<String>>> enclosedResult : availableResults) {
        String enclosedName = enclosedResult.getFeature().getName();
        List<List<CollectionFeatureResult>> extended = new ArrayList<List<CollectionFeatureResult>>();
        for (WeightedOutcome<String> weightedOutcome : enclosedResult.getOutcome()) {
            for (List<CollectionFeatureResult> existing : combinations) {
                CollectionFeatureResult item = new CollectionFeatureResult();
                item.featureName = enclosedName;
                item.outcome = weightedOutcome.getOutcome();
                item.weight = weightedOutcome.getWeight();
                List<CollectionFeatureResult> grown = new ArrayList<CollectionFeatureResult>(existing);
                grown.add(item);
                extended.add(grown);
            }
        }
        combinations = extended;
    }

    // evaluate the wrapped feature for each combination
    for (List<CollectionFeatureResult> combination : combinations) {
        StringBuilder prefixBuilder = new StringBuilder();
        double combinedWeight = 1.0;
        for (CollectionFeatureResult item : combination) {
            env.setValue(item.featureName, item.outcome);
            prefixBuilder.append(item.outcome).append("|");
            combinedWeight *= item.weight;
        }
        String prefix = prefixBuilder.toString();

        FeatureResult<?> wrappedResult = wrappedFeature.check(context, env);
        if (wrappedResult == null) {
            continue;
        }

        Class<?> wrappedType = wrappedFeature.getFeatureType();
        if (wrappedType.equals(StringFeature.class)) {
            // string outcome: used on its own, weighted by the combination
            String text = (String) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(text, combinedWeight));
        } else if (wrappedType.equals(BooleanFeature.class)) {
            // boolean outcome: appended to the combination prefix
            Boolean flag = (Boolean) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix + flag.toString(), combinedWeight));
        } else if (wrappedType.equals(DoubleFeature.class)) {
            // numeric outcome: multiplied into the weight
            Double number = (Double) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix, combinedWeight * number.doubleValue()));
        } else if (wrappedType.equals(IntegerFeature.class)) {
            Integer number = (Integer) wrappedResult.getOutcome();
            weightedOutcomes.add(new WeightedOutcome<String>(prefix, combinedWeight * number.doubleValue()));
        } else {
            throw new JolicielException("Cannot include collections in a top-level feature of type: " + wrappedType.getSimpleName());
        }
    }

    if (weightedOutcomes.isEmpty()) {
        return null;
    }
    return this.generateResult(weightedOutcomes);
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException) ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) List(java.util.List) ArrayList(java.util.ArrayList)

Example 4 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class MachineLearningModelFactory method getMachineLearningModel.

/**
 * Reads a machine learning model from a zip stream.
 * <p>
 * The first zip entry must be named {@code algorithm.txt}, and its first line
 * must be the name of a {@link MachineLearningAlgorithm} constant. A model of
 * the matching type is instantiated, every remaining zip entry is passed to
 * its {@code loadZipEntry} method, and finally {@code onLoadComplete} is
 * called on it.
 * <p>
 * The stream is always closed before this method returns, whether loading
 * succeeds or fails.
 *
 * @param zis
 *          the zip stream to read from; closed by this method
 * @return the fully loaded model
 * @throws JolicielException
 *           if the zip contains no entries, if the first entry is not
 *           {@code algorithm.txt}, if that entry is empty, or if the
 *           algorithm it names is unknown or not supported
 * @throws RuntimeException
 *           wrapping any {@link IOException} raised while reading the zip
 * @throws ClassNotFoundException
 *           declared by the signature; presumably propagated from the
 *           model's loading methods (not visible here)
 */
public MachineLearningModel getMachineLearningModel(ZipInputStream zis) throws ClassNotFoundException {
    try {
        MachineLearningModel machineLearningModel = null;
        ZipEntry ze = zis.getNextEntry();
        // getNextEntry() returns null when the archive has no (more) entries:
        // fail with a clear message rather than a NullPointerException.
        if (ze == null) {
            throw new JolicielException("Expected algorithm.txt as first entry in zip, but the zip contains no entries.");
        }
        if (!ze.getName().equals("algorithm.txt")) {
            throw new JolicielException("Expected algorithm.txt as first entry in zip. Was: " + ze.getName());
        }
        // note: assuming the model type will always be the first entry
        // The scanner is deliberately left open: closing it would also close
        // the underlying zip stream, which is still needed for the remaining entries.
        @SuppressWarnings("resource") Scanner typeScanner = new Scanner(zis, "UTF-8");
        MachineLearningAlgorithm algorithm = MachineLearningAlgorithm.MaxEnt;
        if (typeScanner.hasNextLine()) {
            String algorithmString = typeScanner.nextLine();
            try {
                algorithm = MachineLearningAlgorithm.valueOf(algorithmString);
            } catch (IllegalArgumentException iae) {
                LogUtils.logError(LOG, iae);
                throw new JolicielException("Unknown algorithm: " + algorithmString);
            }
        } else {
            throw new JolicielException("Cannot find algorithm in zip file");
        }
        switch(algorithm) {
            case MaxEnt:
                machineLearningModel = new MaximumEntropyModel();
                break;
            case LinearSVM:
                machineLearningModel = new LinearSVMModel();
                break;
            case LinearSVMOneVsRest:
                machineLearningModel = new LinearSVMOneVsRestModel();
                break;
            case Perceptron:
                machineLearningModel = new PerceptronClassificationModel();
                break;
            default:
                throw new JolicielException("Machine learning algorithm not yet supported: " + algorithm);
        }
        // hand every remaining zip entry to the model
        while ((ze = zis.getNextEntry()) != null) {
            LOG.debug(ze.getName());
            machineLearningModel.loadZipEntry(zis, ze);
        }
        machineLearningModel.onLoadComplete();
        return machineLearningModel;
    } catch (IOException ioe) {
        LogUtils.logError(LOG, ioe);
        throw new RuntimeException(ioe);
    } finally {
        try {
            zis.close();
        } catch (IOException ioe) {
            // best effort: a failure to close is logged but not propagated
            LogUtils.logError(LOG, ioe);
        }
    }
}
Also used : Scanner(java.util.Scanner) LinearSVMOneVsRestModel(com.joliciel.talismane.machineLearning.linearsvm.LinearSVMOneVsRestModel) JolicielException(com.joliciel.talismane.utils.JolicielException) MaximumEntropyModel(com.joliciel.talismane.machineLearning.maxent.MaximumEntropyModel) PerceptronClassificationModel(com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModel) LinearSVMModel(com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModel) ZipEntry(java.util.zip.ZipEntry) IOException(java.io.IOException)

Example 5 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class WordListFinder method addWordList.

/**
 * Add an external word list located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must have the form
 * {@code Type: <type>}. The only type handled here is {@code WordList}, in
 * which case a {@link WordList} is built from the file name and the scanner
 * and added to this finder.
 *
 * @param fileName
 *          the name of the file the scanner reads from; used when building
 *          the word list and in error messages
 * @param scanner
 *          the scanner positioned at the start of the file's content
 * @throws TalismaneException
 *           if unknown file type
 * @throws JolicielException
 *           if the content is empty or its first line does not start with
 *           {@code "Type: "}
 */
public void addWordList(String fileName, Scanner scanner) throws TalismaneException {
    LOG.debug("Reading " + fileName);
    final String typePrefix = "Type: ";
    // An empty file has no type line at all: report it like a malformed header
    // instead of letting nextLine() fail with a NoSuchElementException.
    String typeLine = scanner.hasNextLine() ? scanner.nextLine() : null;
    if (typeLine == null || !typeLine.startsWith(typePrefix)) {
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");
    }
    String type = typeLine.substring(typePrefix.length());
    if ("WordList".equals(type)) {
        WordList textFileWordList = new WordList(fileName, scanner);
        this.addWordList(textFileWordList);
    } else {
        throw new TalismaneException("Unexpected type in file: " + fileName + ": " + type);
    }
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException) TalismaneException(com.joliciel.talismane.TalismaneException)

Aggregations

JolicielException (com.joliciel.talismane.utils.JolicielException): 8; ArrayList (java.util.ArrayList): 3; IOException (java.io.IOException): 2; List (java.util.List): 2; Scanner (java.util.Scanner): 2; TalismaneException (com.joliciel.talismane.TalismaneException): 1; ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel): 1; MachineLearningModel (com.joliciel.talismane.machineLearning.MachineLearningModel): 1; LinearSVMModel (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModel): 1; LinearSVMModelTrainer (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer): 1; LinearSVMOneVsRestModel (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMOneVsRestModel): 1; MaxentModelTrainer (com.joliciel.talismane.machineLearning.maxent.MaxentModelTrainer): 1; MaximumEntropyModel (com.joliciel.talismane.machineLearning.maxent.MaximumEntropyModel): 1; PerceptronClassificationModel (com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModel): 1; PerceptronClassificationModelTrainer (com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer): 1; WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 1; Feature (de.bwaldvogel.liblinear.Feature): 1; Model (de.bwaldvogel.liblinear.Model): 1; Parameter (de.bwaldvogel.liblinear.Parameter): 1; Problem (de.bwaldvogel.liblinear.Problem): 1