Search in sources :

Example 1 with FunctionDescriptorParser

use of com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser in project talismane by joliciel-informatique.

the class PosTaggerFeatureParser method getRules.

/**
 * Builds pos-tagger rules from a list of tab-delimited rule descriptors.
 * <p>
 * Blank (or whitespace-only) lines and lines starting with {@code #} are
 * skipped. Every other line must have one of the forms
 * {@code posTagCode<TAB>descriptor} or
 * {@code posTagCode<TAB>name<TAB>descriptor}. A pos-tag code prefixed with
 * {@code !} produces a negative rule. When the pos-tag code is empty, the
 * line must carry a name; its descriptor is still parsed, but no rule is
 * added for it.
 *
 * @param ruleDescriptors
 *          the rule descriptor lines to parse
 * @return the rules built from the descriptors, in the order encountered
 * @throws TalismaneException
 *           if a rule is incorrectly configured: it has fewer than two
 *           tab-separated parts, it has no pos-tag and no name, or it is
 *           based on a non-boolean feature
 */
public List<PosTaggerRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<PosTaggerRule> rules = new ArrayList<PosTaggerRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        // Skip blank lines and comments. trim() is needed because a line made
        // only of tabs/spaces splits into zero or one useless parts.
        if (ruleDescriptor.trim().length() == 0 || ruleDescriptor.startsWith("#")) {
            continue;
        }
        String[] ruleParts = ruleDescriptor.split("\t");
        if (ruleParts.length < 2) {
            // Without this check, ruleParts[1] below would raise an
            // ArrayIndexOutOfBoundsException instead of the documented exception.
            throw new TalismaneException("Rule must contain at least a PosTag code and a descriptor, separated by a tab: " + ruleDescriptor);
        }
        String posTagCode = ruleParts[0];
        PosTag posTag = null;
        boolean negative = false;
        String descriptor = null;
        String descriptorName = null;
        if (ruleParts.length > 2) {
            // three columns: code, name, descriptor
            descriptor = ruleParts[2];
            descriptorName = ruleParts[1];
        } else {
            // two columns: code, descriptor
            descriptor = ruleParts[1];
        }
        if (posTagCode.length() == 0) {
            if (descriptorName == null) {
                throw new TalismaneException("Rule without PosTag must have a name.");
            }
        } else {
            if (posTagCode.startsWith("!")) {
                negative = true;
                posTagCode = posTagCode.substring(1);
            }
            posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(posTagCode);
        }
        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        // The descriptor is parsed even when no pos-tag was given, so that a
        // named descriptor without a pos-tag still goes through parseDescriptor.
        List<PosTaggerFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
        if (posTag != null) {
            // is it a rule, or just a descriptor
            for (PosTaggerFeature<?> feature : myFeatures) {
                if (feature instanceof BooleanFeature) {
                    @SuppressWarnings("unchecked") BooleanFeature<PosTaggerContext> condition = (BooleanFeature<PosTaggerContext>) feature;
                    PosTaggerRule rule = new PosTaggerRule(condition, posTag);
                    rule.setNegative(negative);
                    rules.add(rule);
                } else {
                    throw new TalismaneException("Rule must be based on a boolean feature.");
                }
            }
            // next feature
        }
    }
    // next rule descriptor
    return rules;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) BooleanFeature(com.joliciel.talismane.machineLearning.features.BooleanFeature) PosTag(com.joliciel.talismane.posTagger.PosTag)

Example 2 with FunctionDescriptorParser

use of com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser in project talismane by joliciel-informatique.

the class PosTaggerFeatureParser method getFeatureSet.

/**
 * Parses the given feature descriptor lines and collects every resulting
 * pos-tagger feature into a {@link TreeSet}.
 * <p>
 * Each line is logged at debug level. Empty lines and lines starting with
 * {@code #} contribute nothing to the result.
 *
 * @param featureDescriptors
 *          the feature descriptor lines to parse
 * @return the set of features produced by all parsed descriptors
 */
public Set<PosTaggerFeature<?>> getFeatureSet(List<String> featureDescriptors) {
    Set<PosTaggerFeature<?>> collected = new TreeSet<PosTaggerFeature<?>>();
    FunctionDescriptorParser parser = new FunctionDescriptorParser();
    for (String line : featureDescriptors) {
        LOG.debug(line);
        boolean ignorable = line.length() == 0 || line.startsWith("#");
        if (ignorable) {
            // nothing to parse on empty or comment lines
            continue;
        }
        FunctionDescriptor parsed = parser.parseDescriptor(line);
        collected.addAll(this.parseDescriptor(parsed));
    }
    return collected;
}
Also used : TreeSet(java.util.TreeSet) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor)

Example 3 with FunctionDescriptorParser

use of com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser in project jochre by urieli.

the class LetterFeatureParser method getLetterFeatureSet.

/**
 * Parses the given feature descriptor lines and collects every resulting
 * letter feature into a {@link TreeSet}.
 * <p>
 * Each line is logged at trace level. Empty lines and lines starting with
 * {@code #} contribute nothing to the result.
 *
 * @param featureDescriptors
 *          the feature descriptor lines to parse
 * @return the set of letter features produced by all parsed descriptors
 */
public Set<LetterFeature<?>> getLetterFeatureSet(List<String> featureDescriptors) {
    Set<LetterFeature<?>> collected = new TreeSet<LetterFeature<?>>();
    FunctionDescriptorParser parser = new FunctionDescriptorParser();
    for (String line : featureDescriptors) {
        LOG.trace(line);
        boolean ignorable = line.length() == 0 || line.startsWith("#");
        if (ignorable) {
            // nothing to parse on empty or comment lines
            continue;
        }
        FunctionDescriptor parsed = parser.parseDescriptor(line);
        collected.addAll(this.parseDescriptor(parsed));
    }
    return collected;
}
Also used : TreeSet(java.util.TreeSet) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor)

Example 4 with FunctionDescriptorParser

use of com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser in project talismane by joliciel-informatique.

the class ParserFeatureParser method getRules.

/**
 * Builds parser rules from a list of tab-delimited rule descriptors.
 * <p>
 * Blank (or whitespace-only) lines and lines starting with {@code #} are
 * skipped. Every other line must have one of the forms
 * {@code transitionCode<TAB>descriptor} or
 * {@code transitionCode<TAB>name<TAB>descriptor}. A transition code prefixed
 * with {@code !} produces a negative rule over a set of transitions, whose
 * codes are separated by {@code ;}. When the transition code is empty, the
 * line must carry a name; its descriptor is still parsed, but no rule is
 * added for it.
 *
 * @param ruleDescriptors
 *          the rule descriptor lines to parse
 * @return the rules built from the descriptors, in the order encountered
 * @throws TalismaneException
 *           if a rule is incorrectly configured: it has fewer than two
 *           tab-separated parts, it has no transition and no name, or it is
 *           based on a non-boolean feature
 */
public List<ParserRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<ParserRule> rules = new ArrayList<ParserRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        // Skip blank lines and comments.
        if (ruleDescriptor.trim().length() == 0 || ruleDescriptor.startsWith("#")) {
            continue;
        }
        String[] ruleParts = ruleDescriptor.split("\t");
        if (ruleParts.length < 2) {
            // Without this check, ruleParts[1] below would raise an
            // ArrayIndexOutOfBoundsException instead of a TalismaneException.
            throw new TalismaneException("Rule must contain at least a Transition code and a descriptor, separated by a tab: " + ruleDescriptor);
        }
        String transitionCode = ruleParts[0];
        Transition transition = null;
        Set<Transition> transitions = null;
        boolean negative = false;
        String descriptor = null;
        String descriptorName = null;
        if (ruleParts.length > 2) {
            // three columns: code, name, descriptor
            descriptor = ruleParts[2];
            descriptorName = ruleParts[1];
        } else {
            // two columns: code, descriptor
            descriptor = ruleParts[1];
        }
        if (transitionCode.length() == 0) {
            if (descriptorName == null) {
                throw new TalismaneException("Rule without Transition must have a name.");
            }
        } else {
            if (transitionCode.startsWith("!")) {
                negative = true;
                String[] transitionCodes = transitionCode.substring(1).split(";");
                transitions = new HashSet<Transition>();
                for (String code : transitionCodes) {
                    Transition oneTransition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(code);
                    transitions.add(oneTransition);
                }
                // For negative rules the single transition only serves as the
                // "this line defines a rule" marker tested below; the rule
                // itself is built from the whole set.
                transition = transitions.iterator().next();
            } else {
                transition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(transitionCode);
            }
        }
        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        // The descriptor is parsed even when no transition was given, so that a
        // named descriptor without a transition still goes through parseDescriptor.
        List<ParseConfigurationFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
        if (transition != null) {
            // is it a rule, or just a descriptor
            for (ParseConfigurationFeature<?> feature : myFeatures) {
                if (feature instanceof BooleanFeature) {
                    @SuppressWarnings("unchecked") BooleanFeature<ParseConfigurationWrapper> condition = (BooleanFeature<ParseConfigurationWrapper>) feature;
                    if (negative) {
                        ParserRule rule = new ParserRule(condition, transitions, true);
                        rules.add(rule);
                    } else {
                        ParserRule rule = new ParserRule(condition, transition, false);
                        rules.add(rule);
                    }
                } else {
                    throw new TalismaneException("Rule must be based on a boolean feature.");
                }
            }
            // next feature
        }
    }
    // next rule descriptor
    return rules;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor) BooleanFeature(com.joliciel.talismane.machineLearning.features.BooleanFeature) Transition(com.joliciel.talismane.parser.Transition)

Example 5 with FunctionDescriptorParser

use of com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser in project talismane by joliciel-informatique.

the class ParserFeatureParser method getFeatures.

/**
 * Parses the given feature descriptor lines and collects every resulting
 * parse-configuration feature into a {@link TreeSet}.
 * <p>
 * Lines that are empty after trimming, and lines starting with {@code #},
 * contribute nothing to the result.
 *
 * @param featureDescriptors
 *          the feature descriptor lines to parse
 * @return the set of features produced by all parsed descriptors
 */
public Set<ParseConfigurationFeature<?>> getFeatures(List<String> featureDescriptors) {
    Set<ParseConfigurationFeature<?>> collected = new TreeSet<ParseConfigurationFeature<?>>();
    FunctionDescriptorParser parser = new FunctionDescriptorParser();
    for (String line : featureDescriptors) {
        boolean ignorable = line.trim().length() == 0 || line.startsWith("#");
        if (ignorable) {
            // nothing to parse on blank or comment lines
            continue;
        }
        FunctionDescriptor parsed = parser.parseDescriptor(line);
        collected.addAll(this.parseDescriptor(parsed));
    }
    return collected;
}
Also used : TreeSet(java.util.TreeSet) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor)

Aggregations

FunctionDescriptor (com.joliciel.talismane.machineLearning.features.FunctionDescriptor): 9; FunctionDescriptorParser (com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser): 9; TreeSet (java.util.TreeSet): 7; TalismaneException (com.joliciel.talismane.TalismaneException): 2; BooleanFeature (com.joliciel.talismane.machineLearning.features.BooleanFeature): 2; ArrayList (java.util.ArrayList): 2; Transition (com.joliciel.talismane.parser.Transition): 1; PosTag (com.joliciel.talismane.posTagger.PosTag): 1; PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext): 1