Search in sources:

Example 1 with Transition

use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.

the class ParseFeatureTester method onNextParseConfiguration.

/**
 * Replays the transitions of the supplied configuration step by step. Before each transition
 * is applied, every parse feature is checked against the intermediate configuration, and a
 * textual rendering of the sentence is recorded in {@code featureResultMap} under
 * feature-result string, then transition code.
 *
 * @param parseConfiguration the configuration whose pos-tag sequence and transitions are replayed
 * @throws TalismaneException declared by this method; may be raised by the feature checks or by applying a transition
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    // Begin with a fresh configuration over the same pos-tag sequence.
    ParseConfiguration currentConfiguration = new ParseConfiguration(parseConfiguration.getPosTagSequence());
    for (Transition transition : parseConfiguration.getTransitions()) {
        // Render the sentence, marking the first element of the stack and of the buffer with #[...]#.
        StringBuilder rendering = new StringBuilder();
        for (PosTaggedToken taggedToken : currentConfiguration.getPosTagSequence()) {
            boolean highlighted = taggedToken.equals(currentConfiguration.getStack().getFirst())
                    || taggedToken.equals(currentConfiguration.getBuffer().getFirst());
            String tokenAndTag = taggedToken.getToken().getOriginalText().replace(' ', '_') + "/" + taggedToken.getTag().toString();
            if (highlighted) {
                rendering.append(" #[" + tokenAndTag + "]#");
            } else {
                rendering.append(" " + tokenAndTag);
            }
        }
        rendering.append(" ## Line: " + parseConfiguration.getSentence().getStartLineNumber());
        String sentenceText = rendering.toString();
        if (LOG.isTraceEnabled()) {
            LOG.trace(sentenceText);
        }

        // Collect the non-null results of every feature for the current configuration.
        List<FeatureResult<?>> collectedResults = new ArrayList<FeatureResult<?>>();
        for (ParseConfigurationFeature<?> parseFeature : parseFeatures) {
            RuntimeEnvironment env = new RuntimeEnvironment();
            FeatureResult<?> result = parseFeature.check(currentConfiguration, env);
            if (result == null) {
                continue;
            }
            collectedResults.add(result);
            if (LOG.isTraceEnabled()) {
                LOG.trace(result.toString());
            }
        }

        // File the rendered sentence under each feature result and the transition's code.
        String classification = transition.getCode();
        for (FeatureResult<?> result : collectedResults) {
            String resultKey = result.toString();
            Map<String, List<String>> byClassification = featureResultMap.get(resultKey);
            if (byClassification == null) {
                byClassification = new TreeMap<String, List<String>>();
                featureResultMap.put(resultKey, byClassification);
            }
            List<String> sentences = byClassification.get(classification);
            if (sentences == null) {
                sentences = new ArrayList<String>();
                byClassification.put(classification, sentences);
            }
            sentences.add(sentenceText);
        }

        // Copy the configuration, then advance the copy by applying the transition.
        currentConfiguration = new ParseConfiguration(currentConfiguration);
        transition.apply(currentConfiguration);
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) Transition(com.joliciel.talismane.parser.Transition) List(java.util.List) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 2 with Transition

use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.

the class ParserFeatureParser method getRules.

/**
 * Builds parser rules from tab-delimited rule descriptors.
 * <p>
 * Blank lines and lines starting with {@code #} are skipped. Every other line is split on tabs:
 * the first field is the transition code; with more than two fields the second is the descriptor
 * name and the third the descriptor, otherwise the second field is the descriptor.
 * <p>
 * A transition code starting with {@code !} marks a negative rule and may list several codes
 * separated by {@code ;}. An empty transition code is only accepted for a named descriptor; such
 * a line is still parsed but yields no rule.
 *
 * @param ruleDescriptors the descriptor lines to parse
 * @return the rules built from the descriptors, in input order
 * @throws TalismaneException if a line has no descriptor field, if a line without a transition
 *         has no name, or if a rule's feature is not a boolean feature
 */
public List<ParserRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<ParserRule> rules = new ArrayList<ParserRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        if (ruleDescriptor.trim().length() == 0 || ruleDescriptor.startsWith("#")) {
            // blank line or comment: not a rule descriptor
            continue;
        }
        String[] ruleParts = ruleDescriptor.split("\t");
        if (ruleParts.length < 2) {
            // Without this guard the descriptor lookup below fails with an
            // unchecked ArrayIndexOutOfBoundsException that hides the faulty line.
            throw new TalismaneException("Rule descriptor must contain at least two tab-separated fields: " + ruleDescriptor);
        }
        String transitionCode = ruleParts[0];
        Transition transition = null;
        Set<Transition> transitions = null;
        boolean negative = false;
        String descriptor = null;
        String descriptorName = null;
        if (ruleParts.length > 2) {
            descriptor = ruleParts[2];
            descriptorName = ruleParts[1];
        } else {
            descriptor = ruleParts[1];
        }
        if (transitionCode.length() == 0) {
            if (descriptorName == null) {
                throw new TalismaneException("Rule without Transition must have a name.");
            }
        } else if (transitionCode.startsWith("!")) {
            negative = true;
            String[] transitionCodes = transitionCode.substring(1).split(";");
            transitions = new HashSet<Transition>();
            for (String code : transitionCodes) {
                Transition oneTransition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(code);
                transitions.add(oneTransition);
            }
            // For negative rules the single transition only feeds the null check below;
            // the rule itself is built from the whole set.
            transition = transitions.iterator().next();
        } else {
            transition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(transitionCode);
        }
        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        List<ParseConfigurationFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
        if (transition == null) {
            // just a descriptor, not a rule
            continue;
        }
        for (ParseConfigurationFeature<?> feature : myFeatures) {
            if (!(feature instanceof BooleanFeature)) {
                throw new TalismaneException("Rule must be based on a boolean feature.");
            }
            @SuppressWarnings("unchecked") BooleanFeature<ParseConfigurationWrapper> condition = (BooleanFeature<ParseConfigurationWrapper>) feature;
            if (negative) {
                rules.add(new ParserRule(condition, transitions, true));
            } else {
                rules.add(new ParserRule(condition, transition, false));
            }
        }
    }
    return rules;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor) BooleanFeature(com.joliciel.talismane.machineLearning.features.BooleanFeature) Transition(com.joliciel.talismane.parser.Transition)

Example 3 with Transition

use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.

the class ParseEvaluationSentenceWriter method onParseEnd.

/**
 * Writes one CSV section comparing the real parse of a sentence with up to {@code guessCount}
 * guessed parses. Tokens are aligned into columns by their start index.
 * <p>
 * The real configuration produces four rows: original text, pos-tag code, governing label and
 * governor column. Each guessed configuration produces an optional text row (when
 * {@code hasTokeniser}), an optional pos-tag row (when {@code hasPosTagger}), then label,
 * governor column and transition probability rows.
 * <p>
 * A real token with no governing dependency gets an empty label, as guessed tokens do.
 *
 * @param realConfiguration the reference configuration
 * @param guessedConfigurations the guessed configurations; only the first {@code guessCount} are written
 * @throws IOException if writing to the CSV writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
    // Gather every start index that needs a column, in ascending order.
    TreeSet<Integer> startIndexes = new TreeSet<Integer>();
    addNonRootStartIndexes(realConfiguration, startIndexes);
    if (hasTokeniser || hasPosTagger) {
        int i = 0;
        for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
            addNonRootStartIndexes(guessedConfiguration, startIndexes);
            i++;
            if (i == guessCount)
                break;
        }
    }

    // Map each start index to its column position.
    Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
    int j = 0;
    for (int startIndex : startIndexes) {
        startIndexMap.put(startIndex, j++);
    }

    PosTaggedToken[] realTokens = alignTokensToColumns(realConfiguration, startIndexMap, startIndexes.size());
    writeOriginalTextRow(realTokens);
    writePosTagCodeRow(realTokens);
    writeGoverningLabelRow(realConfiguration, realTokens);
    writeGovernorColumnRow(realConfiguration, realTokens, startIndexMap);

    for (int i = 0; i < guessCount; i++) {
        if (i < guessedConfigurations.size()) {
            ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
            PosTaggedToken[] guessedTokens = alignTokensToColumns(guessedConfiguration, startIndexMap, startIndexes.size());
            if (hasTokeniser) {
                writeOriginalTextRow(guessedTokens);
            }
            if (hasPosTagger) {
                writePosTagCodeRow(guessedTokens);
            }
            writeGoverningLabelRow(guessedConfiguration, guessedTokens);
            writeGovernorColumnRow(guessedConfiguration, guessedTokens, startIndexMap);
            writeTransitionProbabilityRow(guessedConfiguration, guessedTokens);
        } else {
            // Fewer guesses than guessCount: pad with two empty rows.
            // NOTE(review): a written guess always takes at least three rows - confirm two is intended.
            csvFileWriter.write("\n");
            csvFileWriter.write("\n");
        }
    }
    csvFileWriter.flush();
}

/** Adds the start index of every token not tagged as root to {@code startIndexes}. */
private void addNonRootStartIndexes(ParseConfiguration configuration, TreeSet<Integer> startIndexes) {
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            Token token = posTaggedToken.getToken();
            startIndexes.add(token.getStartIndex());
        }
    }
}

/** Places each non-root token in the column of its start index; columns with no token stay null. */
private PosTaggedToken[] alignTokensToColumns(ParseConfiguration configuration, Map<Integer, Integer> startIndexMap, int columnCount) {
    PosTaggedToken[] alignedTokens = new PosTaggedToken[columnCount];
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            alignedTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
        }
    }
    return alignedTokens;
}

/** Writes a row with each token's original text; an empty column gets only a separator. */
private void writeOriginalTextRow(PosTaggedToken[] alignedTokens) throws IOException {
    for (PosTaggedToken posTaggedToken : alignedTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes a row with each token's pos-tag code; an empty column gets only a separator. */
private void writePosTagCodeRow(PosTaggedToken[] alignedTokens) throws IOException {
    for (PosTaggedToken posTaggedToken : alignedTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes a row with the label of each token's governing dependency: an empty string when the
 * token has no governing dependency, the text "null" when the dependency has a null label.
 */
private void writeGoverningLabelRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
    for (PosTaggedToken posTaggedToken : alignedTokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            String label = "";
            if (arc != null) {
                label = arc.getLabel() == null ? "null" : arc.getLabel();
            }
            csvFileWriter.write(CSV.format(label));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes a row with the column label of each token's governor, or "ROOT" when the token has
 * no governing dependency or its head is tagged as root.
 */
private void writeGovernorColumnRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens, Map<Integer, Integer> startIndexMap) throws IOException {
    for (PosTaggedToken posTaggedToken : alignedTokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            int startIndex = -1;
            if (arc != null) {
                PosTaggedToken head = arc.getHead();
                if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    startIndex = head.getToken().getStartIndex();
                }
            }
            if (startIndex < 0)
                csvFileWriter.write(CSV.format("ROOT"));
            else
                csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes a row with the decision probability of the transition behind each token's governing
 * dependency; 1.0 when there is no dependency or no transition for it.
 */
private void writeTransitionProbabilityRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
    for (PosTaggedToken posTaggedToken : alignedTokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            double prob = 1.0;
            if (arc != null) {
                Transition transition = configuration.getTransition(arc);
                if (transition != null)
                    prob = transition.getDecision().getProbability();
            }
            csvFileWriter.write(CSV.format(prob));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) Token(com.joliciel.talismane.tokeniser.Token) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) TreeSet(java.util.TreeSet) Transition(com.joliciel.talismane.parser.Transition) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 4 with Transition

use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.

the class TransitionLogWriter method onNextParseConfiguration.

/**
 * Writes a tab-separated log of the transitions of the supplied configuration. After a blank
 * line and an initial row with the top of stack and top of buffer, each transition is applied
 * to a copy of the running configuration and logged as one row: its code, the resulting top of
 * stack and top of buffer, and the dependency it added, if any.
 *
 * @param parseConfiguration the configuration whose transitions are replayed
 * @throws TalismaneException declared by this method; may be raised when a transition is applied
 * @throws IOException if writing to the writer fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException, IOException {
    ParseConfiguration currentConfiguration = new ParseConfiguration(parseConfiguration.getPosTagSequence());
    writer.write("\n");
    String initialRow = "\t" + this.getTopOfStack(currentConfiguration) + "\t" + this.getTopOfBuffer(currentConfiguration) + "\t" + "\n";
    writer.write(initialRow);

    // Dependencies already reported, so that the one added by each transition can be spotted.
    Set<DependencyArc> knownArcs = new HashSet<DependencyArc>();
    for (Transition transition : parseConfiguration.getTransitions()) {
        currentConfiguration = new ParseConfiguration(currentConfiguration);
        transition.apply(currentConfiguration);

        DependencyArc addedArc = null;
        if (currentConfiguration.getDependencies().size() > knownArcs.size()) {
            for (DependencyArc candidate : currentConfiguration.getDependencies()) {
                // add() returns true only for an arc not seen before
                if (knownArcs.add(candidate)) {
                    addedArc = candidate;
                    break;
                }
            }
        }

        String addedArcText = "";
        if (addedArc != null) {
            String labelPart = addedArc.getLabel() + "[";
            String headPart = addedArc.getHead().getToken().getOriginalText().replace(' ', '_') + "|" + addedArc.getHead().getTag().getCode();
            String dependentPart = addedArc.getDependent().getToken().getOriginalText().replace(' ', '_') + "|" + addedArc.getDependent().getTag().getCode();
            addedArcText = labelPart + headPart + "," + dependentPart + "]";
        }

        String row = transition.getCode() + "\t" + this.getTopOfStack(currentConfiguration) + "\t" + this.getTopOfBuffer(currentConfiguration) + "\t" + addedArcText + "\n";
        writer.write(row);
    }
    writer.flush();
}
Also used : Transition(com.joliciel.talismane.parser.Transition) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) HashSet(java.util.HashSet)

Aggregations

Transition (com.joliciel.talismane.parser.Transition)4 ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration)3 DependencyArc (com.joliciel.talismane.parser.DependencyArc)2 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)2 ArrayList (java.util.ArrayList)2 TalismaneException (com.joliciel.talismane.TalismaneException)1 BooleanFeature (com.joliciel.talismane.machineLearning.features.BooleanFeature)1 FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult)1 FunctionDescriptor (com.joliciel.talismane.machineLearning.features.FunctionDescriptor)1 FunctionDescriptorParser (com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser)1 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)1 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)1 Token (com.joliciel.talismane.tokeniser.Token)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 TreeSet (java.util.TreeSet)1