Search in sources :

Example 21 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class ValencyFeature method check.

/**
 * Reports how many dependents the addressed token has in the current parse configuration.
 *
 * @param wrapper supplies the parse configuration and is forwarded to the address function
 * @param env runtime environment forwarded to the address function
 * @return a result wrapping the dependent count, or {@code null} when the address function
 *         yields no result
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    ParseConfiguration parseConfiguration = wrapper.getParseConfiguration();
    FeatureResult<PosTaggedTokenWrapper> addressed = addressFunction.check(wrapper, env);
    if (addressed == null) {
        // No token at the requested address: the feature has no value.
        return null;
    }
    PosTaggedToken addressedToken = addressed.getOutcome().getPosTaggedToken();
    int dependentCount = parseConfiguration.getDependents(addressedToken).size();
    return this.generateResult(dependentCount);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 22 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class ParseEvaluationSentenceWriter method onParseEnd.

/**
 * Writes one sentence's evaluation rows to the CSV writer: the reference ("real") parse first,
 * followed by up to {@code guessCount} guessed parses.
 *
 * <p>Columns are aligned on token start index: every distinct start index found in the real
 * configuration (and, when a tokeniser or pos-tagger is being evaluated, in the first
 * {@code guessCount} guessed configurations) gets its own column. A configuration that has no
 * token starting at a given index gets an empty cell in that column.
 *
 * <p>Rows written for the real configuration: original text, pos-tag code, governing
 * dependency label, governor (either {@code ROOT} or the column label of the governor).
 * Rows written for each guess: original text (only if {@code hasTokeniser}), pos-tag code
 * (only if {@code hasPosTagger}), dependency label, governor, and transition probability.
 * If fewer than {@code guessCount} guesses were supplied, two blank lines are written for
 * each missing guess.
 *
 * @param realConfiguration the reference parse
 * @param guessedConfigurations the guessed parses, in the order they should be written
 * @throws IOException if writing to the CSV writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
    // Gather every start index that needs a column, sorted ascending.
    TreeSet<Integer> startIndexes = new TreeSet<Integer>();
    for (PosTaggedToken posTaggedToken : realConfiguration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            Token token = posTaggedToken.getToken();
            startIndexes.add(token.getStartIndex());
        }
    }
    if (hasTokeniser || hasPosTagger) {
        // Only in this case are the guessed start indexes also collected.
        int i = 0;
        for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
            for (PosTaggedToken posTaggedToken : guessedConfiguration.getPosTagSequence()) {
                if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    Token token = posTaggedToken.getToken();
                    startIndexes.add(token.getStartIndex());
                }
            }
            i++;
            if (i == guessCount)
                break;
        }
    }

    // Map each start index to its zero-based column position.
    Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
    int j = 0;
    for (int startIndex : startIndexes) {
        startIndexMap.put(startIndex, j++);
    }

    // Lay the real tokens out by column; columns with no real token stay null.
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    PosTaggedToken[] realTokens = new PosTaggedToken[startIndexes.size()];
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            realTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
        }
    }

    // Real row 1: original text.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");

    // Real row 2: pos-tag codes.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");

    // Real row 3: governing dependency labels.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
            // A token may have no governing arc (the governor row below already allows for
            // this); write an empty label in that case, exactly as the guessed rows do,
            // instead of dereferencing null.
            String realLabel = "";
            if (realArc != null) {
                realLabel = realArc.getLabel() == null ? "null" : realArc.getLabel();
            }
            csvFileWriter.write(CSV.format(realLabel));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");

    // Real row 4: governor, as ROOT or the column label of the governing token.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
            int startIndex = -1;
            if (realArc != null) {
                PosTaggedToken head = realArc.getHead();
                if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    startIndex = head.getToken().getStartIndex();
                }
            }
            if (startIndex < 0)
                csvFileWriter.write(CSV.format("ROOT"));
            else
                csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");

    for (int i = 0; i < guessCount; i++) {
        if (i < guessedConfigurations.size()) {
            ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);

            // Lay this guess's tokens out by column, same scheme as the real tokens.
            PosTaggedToken[] guessedTokens = new PosTaggedToken[startIndexes.size()];
            for (PosTaggedToken posTaggedToken : guessedConfiguration.getPosTagSequence()) {
                if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    guessedTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
                }
            }

            // Guess row: original text, only when a tokeniser is being evaluated.
            if (hasTokeniser) {
                for (PosTaggedToken posTaggedToken : guessedTokens) {
                    if (posTaggedToken != null) {
                        csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
                    } else {
                        csvFileWriter.write(CSV.getCsvSeparator());
                    }
                }
                csvFileWriter.write("\n");
            }

            // Guess row: pos-tag codes, only when a pos-tagger is being evaluated.
            if (hasPosTagger) {
                for (PosTaggedToken posTaggedToken : guessedTokens) {
                    if (posTaggedToken != null) {
                        csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
                    } else {
                        csvFileWriter.write(CSV.getCsvSeparator());
                    }
                }
                csvFileWriter.write("\n");
            }

            // Guess row: governing dependency labels (empty when the token has no arc).
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    String guessedLabel = "";
                    if (guessedArc != null) {
                        guessedLabel = guessedArc.getLabel() == null ? "null" : guessedArc.getLabel();
                    }
                    csvFileWriter.write(CSV.format(guessedLabel));
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");

            // Guess row: governor, as ROOT or the column label of the governing token.
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    int startIndex = -1;
                    if (guessedArc != null) {
                        PosTaggedToken head = guessedArc.getHead();
                        if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                            startIndex = head.getToken().getStartIndex();
                        }
                    }
                    if (startIndex < 0)
                        csvFileWriter.write(CSV.format("ROOT"));
                    else
                        csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");

            // Guess row: probability of the transition that produced the governing arc;
            // stays at 1.0 when there is no arc or no recorded transition.
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    double prob = 1.0;
                    if (guessedArc != null) {
                        Transition transition = guessedConfiguration.getTransition(guessedArc);
                        if (transition != null)
                            prob = transition.getDecision().getProbability();
                    }
                    csvFileWriter.write(CSV.format(prob));
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");
        } else {
            // Fewer guesses than guessCount: write two blank lines for the missing guess.
            csvFileWriter.write("\n");
            csvFileWriter.write("\n");
        } // have more configurations
    } // next guessed configuration
    csvFileWriter.flush();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) TreeSet(java.util.TreeSet) Transition(com.joliciel.talismane.parser.Transition) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 23 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class DependencyCountIf method check.

/**
 * Counts the dependents of the addressed token for which the criterion evaluates to true.
 *
 * @param wrapper supplies the parse configuration and is forwarded to the address function
 * @param env runtime environment forwarded to the nested feature checks
 * @return a result wrapping the number of matching dependents, or {@code null} when the
 *         address function yields no result
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    ParseConfiguration parseConfiguration = wrapper.getParseConfiguration();
    FeatureResult<PosTaggedTokenWrapper> addressed = addressFunction.check(wrapper, env);
    if (addressed == null) {
        return null;
    }

    PosTaggedToken addressedToken = addressed.getOutcome().getPosTaggedToken();
    int matches = 0;
    for (PosTaggedToken child : parseConfiguration.getDependents(addressedToken)) {
        // Evaluate the criterion against a fresh address pointing at this dependent.
        ParseConfigurationAddress address = new ParseConfigurationAddress(env);
        address.setParseConfiguration(parseConfiguration);
        address.setPosTaggedToken(child);

        FeatureResult<Boolean> verdict = criterion.check(address, env);
        if (verdict == null) {
            // A missing criterion result is treated as "no match".
            continue;
        }
        if (verdict.getOutcome()) {
            matches++;
        }
    }
    return this.generateResult(matches);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 24 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class DependencyLabelFeature method check.

/**
 * Returns the label of the dependency arc governing the token selected by {@code getToken}.
 *
 * @param wrapper supplies the parse configuration and is forwarded to {@code getToken}
 * @param env runtime environment forwarded to {@code getToken}
 * @return a result wrapping the arc label (the literal string {@code "null"} when the arc
 *         has no label), or {@code null} when there is no token or no governing arc
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<String> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    PosTaggedTokenWrapper tokenWrapper = this.getToken(wrapper, env);
    PosTaggedToken token = tokenWrapper == null ? null : tokenWrapper.getPosTaggedToken();
    if (token == null) {
        return null;
    }

    DependencyArc governingArc = wrapper.getParseConfiguration().getGoverningDependency(token);
    if (governingArc == null) {
        return null;
    }

    String arcLabel = governingArc.getLabel();
    // An unlabelled arc is reported with the string "null" rather than a null outcome.
    return this.generateResult(arcLabel != null ? arcLabel : "null");
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 25 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class DependencySearchFeature method check.

/**
 * Finds the first dependent of the reference token for which the criterion feature is true.
 *
 * @param wrapper supplies the parse configuration
 * @param env runtime environment forwarded to the nested feature checks
 * @return a result wrapping the first matching dependent, in the iteration order of
 *         {@code getDependents}, or {@code null} when there is no reference token or no
 *         dependent matches
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<PosTaggedTokenWrapper> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    ParseConfiguration parseConfiguration = wrapper.getParseConfiguration();
    FeatureResult<PosTaggedTokenWrapper> reference = referenceTokenFeature.check(parseConfiguration, env);
    if (reference == null) {
        return null;
    }

    PosTaggedToken referenceToken = reference.getOutcome().getPosTaggedToken();
    // A single address object is re-pointed at each dependent in turn.
    ParseConfigurationAddress address = new ParseConfigurationAddress(env);
    address.setParseConfiguration(parseConfiguration);
    for (PosTaggedToken candidate : parseConfiguration.getDependents(referenceToken)) {
        address.setPosTaggedToken(candidate);
        FeatureResult<Boolean> verdict = criterionFeature.check(address, env);
        if (verdict != null && verdict.getOutcome()) {
            // First match wins; stop searching.
            return this.generateResult(candidate);
        }
    }
    return null;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Aggregations

ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration) 31, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken) 24, PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) 18, DependencyArc (com.joliciel.talismane.parser.DependencyArc) 7, PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence) 7, TalismaneException (com.joliciel.talismane.TalismaneException) 4, Sentence (com.joliciel.talismane.rawText.Sentence) 4, Transition (com.joliciel.talismane.parser.Transition) 3, SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) 3, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence) 3, NonDeterministicParser (com.joliciel.talismane.parser.NonDeterministicParser) 2, ParseConfigurationProcessor (com.joliciel.talismane.parser.output.ParseConfigurationProcessor) 2, NonDeterministicPosTagger (com.joliciel.talismane.posTagger.NonDeterministicPosTagger) 2, PosTagAnnotatedCorpusReader (com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader) 2, PosTagSequenceProcessor (com.joliciel.talismane.posTagger.output.PosTagSequenceProcessor) 2, SentenceProcessor (com.joliciel.talismane.sentenceDetector.SentenceProcessor) 2, Token (com.joliciel.talismane.tokeniser.Token) 2, TokeniserAnnotatedCorpusReader (com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader) 2, ArrayList (java.util.ArrayList) 2, HashMap (java.util.HashMap) 2