Search in sources :

Example 6 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class NonProjectiveStatisticsWriter method onNextParseConfiguration.

/**
 * Records non-projectivity statistics for one parse configuration.
 * <p>
 * A projective tree only increments bucket 0 of the gap-degree and
 * edge-degree histograms. A non-projective tree additionally produces:
 * <ul>
 * <li>one line on {@code writer} describing the sentence (gap degree, edge
 * degree, well-nestedness and any ill-nested node pairs);</li>
 * <li>one line on {@code writer2} per non-projective node;</li>
 * <li>one line on {@code writer3} per non-projective edge.</li>
 * </ul>
 * The overall sentence and node totals are updated in both cases.
 *
 * @param parseConfiguration the configuration to analyse
 * @throws IOException if writing to any of the writers fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws IOException {
    ParseTree parseTree = new ParseTree(parseConfiguration, false);
    if (parseTree.isProjective()) {
        // A projective tree is counted with degree 0 in both histograms.
        gapDegreeCounts[0]++;
        edgeDegreeCounts[0]++;
    } else {
        String sentenceText = parseConfiguration.getSentence().getText().toString();

        // --- sentence-level line ---
        writer.write(CSV.format(sentenceText));
        writer.write(CSV.format(parseTree.getGapDegree().getRight()));
        writer.write(CSV.format(parseTree.getGapDegree().getLeft().toString()));
        // The histograms have a last bucket (index 9) that collects every degree of 9 or more.
        int gapDegree = parseTree.getGapDegree().getRight();
        gapDegreeCounts[Math.min(gapDegree, 9)]++;

        writer.write(CSV.format(parseTree.getEdgeDegree().getRight()));
        writer.write(CSV.format(parseTree.getEdgeDegree().getLeft().toString()));
        int edgeDegree = parseTree.getEdgeDegree().getRight();
        edgeDegreeCounts[Math.min(edgeDegree, 9)]++;

        writer.write(CSV.format(parseTree.isWellNested()));
        for (Pair<ParseTreeNode, ParseTreeNode> nodePair : parseTree.getIllNestedNodes()) {
            writer.write(CSV.format(nodePair.getLeft().toString()));
            writer.write(CSV.format(nodePair.getRight().toString()));
        }
        if (!parseTree.isWellNested()) {
            illNestedCount++;
        }
        writer.write("\n");
        writer.flush();
        nonProjectiveCount++;

        // --- one line per non-projective node ---
        for (ParseTreeNode node : parseTree.getNonProjectiveNodes()) {
            writer2.write(CSV.format(sentenceText));
            writer2.write(CSV.format(node.getPosTaggedToken().toString()));
            writer2.write(CSV.format(node.toString()));
            writer2.write(CSV.format(node.getGapCount()));
            writer2.write(CSV.format(node.getEdgeCount()));
            for (DependencyArc gapHead : node.getGapHeads()) {
                writer2.write(CSV.format(gapHead.toString()));
            }
            writer2.write("\n");
            writer2.flush();
            nonProjectiveNodeCount++;
        }

        // --- one line per non-projective edge ---
        for (DependencyArc edge : parseTree.getNonProjectiveEdges()) {
            writer3.write(CSV.format(sentenceText));
            writer3.write(CSV.format(edge.toString()));
            writer3.write("\n");
            writer3.flush();
            nonProjectiveEdgeCount++;
        }
    }

    // NOTE(review): the "- 1" presumably excludes the artificial root token - confirm.
    totalNodeCount += parseConfiguration.getPosTagSequence().size() - 1;
    totalCount++;
}
Also used : DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseTree(com.joliciel.talismane.parser.ParseTree) ParseTreeNode(com.joliciel.talismane.parser.ParseTreeNode)

Example 7 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class StandoffWriter method onNextParseConfiguration.

/**
 * Renders one parse configuration through the template and then advances
 * the running counters.
 * <p>
 * The model handed to {@code process} contains the configuration, its
 * {@link ParseConfigurationOutput} wrapper, the running token, relation,
 * sentence and character counts as they stood <em>before</em> this sentence,
 * the logger, and the list of real dependencies whose label differs from
 * {@code punctuationDepLabel}.
 *
 * @param parseConfiguration the configuration to write
 * @throws IOException if writing fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws IOException {
    Map<String, Object> model = new HashMap<String, Object>();
    ParseConfigurationOutput output = new ParseConfigurationOutput(parseConfiguration);
    model.put("sentence", output);
    model.put("configuration", parseConfiguration);
    model.put("tokenCount", tokenCount);
    model.put("relationCount", relationCount);
    model.put("sentenceCount", sentenceCount);
    model.put("characterCount", characterCount);
    model.put("LOG", LOG);
    List<DependencyArc> dependencies = new ArrayList<DependencyArc>();
    for (DependencyArc dependencyArc : parseConfiguration.getRealDependencies()) {
        // An arc's label may be null; a null label is never the punctuation
        // label, so such an arc is kept instead of triggering a
        // NullPointerException.
        String label = dependencyArc.getLabel();
        if (label == null || !label.equals(punctuationDepLabel)) {
            dependencies.add(dependencyArc);
        }
    }
    model.put("dependencies", dependencies);
    this.process(model, writer);
    // Counters are updated only after rendering, so the template sees the
    // totals preceding the current sentence.
    tokenCount += parseConfiguration.getPosTagSequence().size();
    relationCount += dependencies.size();
    characterCount += parseConfiguration.getSentence().getText().length();
    sentenceCount += 1;
}
Also used : ParseConfigurationOutput(com.joliciel.talismane.parser.output.ParseConfigurationOutput) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 8 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class ParseEvaluationSentenceWriter method onParseEnd.

/**
 * Writes a CSV block that lines up the reference parse of a sentence with up
 * to {@code guessCount} guessed parses, one column per distinct token start
 * index.
 * <p>
 * For the reference configuration four rows are written: token text, pos-tag
 * code, governing dependency label and governing head (as a column label, or
 * {@code ROOT}). For each guessed configuration the token-text row is written
 * only if {@code hasTokeniser}, the pos-tag row only if {@code hasPosTagger},
 * followed by label, head and transition-probability rows. When fewer than
 * {@code guessCount} guesses are available, two empty lines are written for
 * each missing guess.
 * <p>
 * A token with no governing dependency gets an empty label cell; previously
 * this case threw a {@link NullPointerException} for the reference
 * configuration.
 *
 * @param realConfiguration the reference parse
 * @param guessedConfigurations the guessed parses, in the order they should be written
 * @throws IOException if writing to the CSV file fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
    // Gather every start index at which a (non-root) token begins, so that
    // differing tokenisations can share one column layout.
    TreeSet<Integer> startIndexes = new TreeSet<Integer>();
    this.collectStartIndexes(realConfiguration, startIndexes);
    if (hasTokeniser || hasPosTagger) {
        int i = 0;
        for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
            this.collectStartIndexes(guessedConfiguration, startIndexes);
            i++;
            if (i == guessCount)
                break;
        }
    }

    // Map each start index to its column position (ascending start index).
    Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
    int j = 0;
    for (int startIndex : startIndexes) {
        startIndexMap.put(startIndex, j++);
    }

    PosTaggedToken[] realTokens = this.alignTokensByStartIndex(realConfiguration, startIndexMap, startIndexes.size());
    this.writeTokenTextRow(realTokens);
    this.writePosTagRow(realTokens);
    this.writeLabelRow(realConfiguration, realTokens);
    this.writeHeadRow(realConfiguration, realTokens, startIndexMap);

    for (int i = 0; i < guessCount; i++) {
        if (i < guessedConfigurations.size()) {
            ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
            PosTaggedToken[] guessedTokens = this.alignTokensByStartIndex(guessedConfiguration, startIndexMap, startIndexes.size());
            if (hasTokeniser) {
                this.writeTokenTextRow(guessedTokens);
            }
            if (hasPosTagger) {
                this.writePosTagRow(guessedTokens);
            }
            this.writeLabelRow(guessedConfiguration, guessedTokens);
            this.writeHeadRow(guessedConfiguration, guessedTokens, startIndexMap);
            this.writeProbabilityRow(guessedConfiguration, guessedTokens);
        } else {
            // No guess available at this rank: emit two empty lines as a placeholder.
            csvFileWriter.write("\n");
            csvFileWriter.write("\n");
        }
    }
    csvFileWriter.flush();
}

/** Adds the start index of every non-root token of the configuration to {@code startIndexes}. */
private void collectStartIndexes(ParseConfiguration configuration, TreeSet<Integer> startIndexes) {
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            startIndexes.add(posTaggedToken.getToken().getStartIndex());
        }
    }
}

/** Places each non-root token in the column given by its start index; columns with no token stay null. */
private PosTaggedToken[] alignTokensByStartIndex(ParseConfiguration configuration, Map<Integer, Integer> startIndexMap, int columnCount) {
    PosTaggedToken[] tokens = new PosTaggedToken[columnCount];
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            tokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
        }
    }
    return tokens;
}

/** Writes one row with the original text of each token; empty columns get a bare separator. */
private void writeTokenTextRow(PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes one row with the pos-tag code of each token; empty columns get a bare separator. */
private void writePosTagRow(PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes one row with the label of each token's governing dependency: an empty
 * cell when the token has no governing arc, the text "null" when the arc's
 * label is null.
 */
private void writeLabelRow(ParseConfiguration configuration, PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            String label = "";
            if (arc != null) {
                label = arc.getLabel() == null ? "null" : arc.getLabel();
            }
            csvFileWriter.write(CSV.format(label));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes one row with the column label of each token's governing head, or
 * "ROOT" when the token has no governing arc or its head is the root token.
 */
private void writeHeadRow(ParseConfiguration configuration, PosTaggedToken[] tokens, Map<Integer, Integer> startIndexMap) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            int startIndex = -1;
            if (arc != null) {
                PosTaggedToken head = arc.getHead();
                if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    startIndex = head.getToken().getStartIndex();
                }
            }
            if (startIndex < 0)
                csvFileWriter.write(CSV.format("ROOT"));
            else
                csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/**
 * Writes one row with the probability of the transition that produced each
 * token's governing arc; 1.0 is written when there is no arc or no transition
 * is recorded for it.
 */
private void writeProbabilityRow(ParseConfiguration configuration, PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            double prob = 1.0;
            if (arc != null) {
                Transition transition = configuration.getTransition(arc);
                if (transition != null)
                    prob = transition.getDecision().getProbability();
            }
            csvFileWriter.write(CSV.format(prob));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) TreeSet(java.util.TreeSet) Transition(com.joliciel.talismane.parser.Transition) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 9 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class DependencyLabelFeature method check.

/**
 * Returns the label of the dependency arc governing the token this feature
 * addresses.
 *
 * @param wrapper gives access to the current parse configuration
 * @param env the runtime environment passed on to the token lookup
 * @return a result holding the governing arc's label (the text "null" when
 *         the arc has a null label), or {@code null} when the token cannot be
 *         resolved or has no governing arc
 * @throws TalismaneException declared by the method; propagated from the calls made here
 */
@Override
public FeatureResult<String> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    PosTaggedTokenWrapper tokenWrapper = this.getToken(wrapper, env);
    if (tokenWrapper == null) {
        return null;
    }

    PosTaggedToken token = tokenWrapper.getPosTaggedToken();
    if (token == null) {
        return null;
    }

    ParseConfiguration configuration = wrapper.getParseConfiguration();
    DependencyArc governingArc = configuration.getGoverningDependency(token);
    if (governingArc == null) {
        // No governor yet: the feature has no value.
        return null;
    }

    String label = governingArc.getLabel();
    return this.generateResult(label == null ? "null" : label);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 10 with DependencyArc

use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.

the class TransitionLogWriter method onNextParseConfiguration.

/**
 * Logs, step by step, how the transitions of a parse configuration build up
 * its dependencies.
 * <p>
 * Starting from a fresh configuration over the same pos-tag sequence, each
 * transition is replayed on a copy of the previous state. One tab-separated
 * line is written per transition: transition code, top of stack, top of
 * buffer and, if the transition introduced a dependency, a description of
 * that dependency in the form
 * {@code label[headText|headTag,dependentText|dependentTag]} (spaces in token
 * text replaced by underscores).
 *
 * @param parseConfiguration the configuration whose transitions are replayed
 * @throws TalismaneException declared by the method; propagated from the calls made here
 * @throws IOException if writing fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException, IOException {
    ParseConfiguration currentConfiguration = new ParseConfiguration(parseConfiguration.getPosTagSequence());
    writer.write("\n");
    // Initial state, before any transition has been applied.
    writer.write("\t" + this.getTopOfStack(currentConfiguration) + "\t" + this.getTopOfBuffer(currentConfiguration) + "\t" + "\n");

    Set<DependencyArc> seenArcs = new HashSet<DependencyArc>();
    for (Transition transition : parseConfiguration.getTransitions()) {
        currentConfiguration = new ParseConfiguration(currentConfiguration);
        transition.apply(currentConfiguration);

        // If the dependency set grew, the first arc not seen before is the one just added.
        DependencyArc addedArc = null;
        if (currentConfiguration.getDependencies().size() > seenArcs.size()) {
            for (DependencyArc candidate : currentConfiguration.getDependencies()) {
                // Set.add returns true only when the arc was not already present.
                if (seenArcs.add(candidate)) {
                    addedArc = candidate;
                    break;
                }
            }
        }

        String addedArcText = "";
        if (addedArc != null) {
            String label = addedArc.getLabel();
            String headText = addedArc.getHead().getToken().getOriginalText().replace(' ', '_');
            String headCode = addedArc.getHead().getTag().getCode();
            String dependentText = addedArc.getDependent().getToken().getOriginalText().replace(' ', '_');
            String dependentCode = addedArc.getDependent().getTag().getCode();
            addedArcText = label + "[" + headText + "|" + headCode + "," + dependentText + "|" + dependentCode + "]";
        }

        writer.write(transition.getCode() + "\t" + this.getTopOfStack(currentConfiguration) + "\t" + this.getTopOfBuffer(currentConfiguration) + "\t" + addedArcText + "\n");
    }
    writer.flush();
}
Also used : Transition(com.joliciel.talismane.parser.Transition) DependencyArc(com.joliciel.talismane.parser.DependencyArc) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) HashSet(java.util.HashSet)

Aggregations

DependencyArc (com.joliciel.talismane.parser.DependencyArc) 14, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken) 9, ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration) 7, TalismaneException (com.joliciel.talismane.TalismaneException) 4, PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence) 4, ArrayList (java.util.ArrayList) 4, HashMap (java.util.HashMap) 4, Token (com.joliciel.talismane.tokeniser.Token) 3, HashSet (java.util.HashSet) 3, Transition (com.joliciel.talismane.parser.Transition) 2, LinguisticRules (com.joliciel.talismane.LinguisticRules) 1, CorpusLine (com.joliciel.talismane.corpus.CorpusLine) 1, CorpusElement (com.joliciel.talismane.corpus.CorpusLine.CorpusElement) 1, Decision (com.joliciel.talismane.machineLearning.Decision) 1, ParseTree (com.joliciel.talismane.parser.ParseTree) 1, ParseTreeNode (com.joliciel.talismane.parser.ParseTreeNode) 1, ParseConfigurationOutput (com.joliciel.talismane.parser.output.ParseConfigurationOutput) 1, PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) 1, Sentence (com.joliciel.talismane.rawText.Sentence) 1, SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) 1