use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.
the class NonProjectiveStatisticsWriter method onNextParseConfiguration.
/**
 * Updates the non-projectivity statistics with one parse configuration.
 * <p>
 * A parse tree is built from the configuration. If it is projective, only the
 * zero buckets of the gap-degree and edge-degree histograms are incremented.
 * Otherwise one CSV row describing the sentence is written to {@code writer},
 * one row per non-projective node to {@code writer2}, and one row per
 * non-projective edge to {@code writer3}; each row is flushed as soon as it is
 * complete. The running totals are updated in both cases.
 *
 * @param parseConfiguration the configuration to analyse
 * @throws IOException if writing to any of the underlying writers fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws IOException {
    ParseTree parseTree = new ParseTree(parseConfiguration, false);

    if (parseTree.isProjective()) {
        // Projective sentences only contribute to the degree-0 buckets.
        gapDegreeCounts[0]++;
        edgeDegreeCounts[0]++;
    } else {
        String sentenceText = parseConfiguration.getSentence().getText().toString();

        // --- sentence-level row ---
        writer.write(CSV.format(sentenceText));

        writer.write(CSV.format(parseTree.getGapDegree().getRight()));
        writer.write(CSV.format(parseTree.getGapDegree().getLeft().toString()));
        // Degrees above 9 all share the last histogram bucket.
        int gapBucket = parseTree.getGapDegree().getRight();
        gapDegreeCounts[Math.min(gapBucket, 9)]++;

        writer.write(CSV.format(parseTree.getEdgeDegree().getRight()));
        writer.write(CSV.format(parseTree.getEdgeDegree().getLeft().toString()));
        int edgeBucket = parseTree.getEdgeDegree().getRight();
        edgeDegreeCounts[Math.min(edgeBucket, 9)]++;

        writer.write(CSV.format(parseTree.isWellNested()));
        for (Pair<ParseTreeNode, ParseTreeNode> nodePair : parseTree.getIllNestedNodes()) {
            writer.write(CSV.format(nodePair.getLeft().toString()));
            writer.write(CSV.format(nodePair.getRight().toString()));
        }
        if (!parseTree.isWellNested()) {
            illNestedCount++;
        }
        writer.write("\n");
        writer.flush();
        nonProjectiveCount++;

        // --- one row per non-projective node ---
        for (ParseTreeNode node : parseTree.getNonProjectiveNodes()) {
            writer2.write(CSV.format(sentenceText));
            writer2.write(CSV.format(node.getPosTaggedToken().toString()));
            writer2.write(CSV.format(node.toString()));
            writer2.write(CSV.format(node.getGapCount()));
            writer2.write(CSV.format(node.getEdgeCount()));
            for (DependencyArc gapHead : node.getGapHeads()) {
                writer2.write(CSV.format(gapHead.toString()));
            }
            writer2.write("\n");
            writer2.flush();
            nonProjectiveNodeCount++;
        }

        // --- one row per non-projective edge ---
        for (DependencyArc edge : parseTree.getNonProjectiveEdges()) {
            writer3.write(CSV.format(sentenceText));
            writer3.write(CSV.format(edge.toString()));
            writer3.write("\n");
            writer3.flush();
            nonProjectiveEdgeCount++;
        }
    }

    // One is subtracted from the sequence size -- presumably to leave out the
    // artificial root token; confirm against PosTagSequence.
    totalNodeCount += parseConfiguration.getPosTagSequence().size() - 1;
    totalCount++;
}
use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.
the class StandoffWriter method onNextParseConfiguration.
/**
 * Builds the model for one parsed sentence, hands it to
 * {@code process(model, writer)}, and then advances the running counters.
 * <p>
 * The model exposes the sentence output wrapper, the raw configuration, the
 * logger, and the token / relation / sentence / character counters as they
 * stood <em>before</em> this sentence (the counters are only incremented after
 * processing). It also exposes the list of real dependencies whose label
 * differs from {@code punctuationDepLabel}.
 *
 * @param parseConfiguration the configuration to output
 * @throws IOException if processing the model fails to write
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws IOException {
    Map<String, Object> model = new HashMap<String, Object>();
    ParseConfigurationOutput output = new ParseConfigurationOutput(parseConfiguration);
    model.put("sentence", output);
    model.put("configuration", parseConfiguration);
    model.put("tokenCount", tokenCount);
    model.put("relationCount", relationCount);
    model.put("sentenceCount", sentenceCount);
    model.put("characterCount", characterCount);
    model.put("LOG", LOG);

    List<DependencyArc> dependencies = new ArrayList<DependencyArc>();
    for (DependencyArc dependencyArc : parseConfiguration.getRealDependencies()) {
        String label = dependencyArc.getLabel();
        // An arc may carry no label at all; such an arc cannot be a punctuation
        // arc, so keep it rather than failing with a NullPointerException.
        if (label == null || !label.equals(punctuationDepLabel)) {
            dependencies.add(dependencyArc);
        }
    }
    model.put("dependencies", dependencies);

    this.process(model, writer);

    // Counters are advanced only after the sentence has been processed.
    tokenCount += parseConfiguration.getPosTagSequence().size();
    relationCount += dependencies.size();
    characterCount += parseConfiguration.getSentence().getText().length();
    sentenceCount += 1;
}
use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.
the class ParseEvaluationSentenceWriter method onParseEnd.
/**
 * Writes a side-by-side CSV comparison of the reference parse and up to
 * {@code guessCount} guessed parses for one sentence.
 * <p>
 * Every distinct token start index (from the reference, and from the guesses
 * when a tokeniser or pos-tagger is being evaluated) is assigned one CSV
 * column, so that tokens sharing a start index line up vertically. The
 * reference contributes four rows: original text, pos-tag code, governing
 * label, and governor column. Each guess contributes an optional text row
 * (if {@code hasTokeniser}), an optional pos-tag row (if {@code hasPosTagger}),
 * then label, governor column and transition probability rows.
 * <p>
 * A reference token without a governing dependency is written with an empty
 * label cell, the same convention used for guessed tokens.
 * <p>
 * NOTE(review): when there are fewer guesses than {@code guessCount}, exactly
 * two empty lines are written per missing guess, although a present guess
 * writes at least three rows -- confirm that this padding is intended.
 *
 * @param realConfiguration the reference configuration
 * @param guessedConfigurations the guessed configurations, in the order they should be written
 * @throws IOException if writing to the CSV writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
    // Gather all start indexes that need a column of their own.
    TreeSet<Integer> startIndexes = new TreeSet<Integer>();
    collectTokenStartIndexes(realConfiguration, startIndexes);
    if (hasTokeniser || hasPosTagger) {
        int i = 0;
        for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
            collectTokenStartIndexes(guessedConfiguration, startIndexes);
            i++;
            if (i == guessCount)
                break;
        }
    }

    // Map each start index to its column position, in ascending order.
    Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
    int column = 0;
    for (int startIndex : startIndexes) {
        startIndexMap.put(startIndex, column++);
    }

    // Reference rows.
    PosTaggedToken[] realTokens = alignTokensToColumns(realConfiguration, startIndexMap, startIndexes.size());
    writeOriginalTextRow(realTokens);
    writePosTagCodeRow(realTokens);
    writeDependencyLabelRow(realConfiguration, realTokens);
    writeGovernorColumnRow(realConfiguration, realTokens, startIndexMap);

    // Guessed rows.
    for (int i = 0; i < guessCount; i++) {
        if (i < guessedConfigurations.size()) {
            ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
            PosTaggedToken[] guessedTokens = alignTokensToColumns(guessedConfiguration, startIndexMap, startIndexes.size());
            if (hasTokeniser) {
                writeOriginalTextRow(guessedTokens);
            }
            if (hasPosTagger) {
                writePosTagCodeRow(guessedTokens);
            }
            writeDependencyLabelRow(guessedConfiguration, guessedTokens);
            writeGovernorColumnRow(guessedConfiguration, guessedTokens, startIndexMap);
            writeTransitionProbabilityRow(guessedConfiguration, guessedTokens);
        } else {
            // No guess available at this rank: pad with empty lines.
            csvFileWriter.write("\n");
            csvFileWriter.write("\n");
        }
    }
    csvFileWriter.flush();
}

/** Adds the start index of every non-root token of the configuration to {@code startIndexes}. */
private void collectTokenStartIndexes(ParseConfiguration configuration, TreeSet<Integer> startIndexes) {
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            Token token = posTaggedToken.getToken();
            startIndexes.add(token.getStartIndex());
        }
    }
}

/** Places every non-root token of the configuration in the column assigned to its start index; unused columns stay null. */
private PosTaggedToken[] alignTokensToColumns(ParseConfiguration configuration, Map<Integer, Integer> startIndexMap, int columnCount) {
    PosTaggedToken[] tokens = new PosTaggedToken[columnCount];
    for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            tokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
        }
    }
    return tokens;
}

/** Writes one row holding the original text of each token, with an empty cell for each null column. */
private void writeOriginalTextRow(PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes one row holding the pos-tag code of each token, with an empty cell for each null column. */
private void writePosTagCodeRow(PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes one row holding each token's governing label: "" when it has no governing arc, "null" when the arc has no label. */
private void writeDependencyLabelRow(ParseConfiguration configuration, PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            String label = "";
            if (arc != null) {
                label = arc.getLabel() == null ? "null" : arc.getLabel();
            }
            csvFileWriter.write(CSV.format(label));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes one row holding the column label of each token's governor, or "ROOT" when the governor is the root or absent. */
private void writeGovernorColumnRow(ParseConfiguration configuration, PosTaggedToken[] tokens, Map<Integer, Integer> startIndexMap) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            int startIndex = -1;
            if (arc != null) {
                PosTaggedToken head = arc.getHead();
                if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    startIndex = head.getToken().getStartIndex();
                }
            }
            if (startIndex < 0)
                csvFileWriter.write(CSV.format("ROOT"));
            else
                csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}

/** Writes one row holding the probability of the transition that produced each token's governing arc, defaulting to 1.0. */
private void writeTransitionProbabilityRow(ParseConfiguration configuration, PosTaggedToken[] tokens) throws IOException {
    for (PosTaggedToken posTaggedToken : tokens) {
        if (posTaggedToken != null) {
            DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
            double prob = 1.0;
            if (arc != null) {
                Transition transition = configuration.getTransition(arc);
                if (transition != null)
                    prob = transition.getDecision().getProbability();
            }
            csvFileWriter.write(CSV.format(prob));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
}
use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.
the class DependencyLabelFeature method check.
/**
 * Returns the label of the dependency arc governing the addressed token.
 * <p>
 * The result is {@code null} when no token can be resolved from the wrapper or
 * when the token has no governing dependency. If the governing arc exists but
 * carries no label, the literal string {@code "null"} is used as the label.
 *
 * @param wrapper the wrapper giving access to the current parse configuration
 * @param env the runtime environment passed through to token resolution
 * @return a result holding the governing label, or {@code null} if unavailable
 * @throws TalismaneException propagated from token resolution or result generation
 */
@Override
public FeatureResult<String> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    PosTaggedTokenWrapper tokenWrapper = this.getToken(wrapper, env);
    if (tokenWrapper == null) {
        return null;
    }

    PosTaggedToken token = tokenWrapper.getPosTaggedToken();
    if (token == null) {
        return null;
    }

    ParseConfiguration configuration = wrapper.getParseConfiguration();
    DependencyArc governingArc = configuration.getGoverningDependency(token);
    if (governingArc == null) {
        // No governor yet: the feature has no value.
        return null;
    }

    String label = governingArc.getLabel();
    return this.generateResult(label == null ? "null" : label);
}
use of com.joliciel.talismane.parser.DependencyArc in project talismane by joliciel-informatique.
the class TransitionLogWriter method onNextParseConfiguration.
/**
 * Replays the transitions of a parse configuration and logs each step.
 * <p>
 * Starting from a fresh configuration over the same pos-tag sequence, every
 * transition is applied to a copy of the previous state. One tab-separated
 * line is written per step: the transition code, the top of the stack, the top
 * of the buffer, and a description of the dependency added by that step (empty
 * if none was added). The initial state is logged first, with an empty
 * transition code.
 *
 * @param parseConfiguration the configuration whose transition history is logged
 * @throws TalismaneException if applying a transition fails
 * @throws IOException if writing to the log fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException, IOException {
    ParseConfiguration state = new ParseConfiguration(parseConfiguration.getPosTagSequence());

    writer.write("\n");
    String initialLine = "\t" + this.getTopOfStack(state) + "\t" + this.getTopOfBuffer(state) + "\t" + "\n";
    writer.write(initialLine);

    // Arcs already reported on an earlier line.
    Set<DependencyArc> seenArcs = new HashSet<DependencyArc>();

    for (Transition transition : parseConfiguration.getTransitions()) {
        state = new ParseConfiguration(state);
        transition.apply(state);

        // If the transition created an arc, pick the first one not seen before.
        DependencyArc addedArc = null;
        if (state.getDependencies().size() > seenArcs.size()) {
            for (DependencyArc candidate : state.getDependencies()) {
                // add() returns true only when the arc was not yet in the set.
                if (seenArcs.add(candidate)) {
                    addedArc = candidate;
                    break;
                }
            }
        }

        String addedArcText = "";
        if (addedArc != null) {
            // Spaces inside a token are replaced so each token stays one word.
            String headText = addedArc.getHead().getToken().getOriginalText().replace(' ', '_');
            String headPart = headText + "|" + addedArc.getHead().getTag().getCode();
            String dependentText = addedArc.getDependent().getToken().getOriginalText().replace(' ', '_');
            String dependentPart = dependentText + "|" + addedArc.getDependent().getTag().getCode();
            addedArcText = addedArc.getLabel() + "[" + headPart + "," + dependentPart + "]";
        }

        String stepLine = transition.getCode() + "\t" + this.getTopOfStack(state) + "\t" + this.getTopOfBuffer(state) + "\t" + addedArcText + "\n";
        writer.write(stepLine);
    }
    writer.flush();
}
Aggregations