use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class HistoryCountIfFeature method checkInternal.
/**
 * Counts the tokens of the current pos-tagging history, between a start
 * index and an end index (both inclusive), for which the criterion
 * evaluates to true.
 *
 * @param context the pos-tagger context whose history is scanned
 * @param env the runtime environment handed on to the nested features
 * @return the count wrapped in a feature result, or null if the start
 *         index feature or a configured end index feature yields no
 *         result, or if the end index lies before the start index
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> checkInternal(PosTaggerContext context, RuntimeEnvironment env) throws TalismaneException {
  // Unless an end index feature says otherwise, scan to the last history entry.
  int lastIndex = context.getHistory().size() - 1;

  FeatureResult<Integer> firstIndexResult = startIndexFeature.check(context, env);
  if (firstIndexResult == null) {
    return null;
  }
  int firstIndex = firstIndexResult.getOutcome();

  if (endIndexFeature != null) {
    FeatureResult<Integer> lastIndexResult = endIndexFeature.check(context, env);
    if (lastIndexResult == null) {
      return null;
    }
    lastIndex = lastIndexResult.getOutcome();
  }

  // The range is validated before the start index is clamped to zero.
  if (lastIndex < firstIndex) {
    return null;
  }

  int matches = 0;
  int position = Math.max(firstIndex, 0);
  while (position < context.getHistory().size() && position <= lastIndex) {
    PosTaggedToken candidate = context.getHistory().get(position);
    FeatureResult<Boolean> verdict = this.criterion.check(candidate, env);
    if (verdict != null && verdict.getOutcome()) {
      matches++;
    }
    position++;
  }

  return this.generateResult(matches);
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class PosTaggedTokenIndexFeature method checkInternal.
/**
 * Returns the index of the token underlying the pos-tagged token
 * addressed by this feature.
 *
 * @param context the context from which the pos-tagged token is resolved
 * @param env the runtime environment handed on to the token lookup
 * @return the token index wrapped in a feature result, or null if no
 *         wrapper or no pos-tagged token could be resolved
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> checkInternal(T context, RuntimeEnvironment env) throws TalismaneException {
  PosTaggedTokenWrapper tokenWrapper = this.getToken(context, env);
  PosTaggedToken taggedToken = tokenWrapper == null ? null : tokenWrapper.getPosTaggedToken();
  if (taggedToken == null) {
    return null;
  }

  Token underlyingToken = taggedToken.getToken();
  int tokenIndex = underlyingToken.getIndex();
  return this.generateResult(tokenIndex);
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ValencyByLabelFeature method check.
/**
 * Counts the dependents attached to the addressed token with a given
 * dependency label in the current parse configuration.
 *
 * @param wrapper provides the parse configuration to inspect
 * @param env the runtime environment handed on to the nested features
 * @return the number of matching dependents wrapped in a feature result,
 *         or null if either the address function or the dependency label
 *         feature yields no result
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
  ParseConfiguration parseConfiguration = wrapper.getParseConfiguration();

  FeatureResult<PosTaggedTokenWrapper> addressResult = addressFunction.check(wrapper, env);
  if (addressResult == null) {
    return null;
  }

  // The label feature is only evaluated once a token has been addressed.
  FeatureResult<String> labelResult = dependencyLabelFeature.check(wrapper, env);
  if (labelResult == null) {
    return null;
  }

  PosTaggedToken addressedToken = addressResult.getOutcome().getPosTaggedToken();
  String wantedLabel = labelResult.getOutcome();
  int dependentCount = parseConfiguration.getDependents(addressedToken, wantedLabel).size();
  return this.generateResult(dependentCount);
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ValencyFeature method check.
/**
 * Counts all dependents attached to the addressed token in the current
 * parse configuration.
 *
 * @param wrapper provides the parse configuration to inspect
 * @param env the runtime environment handed on to the address function
 * @return the number of dependents wrapped in a feature result, or null
 *         if the address function yields no result
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Integer> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
  ParseConfiguration parseConfiguration = wrapper.getParseConfiguration();

  FeatureResult<PosTaggedTokenWrapper> addressResult = addressFunction.check(wrapper, env);
  if (addressResult == null) {
    return null;
  }

  PosTaggedToken addressedToken = addressResult.getOutcome().getPosTaggedToken();
  int dependentCount = parseConfiguration.getDependents(addressedToken).size();
  return this.generateResult(dependentCount);
}
use of com.joliciel.talismane.posTagger.PosTaggedToken in project talismane by joliciel-informatique.
the class ParseEvaluationSentenceWriter method onParseEnd.
/**
 * Writes one block of CSV rows comparing the real parse of a sentence with
 * up to {@code guessCount} guessed parses.
 * <p>
 * Every non-root token is assigned a column according to its start index,
 * so that real and guessed tokens starting at the same position share a
 * column. A column for which a given configuration has no token is written
 * as a bare separator.
 * <p>
 * Rows written for the real configuration: original text, pos-tag code,
 * governing dependency label, governor column. For each guessed
 * configuration: original text (only if {@code hasTokeniser}), pos-tag code
 * (only if {@code hasPosTagger}), label, governor column, and the
 * probability of the transition that produced the governing arc. When fewer
 * than {@code guessCount} guesses are available, two empty lines are
 * written for each missing guess.
 *
 * @param realConfiguration the reference parse configuration
 * @param guessedConfigurations the guessed configurations, read in list order
 * @throws IOException if writing to the CSV writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
  // Gather the start index of every token that needs a column.
  TreeSet<Integer> startIndexes = new TreeSet<Integer>();
  this.addTokenStartIndexes(realConfiguration, startIndexes);
  if (hasTokeniser || hasPosTagger) {
    // Guessed tokenisation may differ from the real one, so its start
    // indexes are added too (only for the guesses that will be written).
    int i = 0;
    for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
      this.addTokenStartIndexes(guessedConfiguration, startIndexes);
      i++;
      if (i == guessCount)
        break;
    }
  }

  // Map each start index to its column position, in ascending order.
  Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
  int j = 0;
  for (int startIndex : startIndexes) {
    startIndexMap.put(startIndex, j++);
  }

  PosTaggedToken[] realTokens = this.alignTokensToColumns(realConfiguration, startIndexMap, startIndexes.size());
  this.writeOriginalTextRow(realTokens);
  this.writePosTagRow(realTokens);
  this.writeLabelRow(realConfiguration, realTokens);
  this.writeGovernorRow(realConfiguration, realTokens, startIndexMap);

  for (int i = 0; i < guessCount; i++) {
    if (i < guessedConfigurations.size()) {
      ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
      PosTaggedToken[] guessedTokens = this.alignTokensToColumns(guessedConfiguration, startIndexMap, startIndexes.size());
      if (hasTokeniser) {
        this.writeOriginalTextRow(guessedTokens);
      }
      if (hasPosTagger) {
        this.writePosTagRow(guessedTokens);
      }
      this.writeLabelRow(guessedConfiguration, guessedTokens);
      this.writeGovernorRow(guessedConfiguration, guessedTokens, startIndexMap);
      this.writeProbabilityRow(guessedConfiguration, guessedTokens);
    } else {
      // No guess available at this rank: keep the original two-line filler.
      csvFileWriter.write("\n");
      csvFileWriter.write("\n");
    }
  }
  csvFileWriter.flush();
}

/** Adds the start index of every non-root token of the configuration to the given set. */
private void addTokenStartIndexes(ParseConfiguration configuration, TreeSet<Integer> startIndexes) {
  for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
    if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
      Token token = posTaggedToken.getToken();
      startIndexes.add(token.getStartIndex());
    }
  }
}

/** Places each non-root token of the configuration in the column assigned to its start index; unused columns stay null. */
private PosTaggedToken[] alignTokensToColumns(ParseConfiguration configuration, Map<Integer, Integer> startIndexMap, int columnCount) {
  PosTaggedToken[] alignedTokens = new PosTaggedToken[columnCount];
  for (PosTaggedToken posTaggedToken : configuration.getPosTagSequence()) {
    if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
      alignedTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
    }
  }
  return alignedTokens;
}

/** Writes one row holding the original text of each aligned token. */
private void writeOriginalTextRow(PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes one row holding the pos-tag code of each aligned token. */
private void writePosTagRow(PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes one row holding the governing dependency label of each aligned token. */
private void writeLabelRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      // A token without a governing arc gets an empty label rather than
      // triggering a NullPointerException; an arc whose label is null is
      // written as the literal text "null".
      String label = "";
      if (arc != null) {
        label = arc.getLabel() == null ? "null" : arc.getLabel();
      }
      csvFileWriter.write(CSV.format(label));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes one row holding, for each aligned token, the column label of its governor, or "ROOT" when it has no non-root governor. */
private void writeGovernorRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens, Map<Integer, Integer> startIndexMap) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      int startIndex = -1;
      if (arc != null) {
        PosTaggedToken head = arc.getHead();
        if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
          startIndex = head.getToken().getStartIndex();
        }
      }
      if (startIndex < 0)
        csvFileWriter.write(CSV.format("ROOT"));
      else
        csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes one row holding, for each aligned token, the probability of the transition behind its governing arc (1.0 when unavailable). */
private void writeProbabilityRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      double prob = 1.0;
      if (arc != null) {
        Transition transition = configuration.getTransition(arc);
        if (transition != null)
          prob = transition.getDecision().getProbability();
      }
      csvFileWriter.write(CSV.format(prob));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}
Aggregations