use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
the class PosTaggerFeatureParser method getRules.
/**
 * Builds pos-tagger rules from a list of textual rule descriptors.
 * <p>
 * Empty lines and lines starting with <code>#</code> are skipped. Every other
 * line is split on tabs into either two columns (pos-tag code, descriptor) or
 * three columns (pos-tag code, descriptor name, descriptor). A pos-tag code
 * prefixed by <code>!</code> produces a negative rule. A line with an empty
 * pos-tag code must carry a descriptor name: its descriptor is still parsed,
 * but no rule is produced for it.
 *
 * @param ruleDescriptors
 *          the descriptor lines to parse
 * @return one rule per boolean feature generated from each descriptor line
 *         that has a pos-tag code
 * @throws TalismaneException
 *           if a rule is incorrectly configured: no descriptor column, no
 *           pos-tag and no name, or a feature which is not boolean
 */
public List<PosTaggerRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
  List<PosTaggerRule> rules = new ArrayList<PosTaggerRule>();
  FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
  for (String ruleDescriptor : ruleDescriptors) {
    LOG.debug(ruleDescriptor);
    if (ruleDescriptor.length() == 0 || ruleDescriptor.startsWith("#")) {
      // blank line or comment: not a rule descriptor
      continue;
    }

    String[] ruleParts = ruleDescriptor.split("\t");
    // String.split drops trailing empty strings, so a line without a tab (or
    // with nothing after the tab) yields fewer than two parts. Report it as a
    // configuration error rather than an ArrayIndexOutOfBoundsException.
    if (ruleParts.length < 2) {
      throw new TalismaneException("Rule must contain a PosTag column and a descriptor separated by a tab: " + ruleDescriptor);
    }

    String posTagCode = ruleParts[0];
    PosTag posTag = null;
    boolean negative = false;
    String descriptor = null;
    String descriptorName = null;
    if (ruleParts.length > 2) {
      // three columns: pos-tag code, descriptor name, descriptor
      descriptor = ruleParts[2];
      descriptorName = ruleParts[1];
    } else {
      // two columns: pos-tag code, descriptor
      descriptor = ruleParts[1];
    }

    if (posTagCode.length() == 0) {
      if (descriptorName == null) {
        throw new TalismaneException("Rule without PosTag must have a name.");
      }
    } else {
      if (posTagCode.startsWith("!")) {
        negative = true;
        posTagCode = posTagCode.substring(1);
      }
      posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(posTagCode);
    }

    FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
    if (descriptorName != null) {
      functionDescriptor.setDescriptorName(descriptorName);
    }

    // The descriptor is parsed even when there is no pos-tag; presumably this
    // registers the named descriptor for use by later lines - TODO confirm.
    List<PosTaggerFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
    if (posTag == null) {
      // just a named descriptor, not a rule
      continue;
    }

    for (PosTaggerFeature<?> feature : myFeatures) {
      if (!(feature instanceof BooleanFeature)) {
        throw new TalismaneException("Rule must be based on a boolean feature.");
      }
      @SuppressWarnings("unchecked")
      BooleanFeature<PosTaggerContext> condition = (BooleanFeature<PosTaggerContext>) feature;
      PosTaggerRule rule = new PosTaggerRule(condition, posTag);
      rule.setNegative(negative);
      rules.add(rule);
    }
  }
  return rules;
}
use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
the class NgramFeature method checkInternal.
/**
 * Builds a comma-separated n-gram of the pos-tag codes preceding the current
 * token, oldest first. The size n comes from {@code nFeature}; n - 1 codes
 * are collected from the end of the history, skipping tags reported as
 * empty, and padding with {@code START_TOKEN} once the history is exhausted.
 *
 * @return the n-gram result, or null if n is unavailable or the current
 *         token's index is lower than n - 2
 */
@Override
public FeatureResult<String> checkInternal(PosTaggerContext context, RuntimeEnvironment env) throws TalismaneException {
  FeatureResult<Integer> nResult = nFeature.check(context, env);
  if (nResult == null) {
    return null;
  }

  int n = nResult.getOutcome();
  int tagsWanted = n - 1;
  if (context.getToken().getIndex() < tagsWanted - 1) {
    return null;
  }

  String ngram = "";
  int tagsCollected = 0;
  // offset counts backwards from the most recent history entry
  for (int offset = 0; tagsCollected < tagsWanted; offset++) {
    String code;
    if (offset < context.getHistory().size()) {
      PosTag posTag = context.getHistory().get(context.getHistory().size() - offset - 1).getTag();
      code = posTag.getCode();
      if (posTag.isEmpty()) {
        // empty tags do not count towards the n-gram
        continue;
      }
    } else {
      code = START_TOKEN;
    }

    // prepend, so that the oldest tag ends up first
    ngram = tagsCollected > 0 ? code + "," + ngram : code + ngram;
    tagsCollected++;
  }
  return this.generateResult(ngram);
}
use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
the class PosTagFScoreCalculator method onNextPosTagSequence.
/**
 * Compares the first guessed sequence against the real sequence, token by
 * token, and increments the f-score counters with (real tag code, result)
 * pairs. The result is the guessed tag code, or "TOKEN_ERROR" when the real
 * and guessed tokens do not share the same start and end indexes.
 * <p>
 * Index i walks the real sequence and index j walks the guessed sequence;
 * they advance independently so that the two sequences can be realigned
 * after a tokenisation mismatch.
 *
 * @param realSequence
 *          the reference sequence
 * @param guessedSequences
 *          the guessed sequences, of which only the first is evaluated
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws TalismaneException {
// NOTE(review): assumes guessedSequences contains at least one sequence.
PosTagSequence guessedSequence = guessedSequences.get(0);
// j: current position in the guessed sequence
int j = 0;
for (int i = 0; i < realSequence.size(); i++) {
TaggedToken<PosTag> realToken = realSequence.get(i);
TaggedToken<PosTag> testToken = guessedSequence.get(j);
// special handling for null tags & empty tokens
if (realToken.getTag().equals(PosTag.NULL_POS_TAG)) {
// The real token carries the null pos-tag: it is not scored. If the
// guessed token at j is empty, it is skipped as well. Presumably the
// previous non-empty token took care of any required comparisons
// (the original comment here was truncated).
if (testToken.getToken().isEmpty()) {
j++;
}
continue;
} else if (testToken.getToken().isEmpty() && !realToken.getToken().isEmpty()) {
// If the test token is empty, but the real token isn't, we skip
// this as well
// Again, we assume the previous non-empty token took care of
// any required comparisons.
// NOTE(review): j is not bounds-checked before this get - confirm an
// empty guessed token can never be the last one in the sequence.
j++;
testToken = guessedSequence.get(j);
}
boolean tokenError = false;
if (realToken.getToken().getStartIndex() == testToken.getToken().getStartIndex() && realToken.getToken().getEndIndex() == testToken.getToken().getEndIndex()) {
// no token error
// advance to the next guessed token, but never past the last one
j++;
if (j == guessedSequence.size()) {
j--;
}
} else {
tokenError = true;
// Realign: advance through the guessed tokens until one ends after the
// real token, or until the last guessed token is reached. testToken is
// left pointing at the token where the advance stopped.
while (realToken.getToken().getEndIndex() >= testToken.getToken().getEndIndex()) {
j++;
if (j == guessedSequence.size()) {
j--;
break;
}
testToken = guessedSequence.get(j);
}
}
if (LOG.isTraceEnabled()) {
LOG.trace("Token " + testToken.getToken().getAnalyisText() + ", guessed: " + testToken.getTag().getCode() + " (" + testToken.getDecision().getProbability() + "), actual: " + realToken.getTag().getCode());
}
String result = testToken.getTag().getCode();
if (tokenError)
result = "TOKEN_ERROR";
fScoreCalculator.increment(realToken.getTag().getCode(), result);
// A guessed token with null or empty possible pos-tags is counted as
// unknown in the lexicon, otherwise as known.
if (testToken.getToken().getPossiblePosTags() == null || testToken.getToken().getPossiblePosTags().size() == 0)
fscoreUnknownInLexicon.increment(realToken.getTag().getCode(), result);
else
fscoreKnownInLexicon.increment(realToken.getTag().getCode(), result);
}
}
use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
the class PosTagLexicalCoverageTester method onNextPosTagSequence.
/**
 * Updates lexical coverage statistics for one evaluated sentence, comparing
 * each real token with the token at the same index in the first guessed
 * sequence.
 * <p>
 * A real token whose possible pos-tags are non-null and empty is counted as
 * unknown: the f-score for unknown words is incremented and the occurrence
 * is counted under the key "tag|text". Any other token is counted as known.
 * In addition, a real token with a closed-class tag which is not among its
 * possible pos-tags is recorded as a closed category mismatch.
 *
 * @param realSequence
 *          the reference sequence
 * @param guessedSequences
 *          the guessed sequences, of which only the first is evaluated
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws TalismaneException {
  PosTagSequence guessedSequence = guessedSequences.get(0);
  for (int i = 0; i < realSequence.size(); i++) {
    TaggedToken<PosTag> realToken = realSequence.get(i);
    TaggedToken<PosTag> testToken = guessedSequence.get(i);

    boolean tokenUnknown = realToken.getToken().getPossiblePosTags() != null && realToken.getToken().getPossiblePosTags().size() == 0;
    if (tokenUnknown) {
      fscoreUnknownInLexicon.increment(realToken.getTag().getCode(), testToken.getTag().getCode());
      unknownWordCount++;
      // build the "tag|text" key once, for both the lookup and the update
      String unknownKey = realToken.getTag() + "|" + realToken.getToken().getAnalyisText();
      Integer countObj = unknownWords.get(unknownKey);
      int count = countObj == null ? 0 : countObj.intValue();
      unknownWords.put(unknownKey, count + 1);
    } else {
      knownWordCount++;
      knownWords.add(realToken.getToken().getAnalyisText());
    }

    // The possible pos-tags may be null (see the null check above), so guard
    // before calling contains. With no lexicon information for the token, no
    // mismatch is recorded.
    if (realToken.getTag().getOpenClassIndicator().isClosed() && realToken.getToken().getPossiblePosTags() != null
        && !realToken.getToken().getPossiblePosTags().contains(realToken.getTag())) {
      closedCategoryMismatches.add(realToken.getTag() + "|" + realToken.getToken().getAnalyisText());
    }
  }
}
use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.
the class PosTagSetFeature method checkInternal.
/**
 * Lists every pos-tag code in the current session's pos-tag set, each with a
 * weight of 1.0.
 *
 * @return a result wrapping one weighted outcome per pos-tag in the set
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper context, RuntimeEnvironment env) {
  List<WeightedOutcome<String>> outcomes = new ArrayList<WeightedOutcome<String>>();
  for (PosTag tag : TalismaneSession.get(sessionId).getPosTagSet().getTags()) {
    WeightedOutcome<String> outcome = new WeightedOutcome<String>(tag.getCode(), 1.0);
    outcomes.add(outcome);
  }
  return this.generateResult(outcomes);
}
Aggregations