Search in sources :

Example 1 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class PosTaggerFeatureParser method getRules.

/**
 * Builds pos-tagger rules from a list of tab-separated rule descriptors.
 * <p>
 * Empty lines and lines starting with {@code #} are skipped. Every other line
 * must have one of these layouts:
 * <ul>
 * <li>{@code posTagCode<TAB>descriptor}</li>
 * <li>{@code posTagCode<TAB>descriptorName<TAB>descriptor}</li>
 * </ul>
 * A pos-tag code prefixed with {@code !} yields a negative rule. If the pos-tag
 * code is empty, the line must carry a descriptor name; the descriptor is still
 * parsed, but no rule is added for it.
 *
 * @param ruleDescriptors
 *          the raw rule descriptor lines
 * @return the rules built from the descriptors, in input order
 * @throws TalismaneException
 *           if a rule is incorrectly configured: fewer than two tab-separated
 *           columns, an unnamed descriptor without a pos-tag, or a rule based
 *           on a non-boolean feature
 */
public List<PosTaggerRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<PosTaggerRule> rules = new ArrayList<PosTaggerRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        if (ruleDescriptor.length() == 0 || ruleDescriptor.startsWith("#")) {
            // blank line or comment: not a rule descriptor
            continue;
        }
        String[] ruleParts = ruleDescriptor.split("\t");
        // String.split drops trailing empty strings, so a line without a tab (or
        // consisting only of tabs) yields fewer than two parts. Report it as a
        // configuration error rather than failing on an array index.
        if (ruleParts.length < 2) {
            throw new TalismaneException("Rule must contain a PosTag column and a descriptor, separated by a tab: " + ruleDescriptor);
        }
        String posTagCode = ruleParts[0];
        PosTag posTag = null;
        boolean negative = false;
        String descriptor = null;
        String descriptorName = null;
        if (ruleParts.length > 2) {
            descriptor = ruleParts[2];
            descriptorName = ruleParts[1];
        } else {
            descriptor = ruleParts[1];
        }
        if (posTagCode.length() == 0) {
            if (descriptorName == null) {
                throw new TalismaneException("Rule without PosTag must have a name.");
            }
        } else {
            if (posTagCode.startsWith("!")) {
                negative = true;
                posTagCode = posTagCode.substring(1);
            }
            posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(posTagCode);
        }
        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        // Parsed even when there is no pos-tag; presumably this makes the named
        // descriptor available to later lines - confirm against parseDescriptor.
        List<PosTaggerFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
        if (posTag != null) {
            // a proper rule (not just a named descriptor): one rule per parsed feature
            for (PosTaggerFeature<?> feature : myFeatures) {
                if (feature instanceof BooleanFeature) {
                    @SuppressWarnings("unchecked") BooleanFeature<PosTaggerContext> condition = (BooleanFeature<PosTaggerContext>) feature;
                    PosTaggerRule rule = new PosTaggerRule(condition, posTag);
                    rule.setNegative(negative);
                    rules.add(rule);
                } else {
                    throw new TalismaneException("Rule must be based on a boolean feature.");
                }
            }
        }
    }
    return rules;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) BooleanFeature(com.joliciel.talismane.machineLearning.features.BooleanFeature) PosTag(com.joliciel.talismane.posTagger.PosTag)

Example 2 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class NgramFeature method checkInternal.

/**
 * Builds a comma-separated n-gram of pos-tag codes from the tagging history.
 * <p>
 * With {@code n} taken from {@code nFeature}, the {@code n - 1} most recent
 * history entries whose tag is not empty are collected and written oldest
 * first. When the history runs out, {@code START_TOKEN} is used as filler.
 *
 * @return the n-gram result, or {@code null} when {@code nFeature} gives no
 *         result or the current token index is below {@code n - 2}
 */
@Override
public FeatureResult<String> checkInternal(PosTaggerContext context, RuntimeEnvironment env) throws TalismaneException {
    FeatureResult<Integer> nResult = nFeature.check(context, env);
    if (nResult == null) {
        // no n available
        return null;
    }
    int wanted = nResult.getOutcome() - 1;
    if (context.getToken().getIndex() < wanted - 1) {
        return null;
    }
    String ngram = "";
    int collected = 0;
    // offset 0 is the most recent history entry; each further step goes one entry back
    for (int offset = 0; collected < wanted; offset++) {
        String code;
        if (offset < context.getHistory().size()) {
            PosTag tag = context.getHistory().get(context.getHistory().size() - offset - 1).getTag();
            code = tag.getCode();
            if (tag.isEmpty()) {
                // empty tags do not count towards the n-gram
                continue;
            }
        } else {
            code = START_TOKEN;
        }
        // prepend, so that older tags end up on the left
        ngram = collected > 0 ? code + "," + ngram : code + ngram;
        collected++;
    }
    return this.generateResult(ngram);
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag)

Example 3 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class PosTagFScoreCalculator method onNextPosTagSequence.

/**
 * Compares the real sequence against the first guessed sequence, token by
 * token, and records each (real tag code, result) pair in the f-score
 * calculators.
 * <p>
 * Only {@code guessedSequences.get(0)} is evaluated. The two sequences are
 * walked with separate cursors ({@code i} for the real sequence, {@code j}
 * for the guessed one), because their tokens are not assumed to line up
 * one-to-one. When the start/end indexes of the two tokens differ, the
 * recorded result is {@code "TOKEN_ERROR"} instead of the guessed tag code.
 *
 * @param realSequence
 *          the reference sequence
 * @param guessedSequences
 *          the guessed sequences; only the first one is used
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws TalismaneException {
    PosTagSequence guessedSequence = guessedSequences.get(0);
    // cursor into the guessed sequence, advanced independently of i
    int j = 0;
    for (int i = 0; i < realSequence.size(); i++) {
        TaggedToken<PosTag> realToken = realSequence.get(i);
        // NOTE(review): j is not range-checked here; it is only clamped in the
        // matching / token-error branches below - confirm it cannot overrun.
        TaggedToken<PosTag> testToken = guessedSequence.get(j);
        // special handling for null tags & empty tokens
        if (realToken.getTag().equals(PosTag.NULL_POS_TAG)) {
            // A real token carrying the null pos-tag is not scored. If the
            // guessed token is an empty token, step past it too.
            // Presumably the previous non-empty token took care of any
            // required comparisons.
            if (testToken.getToken().isEmpty()) {
                j++;
            }
            continue;
        } else if (testToken.getToken().isEmpty() && !realToken.getToken().isEmpty()) {
            // If the test token is empty, but the real token isn't, we skip
            // this as well
            // Again, we assume the previous non-empty token took care of
            // any required comparisons.
            j++;
            // NOTE(review): no bounds check before this read - confirm an empty
            // guessed token is never the last token of the sequence.
            testToken = guessedSequence.get(j);
        }
        boolean tokenError = false;
        if (realToken.getToken().getStartIndex() == testToken.getToken().getStartIndex() && realToken.getToken().getEndIndex() == testToken.getToken().getEndIndex()) {
            // no token error
            j++;
            // clamp the cursor to the last guessed token
            if (j == guessedSequence.size()) {
                j--;
            }
        } else {
            // token boundaries differ: advance the guessed cursor until its
            // token ends after the real token, or the last guessed token is reached
            tokenError = true;
            while (realToken.getToken().getEndIndex() >= testToken.getToken().getEndIndex()) {
                j++;
                if (j == guessedSequence.size()) {
                    j--;
                    break;
                }
                testToken = guessedSequence.get(j);
            }
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Token " + testToken.getToken().getAnalyisText() + ", guessed: " + testToken.getTag().getCode() + " (" + testToken.getDecision().getProbability() + "), actual: " + realToken.getTag().getCode());
        }
        String result = testToken.getTag().getCode();
        if (tokenError)
            result = "TOKEN_ERROR";
        fScoreCalculator.increment(realToken.getTag().getCode(), result);
        // additionally split the counts by whether the guessed token has any
        // possible pos-tags (null or empty counts as unknown in the lexicon)
        if (testToken.getToken().getPossiblePosTags() == null || testToken.getToken().getPossiblePosTags().size() == 0)
            fscoreUnknownInLexicon.increment(realToken.getTag().getCode(), result);
        else
            fscoreKnownInLexicon.increment(realToken.getTag().getCode(), result);
    }
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence)

Example 4 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class PosTagLexicalCoverageTester method onNextPosTagSequence.

/**
 * Updates lexical-coverage statistics from the real sequence and the first
 * guessed sequence.
 * <p>
 * For each position {@code i}, the real token is treated as unknown when its
 * possible pos-tag collection is non-null and empty. Unknown tokens are
 * counted per {@code tag|text} key and scored against the guessed tag at the
 * same position; all other tokens are counted as known. Independently, a real
 * token whose tag is closed-class but is not among its possible pos-tags is
 * recorded as a closed-category mismatch.
 * <p>
 * A token with {@code null} possible pos-tags is skipped by the mismatch
 * check (previously this threw a {@link NullPointerException}), consistent
 * with the null guard already used by the unknown-token test.
 *
 * @param realSequence
 *          the reference sequence
 * @param guessedSequences
 *          the guessed sequences; only the first one is used, and it is read
 *          at the same indexes as the real sequence
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws TalismaneException {
    PosTagSequence guessedSequence = guessedSequences.get(0);
    for (int i = 0; i < realSequence.size(); i++) {
        TaggedToken<PosTag> realToken = realSequence.get(i);
        TaggedToken<PosTag> testToken = guessedSequence.get(i);
        boolean tokenUnknown = realToken.getToken().getPossiblePosTags() != null && realToken.getToken().getPossiblePosTags().size() == 0;
        if (tokenUnknown) {
            fscoreUnknownInLexicon.increment(realToken.getTag().getCode(), testToken.getTag().getCode());
            unknownWordCount++;
            // build the key once instead of once for the lookup and once for the update
            String unknownKey = realToken.getTag() + "|" + realToken.getToken().getAnalyisText();
            Integer countObj = unknownWords.get(unknownKey);
            int count = countObj == null ? 0 : countObj.intValue();
            unknownWords.put(unknownKey, count + 1);
        } else {
            knownWordCount++;
            knownWords.add(realToken.getToken().getAnalyisText());
        }
        // null possible pos-tags means there is nothing to compare against,
        // so no mismatch is reported for that token
        if (realToken.getTag().getOpenClassIndicator().isClosed() && realToken.getToken().getPossiblePosTags() != null
                && !realToken.getToken().getPossiblePosTags().contains(realToken.getTag())) {
            closedCategoryMismatches.add(realToken.getTag() + "|" + realToken.getToken().getAnalyisText());
        }
    }
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence)

Example 5 with PosTag

use of com.joliciel.talismane.posTagger.PosTag in project talismane by joliciel-informatique.

the class PosTagSetFeature method checkInternal.

/**
 * Returns every pos-tag code of the current session's pos-tag set, each with
 * a weight of 1.0.
 *
 * @param context
 *          not read by this feature
 * @param env
 *          not read by this feature
 * @return the result wrapping one weighted outcome per pos-tag in the set
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper context, RuntimeEnvironment env) {
    List<WeightedOutcome<String>> weightedCodes = new ArrayList<WeightedOutcome<String>>();
    for (PosTag tag : TalismaneSession.get(sessionId).getPosTagSet().getTags()) {
        String code = tag.getCode();
        weightedCodes.add(new WeightedOutcome<String>(code, 1.0));
    }
    return this.generateResult(weightedCodes);
}
Also used : PosTagSet(com.joliciel.talismane.posTagger.PosTagSet) PosTag(com.joliciel.talismane.posTagger.PosTag) ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome)

Aggregations

PosTag (com.joliciel.talismane.posTagger.PosTag)17 ArrayList (java.util.ArrayList)6 Token (com.joliciel.talismane.tokeniser.Token)5 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)3 PosTagSet (com.joliciel.talismane.posTagger.PosTagSet)3 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)3 List (java.util.List)3 LexicalEntry (com.joliciel.talismane.lexicon.LexicalEntry)2 PosTaggerLexicon (com.joliciel.talismane.lexicon.PosTaggerLexicon)2 TalismaneException (com.joliciel.talismane.TalismaneException)1 TalismaneTest (com.joliciel.talismane.TalismaneTest)1 BooleanFeature (com.joliciel.talismane.machineLearning.features.BooleanFeature)1 FunctionDescriptor (com.joliciel.talismane.machineLearning.features.FunctionDescriptor)1 FunctionDescriptorParser (com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser)1 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)1 UnknownPosTagException (com.joliciel.talismane.posTagger.UnknownPosTagException)1 Config (com.typesafe.config.Config)1 FileOutputStream (java.io.FileOutputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 ZipEntry (java.util.zip.ZipEntry)1