Search in sources :

Example 66 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class LastWordInSentenceFeature method checkInternal.

/**
 * Reports whether the resolved token sits at the final index of its token sequence.
 *
 * @param tokenWrapper the wrapper passed on to {@code getToken} to resolve the token to test
 * @param env the runtime environment passed on to {@code getToken}
 * @return a result wrapping {@code true} when the token's index equals the sequence size minus one,
 *         {@code false} otherwise; {@code null} when {@code getToken} yields no wrapper
 * @throws TalismaneException declared by the signature; presumably propagated from {@code getToken}
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper resolved = this.getToken(tokenWrapper, env);
    if (resolved == null) {
        // Nothing to test: no token could be resolved for this wrapper.
        return null;
    }

    Token current = resolved.getToken();
    int position = current.getIndex();
    int finalPosition = current.getTokenSequence().size() - 1;
    boolean isLast = position == finalPosition;
    return this.generateResult(isLast);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 67 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class NextTokensFeature method checkInternal.

/**
 * Builds a string from the original text of the token at the boundary's whitespace-inclusive
 * index and of the {@code n} positions that follow it, where {@code n} is the outcome of
 * {@code nFeature}. Each position lying past the end of the whitespace-inclusive token list
 * contributes the marker {@code [[END]]} instead of token text.
 *
 * @param context the possible sentence boundary whose following tokens are concatenated
 * @param env the runtime environment passed on to {@code nFeature}
 * @return a result wrapping the concatenated text, or {@code null} when {@code nFeature}
 *         produces no result
 * @throws TalismaneException declared by the signature; presumably propagated from {@code nFeature}
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) throws TalismaneException {
    FeatureResult<Integer> countResult = nFeature.check(context, env);
    if (countResult == null) {
        return null;
    }

    int count = countResult.getOutcome();
    int boundaryIndex = context.getTokenIndexWithWhitespace();
    StringBuilder text = new StringBuilder();
    for (int offset = 0; offset <= count; offset++) {
        int position = boundaryIndex + offset;
        if (position >= context.getTokenSequence().listWithWhiteSpace().size()) {
            // Ran off the end of the sequence: record a marker rather than token text.
            text.append("[[END]]");
            continue;
        }
        Token following = context.getTokenSequence().listWithWhiteSpace().get(position);
        text.append(following.getOriginalText());
    }
    return this.generateResult(text.toString());
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 68 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PreviousTokensFeature method checkInternal.

/**
 * Builds a string from the original text of the token at the boundary's whitespace-inclusive
 * index and of the {@code n} positions that precede it, where {@code n} is the outcome of
 * {@code nFeature}. The text is kept in sequence order (earliest position first). Each position
 * lying before index 0 contributes the marker {@code [[START]]} instead of token text.
 *
 * @param context the possible sentence boundary whose preceding tokens are concatenated
 * @param env the runtime environment passed on to {@code nFeature}
 * @return a result wrapping the concatenated text, or {@code null} when {@code nFeature}
 *         produces no result
 * @throws TalismaneException declared by the signature; presumably propagated from {@code nFeature}
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) throws TalismaneException {
    FeatureResult<Integer> countResult = nFeature.check(context, env);
    if (countResult == null) {
        return null;
    }

    int count = countResult.getOutcome();
    int boundaryIndex = context.getTokenIndexWithWhitespace();
    StringBuilder text = new StringBuilder();
    for (int offset = 0; offset <= count; offset++) {
        int position = boundaryIndex - offset;
        if (position < 0) {
            // Walked past the start of the sequence: record a marker rather than token text.
            text.insert(0, "[[START]]");
            continue;
        }
        Token preceding = context.getTokenSequence().listWithWhiteSpace().get(position);
        // Prepend so the accumulated text stays in sequence order.
        text.insert(0, preceding.getOriginalText());
    }
    return this.generateResult(text.toString());
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 69 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class NextLetterCapitalFeature method checkInternal.

/**
 * Classifies what follows a possible sentence boundary according to whether the next
 * non-whitespace, non-quote, non-dash token starts with an upper-case character.
 *
 * <p>The tokens after the boundary are scanned in the whitespace-inclusive list:
 * <ul>
 * <li>whitespace tokens are noted and skipped;</li>
 * <li>quote tokens and {@code -} tokens are noted and skipped, except that the scan stops as soon
 * as both a quote and a dash have been seen;</li>
 * <li>any other separator stops the scan with "not capital";</li>
 * <li>any other token stops the scan, and the upper-case test is applied to the first character of
 * its original text.</li>
 * </ul>
 * A capital only counts if at least one whitespace token was seen during the scan.
 *
 * @param context the possible sentence boundary being examined
 * @param env the runtime environment (not used by this method)
 * @return a result wrapping one of {@code "CapitalAfterInitial"} (boundary is {@code "."} and the
 *         token just before it is a single upper-case character), {@code "CapitalAfterQuote"},
 *         {@code "CapitalAfterDash"}, {@code "true"} or {@code "false"}, tested in that order
 */
@Override
public FeatureResult<String> checkInternal(PossibleSentenceBoundary context, RuntimeEnvironment env) {
    int boundaryIndex = context.getTokenIndexWithWhitespace();

    // A "." directly preceded by a one-character upper-case token is treated as following an initial.
    boolean afterInitial = false;
    if (context.getBoundaryString().equals(".") && boundaryIndex > 0) {
        Token before = context.getTokenSequence().listWithWhiteSpace().get(boundaryIndex - 1);
        if (before != null) {
            String beforeText = before.getOriginalText();
            afterInitial = Character.isUpperCase(beforeText.charAt(0)) && beforeText.length() < 2;
        }
    }

    boolean sawWhiteSpace = false;
    boolean sawQuote = false;
    boolean sawDash = false;
    boolean capitalFollows = false;
    if (boundaryIndex >= 0) {
        for (int position = boundaryIndex + 1; position < context.getTokenSequence().listWithWhiteSpace().size(); position++) {
            Token candidate = context.getTokenSequence().listWithWhiteSpace().get(position);
            if (candidate.isWhiteSpace()) {
                sawWhiteSpace = true;
                continue;
            }

            String candidateText = candidate.getText();
            boolean isQuote = candidateText.equals("\"") || candidateText.equals("“") || candidateText.equals("„")
                    || candidateText.equals("‟") || candidateText.equals("″");
            if (isQuote) {
                sawQuote = true;
                if (sawDash) {
                    break;
                }
                continue;
            }
            if (candidateText.equals("-")) {
                sawDash = true;
                if (sawQuote) {
                    break;
                }
                continue;
            }

            // First token that is neither whitespace, quote nor dash decides the outcome:
            // a separator never counts as a capital, anything else is tested on its first character.
            capitalFollows = !candidate.isSeparator() && Character.isUpperCase(candidate.getOriginalText().charAt(0));
            break;
        }
    }

    // A capital is only significant when whitespace was seen during the scan.
    boolean significantCapital = capitalFollows && sawWhiteSpace;

    String outcome;
    if (!significantCapital) {
        outcome = "false";
    } else if (afterInitial) {
        outcome = "CapitalAfterInitial";
    } else if (sawQuote) {
        outcome = "CapitalAfterQuote";
    } else if (sawDash) {
        outcome = "CapitalAfterDash";
    } else {
        outcome = "true";
    }
    return this.generateResult(outcome);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Aggregations

Token (com.joliciel.talismane.tokeniser.Token)69 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)16 ArrayList (java.util.ArrayList)15 Sentence (com.joliciel.talismane.rawText.Sentence)14 Decision (com.joliciel.talismane.machineLearning.Decision)12 Config (com.typesafe.config.Config)12 TalismaneTest (com.joliciel.talismane.TalismaneTest)11 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)11 Test (org.junit.Test)11 TalismaneException (com.joliciel.talismane.TalismaneException)7 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)7 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)7 TokeniserOutcome (com.joliciel.talismane.tokeniser.TokeniserOutcome)7 List (java.util.List)7 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)6 HashMap (java.util.HashMap)6 StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature)5 PosTag (com.joliciel.talismane.posTagger.PosTag)5 PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext)5 PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl)5