Search in sources :

Example 26 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class LexicalAttributeFeatureTest method testCheckInternalMultipleEntries.

/**
 * Builds a single token "demande" (tagged "V") inside the sentence
 * "je demande", asks a {@link LexicalAttributeFeature} for the attribute named
 * by {@code LexicalAttribute.Person}, and verifies that exactly two outcomes
 * come back, each being either "1" or "3".
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    // Point the configuration at the lexicon-enabled test resources and reload it.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    final String text = "je demande";
    final String precedingText = "je ";

    // The token under test covers the span following "je " up to the end of the text.
    Sentence sentence = new Sentence(text, sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);
    Token verbToken = new Token("demande", tokens, 1, precedingText.length(), text.length(), sessionId);
    Decision verbDecision = new Decision("V", 1.0);
    final PosTaggedToken taggedVerb = new PosTaggedToken(verbToken, verbDecision, sessionId);

    // Address function that always resolves to the pos-tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(taggedVerb);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    LexicalAttributeFeature<PosTaggerContext> personFeature = new LexicalAttributeFeature<>(fixedAddress, personAttribute);

    PosTagSequence tagHistory = new PosTagSequence(tokens);
    PosTaggerContext taggerContext = new PosTaggerContextImpl(verbToken, tagHistory);
    RuntimeEnvironment runtime = new RuntimeEnvironment();

    FeatureResult<List<WeightedOutcome<String>>> personResult = personFeature.checkInternal(taggerContext, runtime);
    List<WeightedOutcome<String>> outcomes = personResult.getOutcome();
    System.out.println(outcomes);

    // Every returned value must be one of the two accepted values...
    for (WeightedOutcome<String> weighted : outcomes) {
        String value = weighted.getOutcome();
        assertTrue("1".equals(value) || "3".equals(value));
    }
    // ...and there must be exactly two of them.
    assertEquals(2, outcomes.size());
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) List(java.util.List) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 27 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class PatternTokeniserTest method testTokenise.

/**
 * Tokenises "Je n'ai pas l'ourang-outan sur www.google.com." with a
 * {@link PatternTokeniser} after annotating the URL span with a "URL"
 * {@link TokenPlaceholder}, and verifies that the first token sequence holds
 * nine tokens with the expected analysis texts.
 */
@Test
public void testTokenise() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    final String text = "Je n'ai pas l'ourang-outan sur www.google.com.";
    final String textBeforeUrl = "Je n'ai pas l'ourang-outan sur ";
    final String textThroughUrl = "Je n'ai pas l'ourang-outan sur www.google.com";
    final Sentence sentence = new Sentence(text, sessionId);

    // Annotate the span "www.google.com" (excluding the final full stop) with a "URL" placeholder.
    String[] labels = new String[0];
    Annotation<TokenPlaceholder> urlAnnotation = new Annotation<TokenPlaceholder>(textBeforeUrl.length(), textThroughUrl.length(), new TokenPlaceholder("URL", ""), labels);
    List<Annotation<TokenPlaceholder>> annotations = new ArrayList<>();
    annotations.add(urlAnnotation);
    sentence.addAnnotations(annotations);

    // Separator rules handed to the pattern manager.
    List<String> tokeniserPatterns = new ArrayList<String>();
    tokeniserPatterns.add("IS_NOT_SEPARATOR -_");
    tokeniserPatterns.add("IS_SEPARATOR_AFTER '");
    TokeniserPatternManager patternManager = new TokeniserPatternManager(tokeniserPatterns, sessionId);
    PatternTokeniser tokeniser = new PatternTokeniser(null, patternManager, null, 1, sessionId);

    TokenSequence tokenSequence = tokeniser.tokenise(sentence).get(0);
    LOG.debug(tokenSequence.toString());

    // Expected analysis text of each token, by position.
    final String[] expectedTexts = { "Je", "n'", "ai", "pas", "l'", "ourang-outan", "sur", "URL", "." };
    assertEquals(9, tokenSequence.size());

    int position = 0;
    for (Token token : tokenSequence) {
        // Positions beyond the table are not checked.
        if (position < expectedTexts.length) {
            assertEquals(expectedTexts[position], token.getAnalyisText());
        }
        position++;
    }
}
Also used : Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) Token(com.joliciel.talismane.tokeniser.Token) Annotation(com.joliciel.talismane.Annotation) TokenPlaceholder(com.joliciel.talismane.sentenceAnnotators.TokenPlaceholder) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 28 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class AndRangeFeature method checkInternal.

/**
 * Evaluates the criterion on every token whose index lies between the start
 * and end indexes (inclusive) given by {@code startFeature} and
 * {@code endFeature}, and combines the outcomes with a logical AND.
 * <p>
 * The start index is clamped to 0 and the end index to the last index of the
 * token sequence.
 *
 * @param tokenWrapper the wrapper from which the reference token is resolved
 * @param env the runtime environment passed through to the nested features
 * @return the AND of all criterion outcomes in the range, or {@code null} when
 *         the reference token cannot be resolved, either bound has no result,
 *         the clamped range is empty, or the criterion has no result for some
 *         token in the range
 * @throws TalismaneException declared by the overridden method; may be thrown
 *         by the nested feature checks
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final TokenWrapper resolved = this.getToken(tokenWrapper, env);
    if (resolved == null) {
        return null;
    }
    final Token reference = resolved.getToken();

    final FeatureResult<Integer> lowerBound = startFeature.check(resolved, env);
    final FeatureResult<Integer> upperBound = endFeature.check(resolved, env);
    if (lowerBound == null || upperBound == null) {
        return null;
    }

    // Clamp the requested range to the valid indexes of the token sequence.
    int rawFirst = lowerBound.getOutcome();
    int rawLast = upperBound.getOutcome();
    final int maxIndex = reference.getTokenSequence().size() - 1;
    final int first = Math.max(rawFirst, 0);
    final int last = Math.min(rawLast, maxIndex);
    if (first > last) {
        return null;
    }

    boolean allMatch = true;
    for (int index = first; index <= last; index++) {
        Token candidate = reference.getTokenSequence().get(index);
        FeatureResult<Boolean> verdict = this.criterion.check(candidate, env);
        if (verdict == null) {
            // A missing criterion result anywhere in the range voids the whole feature.
            return null;
        }
        // Note: the loop deliberately keeps going after a false outcome, so a
        // later missing result still yields null.
        allMatch = allMatch && verdict.getOutcome();
    }
    return this.generateResult(allMatch);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Example 29 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class TokeniserPatternsFeature method checkInternal.

/**
 * Collects the names of the patterns matched by the wrapped token, keeping
 * only those matches whose index equals the first entry of the pattern's
 * indexes-to-test list. Each retained name is given a weight of 1.0.
 *
 * @param tokenWrapper wrapper around the token whose pattern matches are inspected
 * @param env the runtime environment (not used by this method)
 * @return a feature result wrapping the list of weighted pattern names
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final Token wrapped = tokenWrapper.getToken();
    final List<WeightedOutcome<String>> patternNames = new ArrayList<WeightedOutcome<String>>();
    for (TokenPatternMatch match : wrapped.getMatches()) {
        // Skip matches that are not at the pattern's first index to test.
        if (match.getIndex() != match.getPattern().getIndexesToTest().get(0)) {
            continue;
        }
        String patternName = match.getPattern().getName();
        patternNames.add(new WeightedOutcome<String>(patternName, 1.0));
    }
    return this.generateResult(patternNames);
}
Also used : ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) Token(com.joliciel.talismane.tokeniser.Token) TokenPatternMatch(com.joliciel.talismane.tokeniser.patterns.TokenPatternMatch)

Example 30 with Token

use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.

the class UnknownWordFeature method checkInternal.

/**
 * Reports whether the resolved token has no possible pos-tags at all.
 *
 * @param tokenWrapper the wrapper from which the token to test is resolved
 * @param env the runtime environment passed to the token resolution
 * @return a result wrapping {@code true} when the token's set of possible
 *         pos-tags is empty and {@code false} otherwise, or {@code null} when
 *         no token could be resolved
 * @throws TalismaneException declared by the overridden method
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    final TokenWrapper resolved = this.getToken(tokenWrapper, env);
    if (resolved == null) {
        return null;
    }
    final Token candidate = resolved.getToken();
    final boolean hasNoPosTags = candidate.getPossiblePosTags().size() == 0;
    return this.generateResult(hasNoPosTags);
}
Also used : Token(com.joliciel.talismane.tokeniser.Token)

Aggregations

Token (com.joliciel.talismane.tokeniser.Token): 69, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence): 16, ArrayList (java.util.ArrayList): 15, Sentence (com.joliciel.talismane.rawText.Sentence): 14, Decision (com.joliciel.talismane.machineLearning.Decision): 12, Config (com.typesafe.config.Config): 12, TalismaneTest (com.joliciel.talismane.TalismaneTest): 11, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken): 11, Test (org.junit.Test): 11, TalismaneException (com.joliciel.talismane.TalismaneException): 7, RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment): 7, PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence): 7, TokeniserOutcome (com.joliciel.talismane.tokeniser.TokeniserOutcome): 7, List (java.util.List): 7, WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 6, HashMap (java.util.HashMap): 6, StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature): 5, PosTag (com.joliciel.talismane.posTagger.PosTag): 5, PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext): 5, PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl): 5