Search in sources :

Example 1 with PosTaggerContext

use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.

the class PosTaggerFeatureParser method getRules.

/**
 * Parses a list of textual rule descriptors into pos-tagger rules.
 * <p>
 * Each descriptor is a tab-separated line of the form
 * {@code posTagCode<TAB>descriptor} or
 * {@code posTagCode<TAB>name<TAB>descriptor}. Empty lines and lines starting
 * with {@code #} are skipped. A pos-tag code prefixed with {@code !} produces
 * a negative rule. A line with an empty pos-tag code must carry a name; its
 * descriptor is still parsed, but it does not produce any rule.
 *
 * @param ruleDescriptors
 *          the descriptor lines to parse
 * @return the rules built from the descriptors, in descriptor order, one rule
 *         per boolean feature parsed from each descriptor
 * @throws TalismaneException
 *           if a rule is incorrectly configured: it has fewer than two
 *           tab-separated parts, it has neither a pos-tag nor a name, or it
 *           is not based on a boolean feature
 */
public List<PosTaggerRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<PosTaggerRule> rules = new ArrayList<PosTaggerRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        // Skip blank lines and comment lines.
        if (ruleDescriptor.length() == 0 || ruleDescriptor.startsWith("#")) {
            continue;
        }

        String[] ruleParts = ruleDescriptor.split("\t");
        // String.split drops trailing empty strings, so a line without a tab (or
        // made only of tabs) yields fewer than two parts. Report it as a
        // configuration error rather than failing with an
        // ArrayIndexOutOfBoundsException.
        if (ruleParts.length < 2) {
            throw new TalismaneException("Rule must contain at least a PosTag column and a descriptor, separated by a tab: " + ruleDescriptor);
        }

        String posTagCode = ruleParts[0];
        String descriptorName = null;
        String descriptor;
        if (ruleParts.length > 2) {
            // Three columns: pos-tag code, descriptor name, descriptor.
            descriptorName = ruleParts[1];
            descriptor = ruleParts[2];
        } else {
            // Two columns: pos-tag code, descriptor.
            descriptor = ruleParts[1];
        }

        PosTag posTag = null;
        boolean negative = false;
        if (posTagCode.length() == 0) {
            // No pos-tag: this line is only a descriptor and must be named.
            if (descriptorName == null) {
                throw new TalismaneException("Rule without PosTag must have a name.");
            }
        } else {
            if (posTagCode.startsWith("!")) {
                negative = true;
                posTagCode = posTagCode.substring(1);
            }
            posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(posTagCode);
        }

        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        // The descriptor is parsed even when no pos-tag was given, so that a
        // malformed descriptor-only line is still detected here.
        List<PosTaggerFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);

        if (posTag != null) {
            // A proper rule: one rule per parsed feature, each of which must be boolean.
            for (PosTaggerFeature<?> feature : myFeatures) {
                if (!(feature instanceof BooleanFeature)) {
                    throw new TalismaneException("Rule must be based on a boolean feature.");
                }
                @SuppressWarnings("unchecked")
                BooleanFeature<PosTaggerContext> condition = (BooleanFeature<PosTaggerContext>) feature;
                PosTaggerRule rule = new PosTaggerRule(condition, posTag);
                rule.setNegative(negative);
                rules.add(rule);
            }
        }
    }
    return rules;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) FunctionDescriptorParser(com.joliciel.talismane.machineLearning.features.FunctionDescriptorParser) FunctionDescriptor(com.joliciel.talismane.machineLearning.features.FunctionDescriptor) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) BooleanFeature(com.joliciel.talismane.machineLearning.features.BooleanFeature) PosTag(com.joliciel.talismane.posTagger.PosTag)

Example 2 with PosTaggerContext

use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.

the class PosTagFeatureTester method onNextPosTagSequence.

/**
 * For every token of the sequence whose lower-cased analysis text is one of
 * the test words, runs all pos-tagger features on that token and records, per
 * feature result and per pos-tag code, a rendering of the sentence in which
 * the token occurred.
 * <p>
 * In the rendering, tokens before the tested token are shown as
 * {@code text/tag}, the tested token as {@code [text/tag]}, and tokens after
 * it as plain {@code text}; spaces inside a token are replaced by underscores.
 *
 * @param posTagSequence
 *          the pos-tagged sentence to inspect
 * @throws TalismaneException
 *           as declared by the overridden method
 */
@Override
public void onNextPosTagSequence(PosTagSequence posTagSequence) throws TalismaneException {
    // Grows by one token per iteration: the tokens preceding the current one.
    PosTagSequence history = new PosTagSequence(posTagSequence.getTokenSequence());
    for (PosTaggedToken current : posTagSequence) {
        if (testWords.contains(current.getToken().getAnalyisText().toLowerCase())) {
            // Render the whole sentence, highlighting the current token.
            StringBuilder rendering = new StringBuilder();
            boolean pastCurrent = false;
            for (PosTaggedToken other : posTagSequence) {
                boolean isCurrent = other.equals(current);
                String text = other.getToken().getOriginalText().replace(' ', '_');
                if (isCurrent) {
                    rendering.append(" [").append(text).append("/").append(other.getTag().toString()).append("]");
                    pastCurrent = true;
                } else if (pastCurrent) {
                    rendering.append(" ").append(text);
                } else {
                    rendering.append(" ").append(text).append("/").append(other.getTag().toString());
                }
            }
            String sentenceText = rendering.toString();
            LOG.debug(sentenceText);

            String classification = current.getTag().getCode();
            PosTaggerContext context = new PosTaggerContextImpl(current.getToken(), history);

            // Evaluate every feature with a fresh runtime environment, keeping non-null results.
            List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
            for (PosTaggerFeature<?> feature : posTaggerFeatures) {
                FeatureResult<?> result = feature.check(context, new RuntimeEnvironment());
                if (result != null) {
                    results.add(result);
                }
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace("Token: " + current.getToken().getAnalyisText());
                for (FeatureResult<?> result : results) {
                    LOG.trace(result.toString());
                }
            }

            // Index the sentence under feature result -> classification.
            for (FeatureResult<?> result : results) {
                String resultKey = result.toString();
                Map<String, List<String>> byClassification = featureResultMap.get(resultKey);
                if (byClassification == null) {
                    byClassification = new TreeMap<String, List<String>>();
                    featureResultMap.put(resultKey, byClassification);
                }
                List<String> examples = byClassification.get(classification);
                if (examples == null) {
                    examples = new ArrayList<String>();
                    byClassification.put(classification, examples);
                }
                examples.add(sentenceText);
            }
        }
        history.addPosTaggedToken(current);
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) ArrayList(java.util.ArrayList) List(java.util.List) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 3 with PosTaggerContext

use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.

the class CombinedLexicalAttributesTest method testCheckInternalMultipleEntries.

/**
 * Checks that a {@code CombinedLexicalAttributesFeature} asked for the
 * {@code Person} attribute of the token "demande" (tagged {@code V}) in the
 * sentence "je demande" returns the outcome {@code "1;3"}.
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    // Load the test configuration, discarding any previously cached one.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    Sentence sentence = new Sentence("je demande", sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);

    // The token "demande" spans from the end of "je " to the end of the sentence.
    int start = "je ".length();
    int end = "je demande".length();
    Token token = new Token("demande", tokens, 1, start, end, sessionId);
    final PosTaggedToken posTaggedToken = new PosTaggedToken(token, new Decision("V", 1.0), sessionId);

    // Address function that always points at the token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(posTaggedToken);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> person = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    CombinedLexicalAttributesFeature<PosTaggerContext> feature = new CombinedLexicalAttributesFeature<>(addressFunction, person);

    PosTaggerContext context = new PosTaggerContextImpl(token, new PosTagSequence(tokens));
    FeatureResult<String> result = feature.checkInternal(context, new RuntimeEnvironment());

    String outcome = result.getOutcome();
    System.out.println(outcome);
    assertEquals("1;3", outcome);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 4 with PosTaggerContext

use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.

the class CombinedLexicalAttributesTest method testCheckInternalMultipleAttributes.

/**
 * Checks that a {@code CombinedLexicalAttributesFeature} asked for both the
 * {@code Person} and {@code Number} attributes of the token "blah" (tagged
 * {@code V}) returns the outcome {@code "1;3|p;s"}.
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
    // Load the test configuration, discarding any previously cached one.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    Sentence sentence = new Sentence("blah", sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);

    // The single token covers the whole sentence.
    int start = "".length();
    int end = "blah".length();
    Token token = new Token("blah", tokens, 1, start, end, sessionId);
    final PosTaggedToken posTaggedToken = new PosTaggedToken(token, new Decision("V", 1.0), sessionId);

    // Address function that always points at the token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(posTaggedToken);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> person = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    StringLiteralFeature<PosTaggedTokenWrapper> number = new StringLiteralFeature<>(LexicalAttribute.Number.name());
    CombinedLexicalAttributesFeature<PosTaggerContext> feature = new CombinedLexicalAttributesFeature<>(addressFunction, person, number);

    PosTaggerContext context = new PosTaggerContextImpl(token, new PosTagSequence(tokens));
    FeatureResult<String> result = feature.checkInternal(context, new RuntimeEnvironment());

    String outcome = result.getOutcome();
    System.out.println(outcome);
    assertEquals("1;3|p;s", outcome);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 5 with PosTaggerContext

use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.

the class LexicalAttributeFeatureTest method testCheckInternalMultipleAttributes.

/**
 * Checks that a {@code LexicalAttributeFeature} asked for both the
 * {@code Person} and {@code Number} attributes of the token "blah" (tagged
 * {@code V}) returns exactly three weighted outcomes, each being one of
 * {@code "3|p"}, {@code "1|s"} or {@code "3|s"}.
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
    // Load the test configuration, discarding any previously cached one.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    Sentence sentence = new Sentence("blah", sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);

    // The single token covers the whole sentence.
    int start = "".length();
    int end = "blah".length();
    Token token = new Token("blah", tokens, 1, start, end, sessionId);
    final PosTaggedToken posTaggedToken = new PosTaggedToken(token, new Decision("V", 1.0), sessionId);

    // Address function that always points at the token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(posTaggedToken);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> person = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    StringLiteralFeature<PosTaggedTokenWrapper> number = new StringLiteralFeature<>(LexicalAttribute.Number.name());
    LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(addressFunction, person, number);

    PosTaggerContext context = new PosTaggerContextImpl(token, new PosTagSequence(tokens));
    FeatureResult<List<WeightedOutcome<String>>> result = feature.checkInternal(context, new RuntimeEnvironment());

    List<WeightedOutcome<String>> outcomes = result.getOutcome();
    System.out.println(outcomes);

    // Every outcome must be one of the expected person|number combinations.
    for (WeightedOutcome<String> weighted : outcomes) {
        String value = weighted.getOutcome();
        boolean expected = "3|p".equals(value) || "1|s".equals(value) || "3|s".equals(value);
        assertTrue(expected);
    }
    assertEquals(3, outcomes.size());
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) List(java.util.List) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Aggregations

PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext): 7; RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment): 6; PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence): 6; PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken): 6; PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl): 6; TalismaneTest (com.joliciel.talismane.TalismaneTest): 5; Decision (com.joliciel.talismane.machineLearning.Decision): 5; StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature): 5; Sentence (com.joliciel.talismane.rawText.Sentence): 5; Token (com.joliciel.talismane.tokeniser.Token): 5; TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence): 5; Config (com.typesafe.config.Config): 5; Test (org.junit.Test): 5; List (java.util.List): 4; WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 3; ArrayList (java.util.ArrayList): 2; TalismaneException (com.joliciel.talismane.TalismaneException): 1; BooleanFeature (com.joliciel.talismane.machineLearning.features.BooleanFeature): 1; FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult): 1; FunctionDescriptor (com.joliciel.talismane.machineLearning.features.FunctionDescriptor): 1