use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.
the class PosTaggerFeatureParser method getRules.
/**
 * Parses a list of textual rule descriptors into pos-tagger rules.
 * <p>
 * Empty descriptors and descriptors starting with {@code #} are skipped.
 * Every other descriptor is split on tab characters and must have one of
 * these layouts:
 * <ul>
 * <li>{@code posTagCode<TAB>descriptor}</li>
 * <li>{@code posTagCode<TAB>name<TAB>descriptor}</li>
 * </ul>
 * A pos-tag code prefixed with {@code !} produces a negative rule. When the
 * pos-tag code is empty, the descriptor must be named; it is still parsed,
 * but no rule is added for it.
 *
 * @param ruleDescriptors
 *            the rule descriptors to parse, one per entry
 * @return the rules built from the descriptors, in the order encountered
 * @throws TalismaneException
 *             if a rule is incorrectly configured: it has no descriptor
 *             part, it has neither a pos-tag nor a name, or it is based
 *             on a non-boolean feature
 */
public List<PosTaggerRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
    List<PosTaggerRule> rules = new ArrayList<PosTaggerRule>();
    FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
    for (String ruleDescriptor : ruleDescriptors) {
        LOG.debug(ruleDescriptor);
        if (ruleDescriptor.length() == 0 || ruleDescriptor.startsWith("#")) {
            // blank line or comment: nothing to parse
            continue;
        }

        String[] ruleParts = ruleDescriptor.split("\t");
        // String.split drops trailing empty strings, so "V" and "V\t" both
        // yield a single part. Report this as a configuration error rather
        // than failing with an ArrayIndexOutOfBoundsException below.
        if (ruleParts.length < 2) {
            throw new TalismaneException("Rule must contain a tab-separated descriptor: " + ruleDescriptor);
        }

        String posTagCode = ruleParts[0];
        PosTag posTag = null;
        boolean negative = false;
        String descriptor = null;
        String descriptorName = null;
        if (ruleParts.length > 2) {
            descriptor = ruleParts[2];
            descriptorName = ruleParts[1];
        } else {
            descriptor = ruleParts[1];
        }

        if (posTagCode.length() == 0) {
            if (descriptorName == null) {
                throw new TalismaneException("Rule without PosTag must have a name.");
            }
        } else {
            if (posTagCode.startsWith("!")) {
                negative = true;
                posTagCode = posTagCode.substring(1);
            }
            posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(posTagCode);
        }

        FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
        if (descriptorName != null) {
            functionDescriptor.setDescriptorName(descriptorName);
        }
        // The descriptor is parsed even when there is no pos-tag, in which
        // case no rule is added for it.
        List<PosTaggerFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);

        if (posTag != null) {
            for (PosTaggerFeature<?> feature : myFeatures) {
                if (feature instanceof BooleanFeature) {
                    @SuppressWarnings("unchecked")
                    BooleanFeature<PosTaggerContext> condition = (BooleanFeature<PosTaggerContext>) feature;
                    PosTaggerRule rule = new PosTaggerRule(condition, posTag);
                    rule.setNegative(negative);
                    rules.add(rule);
                } else {
                    throw new TalismaneException("Rule must be based on a boolean feature.");
                }
            } // next feature
        } // is it a rule, or just a descriptor
    } // next rule descriptor
    return rules;
}
use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.
the class PosTagFeatureTester method onNextPosTagSequence.
/**
 * Evaluates every pos-tagger feature on each token of the sequence whose
 * lower-cased analysis text is one of the test words.
 * <p>
 * Each non-null feature result is stored in {@code featureResultMap}, keyed
 * first by the result's string form and then by the token's pos-tag code.
 * The stored value is a display string for the sentence in which the tested
 * token is bracketed, the tokens before it carry their tags, and the tokens
 * after it are shown untagged.
 *
 * @param posTagSequence
 *            the pos-tagged sequence to inspect
 * @throws TalismaneException
 *             declared by the overridden method
 */
@Override
public void onNextPosTagSequence(PosTagSequence posTagSequence) throws TalismaneException {
    // Holds the tokens already visited, used as the context for the current token.
    PosTagSequence history = new PosTagSequence(posTagSequence.getTokenSequence());

    for (PosTaggedToken current : posTagSequence) {
        if (testWords.contains(current.getToken().getAnalyisText().toLowerCase())) {
            // Build the display form of the sentence around the current token.
            StringBuilder display = new StringBuilder();
            boolean pastCurrent = false;
            for (PosTaggedToken other : posTagSequence) {
                String text = other.getToken().getOriginalText().replace(' ', '_');
                if (other.equals(current)) {
                    display.append(" [").append(text).append("/").append(other.getTag().toString()).append("]");
                    pastCurrent = true;
                } else if (pastCurrent) {
                    display.append(" ").append(text);
                } else {
                    display.append(" ").append(text).append("/").append(other.getTag().toString());
                }
            }
            String sentence = display.toString();
            LOG.debug(sentence);

            String tagCode = current.getTag().getCode();
            PosTaggerContext context = new PosTaggerContextImpl(current.getToken(), history);

            // Collect the non-null results of all features for this token.
            List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
            for (PosTaggerFeature<?> feature : posTaggerFeatures) {
                RuntimeEnvironment env = new RuntimeEnvironment();
                FeatureResult<?> result = feature.check(context, env);
                if (result != null) {
                    results.add(result);
                }
            }

            if (LOG.isTraceEnabled()) {
                LOG.trace("Token: " + current.getToken().getAnalyisText());
                for (FeatureResult<?> result : results) {
                    LOG.trace(result.toString());
                }
            }

            // Store the sentence under feature result, then under pos-tag code.
            for (FeatureResult<?> result : results) {
                String resultKey = result.toString();
                Map<String, List<String>> sentencesByTag = featureResultMap.get(resultKey);
                if (sentencesByTag == null) {
                    sentencesByTag = new TreeMap<String, List<String>>();
                    featureResultMap.put(resultKey, sentencesByTag);
                }
                List<String> examples = sentencesByTag.get(tagCode);
                if (examples == null) {
                    examples = new ArrayList<String>();
                    sentencesByTag.put(tagCode, examples);
                }
                examples.add(sentence);
            }
        }
        // Every token joins the history, whether tested or not.
        history.addPosTaggedToken(current);
    }
}
use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.
the class CombinedLexicalAttributesTest method testCheckInternalMultipleEntries.
/**
 * Checks that a {@link CombinedLexicalAttributesFeature} over the Person
 * attribute yields {@code "1;3"} for the token "demande" tagged "V", using
 * the configuration in {@code src/test/resources/testWithLex.conf}.
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    // The returned Config is not used directly in this test.
    ConfigFactory.load();

    final String sessionId = "test";
    final String text = "je demande";
    final String word = "demande";

    TokenSequence tokens = new TokenSequence(new Sentence(text, sessionId), sessionId);
    // The token covers the last word of the sentence.
    int start = text.length() - word.length();
    int end = text.length();
    Token verb = new Token(word, tokens, 1, start, end, sessionId);
    final PosTaggedToken taggedVerb = new PosTaggedToken(verb, new Decision("V", 1.0), sessionId);

    // Address function that always resolves to the pos-tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(taggedVerb);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    CombinedLexicalAttributesFeature<PosTaggerContext> combined = new CombinedLexicalAttributesFeature<>(fixedAddress, personAttribute);

    PosTaggerContext context = new PosTaggerContextImpl(verb, new PosTagSequence(tokens));
    FeatureResult<String> result = combined.checkInternal(context, new RuntimeEnvironment());

    String outcome = result.getOutcome();
    System.out.println(outcome);
    assertEquals("1;3", outcome);
}
use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.
the class CombinedLexicalAttributesTest method testCheckInternalMultipleAttributes.
/**
 * Checks that a {@link CombinedLexicalAttributesFeature} over the Person
 * and Number attributes yields {@code "1;3|p;s"} for the token "blah"
 * tagged "V", using the configuration in
 * {@code src/test/resources/testWithLex.conf}.
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    // The returned Config is not used directly in this test.
    ConfigFactory.load();

    final String sessionId = "test";
    final String text = "blah";

    TokenSequence tokens = new TokenSequence(new Sentence(text, sessionId), sessionId);
    // The single token spans the whole sentence.
    Token word = new Token(text, tokens, 1, 0, text.length(), sessionId);
    final PosTaggedToken taggedWord = new PosTaggedToken(word, new Decision("V", 1.0), sessionId);

    // Address function that always resolves to the pos-tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(taggedWord);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    StringLiteralFeature<PosTaggedTokenWrapper> numberAttribute = new StringLiteralFeature<>(LexicalAttribute.Number.name());
    CombinedLexicalAttributesFeature<PosTaggerContext> combined = new CombinedLexicalAttributesFeature<>(fixedAddress, personAttribute,
            numberAttribute);

    PosTaggerContext context = new PosTaggerContextImpl(word, new PosTagSequence(tokens));
    FeatureResult<String> result = combined.checkInternal(context, new RuntimeEnvironment());

    String outcome = result.getOutcome();
    System.out.println(outcome);
    assertEquals("1;3|p;s", outcome);
}
use of com.joliciel.talismane.posTagger.PosTaggerContext in project talismane by joliciel-informatique.
the class LexicalAttributeFeatureTest method testCheckInternalMultipleAttributes.
/**
 * Checks that a {@link LexicalAttributeFeature} over the Person and Number
 * attributes yields exactly three weighted outcomes for the token "blah"
 * tagged "V", each being one of {@code "3|p"}, {@code "1|s"} or
 * {@code "3|s"}, using the configuration in
 * {@code src/test/resources/testWithLex.conf}.
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    // The returned Config is not used directly in this test.
    ConfigFactory.load();

    final String sessionId = "test";
    final String text = "blah";

    TokenSequence tokens = new TokenSequence(new Sentence(text, sessionId), sessionId);
    // The single token spans the whole sentence.
    Token word = new Token(text, tokens, 1, 0, text.length(), sessionId);
    final PosTaggedToken taggedWord = new PosTaggedToken(word, new Decision("V", 1.0), sessionId);

    // Address function that always resolves to the pos-tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return generateResult(taggedWord);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    StringLiteralFeature<PosTaggedTokenWrapper> numberAttribute = new StringLiteralFeature<>(LexicalAttribute.Number.name());
    LexicalAttributeFeature<PosTaggerContext> lexicalFeature = new LexicalAttributeFeature<>(fixedAddress, personAttribute, numberAttribute);

    PosTaggerContext context = new PosTaggerContextImpl(word, new PosTagSequence(tokens));
    FeatureResult<List<WeightedOutcome<String>>> result = lexicalFeature.checkInternal(context, new RuntimeEnvironment());

    List<WeightedOutcome<String>> outcomes = result.getOutcome();
    System.out.println(outcomes);
    for (WeightedOutcome<String> weighted : outcomes) {
        String value = weighted.getOutcome();
        boolean isExpected = "3|p".equals(value) || "1|s".equals(value) || "3|s".equals(value);
        assertTrue(isExpected);
    }
    assertEquals(3, outcomes.size());
}
Aggregations