use of com.joliciel.talismane.utils.WeightedOutcome in project talismane by joliciel-informatique.
the class LanguageDetector method detectLanguages.
/**
 * Computes a weighted list of candidate languages for the supplied text.
 * <p>
 * The text is lower-cased (English locale rules), decomposed to NFD and
 * stripped of combining diacritical marks before any feature sees it. Every
 * configured feature is then evaluated with its own fresh
 * {@link RuntimeEnvironment}; features returning {@code null} are ignored. The
 * collected feature results are handed to the decision maker, and each
 * resulting decision is converted into a {@link Locale} (via
 * {@link Locale#forLanguageTag(String)}) paired with the decision's
 * probability.
 *
 * @param text
 *          the text whose language should be detected
 * @return one weighted locale per decision, in the order the decision maker
 *         returned them
 * @throws TalismaneException
 *           propagated from the feature checks
 */
public List<WeightedOutcome<Locale>> detectLanguages(String text) throws TalismaneException {
  if (LOG.isTraceEnabled()) {
    LOG.trace("Testing text: " + text);
  }

  // Normalise: lower-case first, then decompose and drop the accent marks.
  final String lowerCased = text.toLowerCase(Locale.ENGLISH);
  final String decomposed = Normalizer.normalize(lowerCased, Form.NFD);
  final String normalised = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

  // Evaluate every feature, keeping only those that produced a result.
  final List<FeatureResult<?>> collected = new ArrayList<>();
  for (LanguageDetectorFeature<?> candidate : features) {
    final FeatureResult<?> outcome = candidate.check(normalised, new RuntimeEnvironment());
    if (outcome == null) {
      continue;
    }
    collected.add(outcome);
  }

  if (LOG.isTraceEnabled()) {
    for (FeatureResult<?> outcome : collected) {
      LOG.trace(outcome.toString());
    }
  }

  final List<Decision> decisions = this.decisionMaker.decide(collected);

  if (LOG.isTraceEnabled()) {
    for (Decision d : decisions) {
      LOG.trace(d.getOutcome() + ": " + d.getProbability());
    }
  }

  // Translate each decision's language tag into a weighted Locale.
  final List<WeightedOutcome<Locale>> weightedLocales = new ArrayList<>();
  for (Decision d : decisions) {
    final Locale language = Locale.forLanguageTag(d.getOutcome());
    weightedLocales.add(new WeightedOutcome<Locale>(language, d.getProbability()));
  }
  return weightedLocales;
}
use of com.joliciel.talismane.utils.WeightedOutcome in project talismane by joliciel-informatique.
the class PosTagSetFeature method checkInternal.
/**
 * Produces one outcome per tag in the current session's pos-tag set: the
 * tag's code, always weighted 1.0. The {@code context} and {@code env}
 * arguments are not consulted.
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper context, RuntimeEnvironment env) {
  final List<WeightedOutcome<String>> weightedCodes = new ArrayList<WeightedOutcome<String>>();
  for (PosTag tag : TalismaneSession.get(sessionId).getPosTagSet().getTags()) {
    final String code = tag.getCode();
    weightedCodes.add(new WeightedOutcome<String>(code, 1.0));
  }
  return this.generateResult(weightedCodes);
}
use of com.joliciel.talismane.utils.WeightedOutcome in project talismane by joliciel-informatique.
the class TokeniserPatternsAndIndexesFeature method checkInternal.
/**
 * Lists, for the wrapped token, every pattern match whose index differs from
 * the first index-to-test of its pattern. Each such match yields one outcome
 * of the form {@code <pattern name>¤<match index>} with weight 1.0.
 *
 * @param tokenWrapper
 *          wrapper around the token whose pattern matches are inspected
 * @param env
 *          runtime environment (not consulted here)
 * @return the feature result wrapping the collected outcomes
 * @throws TalismaneException
 *           declared by the overridden method
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
  Token token = tokenWrapper.getToken();
  List<WeightedOutcome<String>> resultList = new ArrayList<WeightedOutcome<String>>();
  for (TokenPatternMatch tokenMatch : token.getMatches()) {
    // Skip the match sitting at the pattern's first index to test.
    if (tokenMatch.getIndex() != tokenMatch.getPattern().getIndexesToTest().get(0)) {
      // Separator is U+00A4 (currency sign). It is written as a Unicode
      // escape so it cannot be corrupted by the source-file encoding: the
      // previous literal was the UTF-8 bytes of U+00A4 mis-decoded through
      // a Thai code page.
      resultList.add(new WeightedOutcome<String>(tokenMatch.getPattern().getName() + "\u00A4" + tokenMatch.getIndex(), 1.0));
    }
  }
  return this.generateResult(resultList);
}
use of com.joliciel.talismane.utils.WeightedOutcome in project talismane by joliciel-informatique.
the class LinearSVMUtils method prepareData.
/**
 * Converts feature results into a list of {@link FeatureNode}s.
 * <p>
 * A result whose outcome is a {@link List} is treated as a collection of
 * weighted string outcomes: each one is looked up under the key
 * {@code trainingName + "|" + trainingOutcome} and contributes its own weight.
 * Any other result is looked up under its training name alone and
 * contributes its {@link Double} outcome, or 1.0 when the outcome is not a
 * {@code Double}. Lookups yielding a negative index are dropped, since only
 * features that existed in the training set need to be added.
 *
 * @param featureResults
 *          the feature results to convert
 * @param featureIndexMap
 *          maps a feature key to its index
 * @return the feature nodes, in encounter order
 */
public static List<Feature> prepareData(List<FeatureResult<?>> featureResults, TObjectIntMap<String> featureIndexMap) {
  final List<Feature> nodes = new ArrayList<>(featureResults.size());
  for (FeatureResult<?> current : featureResults) {
    final Object outcome = current.getOutcome();

    if (outcome instanceof List) {
      // Multi-valued feature: one node per weighted string outcome.
      @SuppressWarnings("unchecked")
      final List<WeightedOutcome<String>> weightedStrings = (List<WeightedOutcome<String>>) outcome;
      for (WeightedOutcome<String> weighted : weightedStrings) {
        final String key = current.getTrainingName() + "|" + current.getTrainingOutcome(weighted.getOutcome());
        final int position = featureIndexMap.get(key);
        if (position < 0) {
          continue;
        }
        nodes.add(new FeatureNode(position, weighted.getWeight()));
      }
      continue;
    }

    // Single-valued feature: numeric outcomes carry their value, others 1.0.
    final double weight = (outcome instanceof Double) ? ((Double) outcome).doubleValue() : 1.0;
    final int position = featureIndexMap.get(current.getTrainingName());
    if (position < 0) {
      // not present in the training set, so nothing to add
      continue;
    }
    nodes.add(new FeatureNode(position, weight));
  }
  return nodes;
}
use of com.joliciel.talismane.utils.WeightedOutcome in project talismane by joliciel-informatique.
the class LexicalAttributeFeatureTest method testCheckInternalMultipleAttributes.
/**
 * Checks a {@link LexicalAttributeFeature} built over two attributes (person
 * and number) for the token "blah" tagged "V": exactly three outcomes are
 * expected, each being one of "3|p", "1|s" or "3|s".
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
  // Select the test configuration, dropping any cached one before loading.
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();

  final String sessionId = "test";
  final String word = "blah";

  // A one-token sentence, with the token spanning the whole word.
  final Sentence sentence = new Sentence(word, sessionId);
  final TokenSequence tokens = new TokenSequence(sentence, sessionId);
  final Token token = new Token(word, tokens, 1, 0, word.length(), sessionId);
  final PosTaggedToken posTaggedToken = new PosTaggedToken(token, new Decision("V", 1.0), sessionId);

  // Address function that always resolves to the tagged token above.
  final PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
      return this.generateResult(posTaggedToken);
    }
  };

  final StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
  final StringLiteralFeature<PosTaggedTokenWrapper> numberAttribute = new StringLiteralFeature<>(LexicalAttribute.Number.name());
  final LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(fixedAddress, personAttribute, numberAttribute);

  final PosTagSequence history = new PosTagSequence(tokens);
  final PosTaggerContext context = new PosTaggerContextImpl(token, history);

  final List<WeightedOutcome<String>> outcomes = feature.checkInternal(context, new RuntimeEnvironment()).getOutcome();
  System.out.println(outcomes);

  for (WeightedOutcome<String> weighted : outcomes) {
    final String value = weighted.getOutcome();
    final boolean expected = "3|p".equals(value) || "1|s".equals(value) || "3|s".equals(value);
    assertTrue(expected);
  }
  assertEquals(3, outcomes.size());
}
Aggregations