use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class LexicalAttributeFeatureTest method testCheckInternalMultipleEntries.
/**
 * Evaluates a {@link LexicalAttributeFeature} asking for the {@code Person}
 * attribute of the token "demande" (tagged "V") in the sentence "je demande",
 * and checks that exactly two outcomes come back, each being "1" or "3".
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
  // Load the configuration from testWithLex.conf, discarding any cached one.
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();

  final String sessionId = "test";
  final String prefix = "je ";
  final String text = "je demande";

  // Build the second token ("demande") by hand and tag it as a verb.
  Sentence sentence = new Sentence(text, sessionId);
  TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
  Token token = new Token("demande", tokenSequence, 1, prefix.length(), text.length(), sessionId);
  final PosTaggedToken posTaggedToken = new PosTaggedToken(token, new Decision("V", 1.0), sessionId);

  // Address function that always resolves to the hand-built tagged token.
  PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
      return this.generateResult(posTaggedToken);
    }
  };

  StringLiteralFeature<PosTaggedTokenWrapper> person = new StringLiteralFeature<>(LexicalAttribute.Person.name());
  LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(addressFunction, person);

  PosTaggerContext context = new PosTaggerContextImpl(token, new PosTagSequence(tokenSequence));
  RuntimeEnvironment env = new RuntimeEnvironment();

  List<WeightedOutcome<String>> outcomes = feature.checkInternal(context, env).getOutcome();
  System.out.println(outcomes);

  // Every returned person value must be first or third person...
  for (WeightedOutcome<String> weighted : outcomes) {
    String value = weighted.getOutcome();
    assertTrue("1".equals(value) || "3".equals(value));
  }
  // ...and there must be exactly two of them.
  assertEquals(2, outcomes.size());
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class MaxentDetailedAnalysisWriter method onAnalyse.
/*
 * (non-Javadoc)
 *
 * @see com.joliciel.talismane.maxent.MaxentObserver#onAnalyse(java.util.List,
 * java.util.Collection)
 *
 * Writes a detailed trace of one analysed event to the writer, in four parts:
 * the event itself, each feature result (via writeFeatureResult), a table of
 * per-outcome totals (log total, exp(total), exp(total) normalised by the sum
 * over all outcomes), and finally the list of outcomes with the probability
 * taken from the supplied decisions (0.0 for outcomes that have no decision).
 * The writer is flushed before returning.
 */
@Override
public void onAnalyse(Object event, List<FeatureResult<?>> featureResults, Collection<Decision> outcomes) throws IOException {
  // Column layouts: left-justified name (width 30), right-justified value (width 15).
  // The value column deliberately has no '#' flag: java.util.Formatter rejects
  // '#' on %s for arguments that are not Formattable (plain Strings here) with
  // a FormatFlagsConversionMismatchException.
  final String nameColumn = "%1$-30s";
  final String valueColumn = "%1$15s";

  // Every outcome starts from the same prior, log(1/n).
  Map<String, Double> outcomeTotals = new TreeMap<String, Double>();
  double uniformPrior = Math.log(1 / (double) outcomeList.size());
  for (String outcome : outcomeList) {
    outcomeTotals.put(outcome, uniformPrior);
  }

  writer.append("####### Event: " + event.toString() + "\n");
  writer.append("### Feature results:\n");
  for (FeatureResult<?> featureResult : featureResults) {
    if (featureResult.getOutcome() instanceof List) {
      // A list-valued result is written as one line per weighted string outcome.
      @SuppressWarnings("unchecked")
      FeatureResult<List<WeightedOutcome<String>>> stringCollectionResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
      for (WeightedOutcome<String> stringOutcome : stringCollectionResult.getOutcome()) {
        String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(stringOutcome.getOutcome());
        String featureOutcome = stringOutcome.getOutcome();
        double value = stringOutcome.getWeight();
        this.writeFeatureResult(featureName, featureOutcome, value, outcomeTotals);
      }
    } else {
      // Single-valued result: the value is 1.0 unless the feature is a DoubleFeature,
      // in which case the outcome itself is the value.
      double value = 1.0;
      if (featureResult.getFeature() instanceof DoubleFeature) {
        value = (Double) featureResult.getOutcome();
      }
      this.writeFeatureResult(featureResult.getTrainingName(), featureResult.getOutcome().toString(), value, outcomeTotals);
    }
  }

  // NOTE(review): outcomeTotals is handed to writeFeatureResult above, which
  // presumably accumulates into it; confirm against that method.
  writer.append("### Outcome totals:\n");
  // The value written here is the log of 1/n, as computed above.
  writer.append("# Uniform prior: " + uniformPrior + " (=1/" + outcomeList.size() + ")\n");

  // First pass: sum of exp(total) over all outcomes, used for normalisation.
  double grandTotal = 0;
  for (String outcome : outcomeList) {
    double total = outcomeTotals.get(outcome);
    grandTotal += Math.exp(total);
  }

  writer.append(String.format(nameColumn, "outcome") + String.format(valueColumn, "total(log)") + String.format(valueColumn, "total")
      + String.format(valueColumn, "normalised") + "\n");

  // Second pass: one table row per outcome.
  for (String outcome : outcomeList) {
    double total = outcomeTotals.get(outcome);
    double expTotal = Math.exp(total);
    writer.append(String.format(nameColumn, outcome) + String.format(valueColumn, decFormat.format(total))
        + String.format(valueColumn, decFormat.format(expTotal)) + String.format(valueColumn, decFormat.format(expTotal / grandTotal)) + "\n");
  }
  writer.append("\n");

  // Index the supplied decisions by outcome name.
  Map<String, Double> outcomeWeights = new TreeMap<String, Double>();
  for (Decision decision : outcomes) {
    outcomeWeights.put(decision.getOutcome(), decision.getProbability());
  }

  writer.append("### Outcome list:\n");
  // Sorted by WeightedOutcome's natural ordering; outcomes with no decision get 0.0.
  Set<WeightedOutcome<String>> weightedOutcomes = new TreeSet<WeightedOutcome<String>>();
  for (String outcome : outcomeList) {
    Double weightObj = outcomeWeights.get(outcome);
    double weight = (weightObj == null ? 0.0 : weightObj.doubleValue());
    weightedOutcomes.add(new WeightedOutcome<String>(outcome, weight));
  }
  for (WeightedOutcome<String> weightedOutcome : weightedOutcomes) {
    writer.append(String.format(nameColumn, weightedOutcome.getOutcome()) + String.format(valueColumn, decFormat.format(weightedOutcome.getWeight())) + "\n");
  }
  writer.append("\n");
  writer.flush();
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class OpenNLPDecisionMaker method decide.
/**
 * Evaluates the model on the given feature results and returns one
 * {@link Decision} per model outcome.
 *
 * @param featureResults
 *          the feature results to convert into model contexts and weights
 * @return the decisions, in the iteration order of a {@code TreeSet<Decision>}
 *         (i.e. the natural ordering of {@code Decision})
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
  // Let prepareData fill the parallel context/weight lists.
  List<String> contextList = new ArrayList<String>();
  List<Float> weightList = new ArrayList<Float>();
  OpenNLPDecisionMaker.prepareData(featureResults, contextList, weightList);

  // The model takes arrays, so unpack both lists.
  String[] contexts = contextList.toArray(new String[contextList.size()]);
  float[] weights = new float[weightList.size()];
  int position = 0;
  for (Float weight : weightList) {
    weights[position] = weight;
    position++;
  }

  double[] probs = model.eval(contexts, weights);

  // Pair each probability with the outcome name at the same model index.
  // NOTE(review): a TreeSet keeps only one of any two elements that compare
  // as equal; confirm Decision.compareTo never returns 0 for distinct outcomes.
  TreeSet<Decision> ranked = new TreeSet<Decision>();
  for (int index = 0; index < probs.length; index++) {
    ranked.add(new Decision(model.getOutcome(index), probs[index]));
  }
  return new ArrayList<Decision>(ranked);
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class PerceptronDecisionMaker method decide.
/**
 * Scores every outcome with the perceptron and converts the raw scores into a
 * probability distribution, returning one {@link Decision} per outcome.
 * <p>
 * With {@code PerceptronScoring.normalisedExponential}, each score is divided
 * by the largest absolute score (at least 1), exponentiated and normalised.
 * Otherwise the scores are shifted, when necessary, so that the smallest one
 * equals 1, and then normalised by their sum.
 *
 * @param featureResults
 *          the feature results to score
 * @return the decisions, in the iteration order of a {@code TreeSet<Decision>}
 *         (i.e. the natural ordering of {@code Decision})
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
  List<Integer> featureIndexList = new ArrayList<Integer>();
  List<Double> featureValueList = new ArrayList<Double>();
  modelParameters.prepareData(featureResults, featureIndexList, featureValueList);
  double[] results = this.predict(featureIndexList, featureValueList);
  double[] probs = new double[results.length];
  if (this.getPerceptronScoring() == PerceptronScoring.normalisedExponential) {
    // e^(x/absmax)/sum(e^(x/absmax))
    // where x/absmax is in [-1,1]
    // e^(x/absmax) is in [1/e,e]
    double absoluteMax = 1;
    for (int i = 0; i < results.length; i++) {
      if (Math.abs(results[i]) > absoluteMax) {
        absoluteMax = Math.abs(results[i]);
      }
    }
    double total = 0.0;
    for (int i = 0; i < results.length; i++) {
      probs[i] = Math.exp(results[i] / absoluteMax);
      total += probs[i];
    }
    for (int i = 0; i < probs.length; i++) {
      probs[i] /= total;
    }
  } else {
    // make all results >= 1
    double min = Double.MAX_VALUE;
    for (int i = 0; i < results.length; i++) {
      if (results[i] < min) {
        min = results[i];
      }
    }
    // Always populate probs. Previously this only happened when min < 0, so
    // an all-non-negative score vector left probs at zero and the division
    // below produced NaN for every outcome. Shifting whenever min < 1 keeps
    // the min < 0 results unchanged and guarantees a strictly positive total.
    boolean needsShift = min < 1;
    for (int i = 0; i < results.length; i++) {
      probs[i] = needsShift ? (results[i] - min) + 1 : results[i];
    }
    // then divide by total to get a probability distribution
    double total = 0.0;
    for (int i = 0; i < probs.length; i++) {
      total += probs[i];
    }
    for (int i = 0; i < probs.length; i++) {
      probs[i] /= total;
    }
  }

  // Pair each outcome with its raw score and probability; outcomes are taken
  // in the iteration order of modelParameters.getOutcomes(), which is used as
  // the index into results/probs.
  int i = 0;
  TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
  for (String outcome : modelParameters.getOutcomes()) {
    outcomeSet.add(new Decision(outcome, results[i], probs[i]));
    i++;
  }
  return new ArrayList<Decision>(outcomeSet);
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class TokenisedAtomicTokenSequenceTest method testGetTokenSequence.
/**
 * Feeds a fixed SEPARATE/JOIN decision for every default token (whitespace
 * included) of "Je n'ai pas encore l'ourang-outan." into a
 * {@link TokenisedAtomicTokenSequence} and checks the token sequence inferred
 * from those decisions.
 */
@Test
public void testGetTokenSequence() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();

  final String sessionId = "test";
  final Sentence sentence = new Sentence("Je n'ai pas encore l'ourang-outan.", sessionId);

  // One decision per default token; "_" stands for a whitespace token.
  TokeniserOutcome[] tokeniserOutcomeArray = new TokeniserOutcome[] {
      TokeniserOutcome.SEPARATE, // Je
      TokeniserOutcome.SEPARATE, // _
      TokeniserOutcome.SEPARATE, // n
      TokeniserOutcome.JOIN, // '
      TokeniserOutcome.SEPARATE, // ai
      TokeniserOutcome.SEPARATE, // _
      TokeniserOutcome.SEPARATE, // pas
      TokeniserOutcome.JOIN, // _
      TokeniserOutcome.JOIN, // encore
      TokeniserOutcome.SEPARATE, // _
      TokeniserOutcome.SEPARATE, // l
      TokeniserOutcome.JOIN, // '
      TokeniserOutcome.SEPARATE, // ourang
      TokeniserOutcome.JOIN, // -
      TokeniserOutcome.JOIN, // outan
      TokeniserOutcome.SEPARATE // .
  };

  TokenisedAtomicTokenSequence atomicTokenSequence = new TokenisedAtomicTokenSequence(sentence, sessionId);
  TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
  tokenSequence.findDefaultTokens();

  // Tag each default token with the decision at the same position.
  int outcomeIndex = 0;
  for (Token token : tokenSequence.listWithWhiteSpace()) {
    Decision decision = new Decision(tokeniserOutcomeArray[outcomeIndex].name());
    outcomeIndex++;
    TaggedToken<TokeniserOutcome> taggedToken = new TaggedToken<>(token, decision, TokeniserOutcome.valueOf(decision.getOutcome()));
    atomicTokenSequence.add(taggedToken);
  }

  TokenSequence newTokenSequence = atomicTokenSequence.inferTokenSequence();
  LOG.debug(newTokenSequence.toString());

  // Expected analysis text of the inferred tokens, in order.
  final String[] expectedTexts = { "Je", "n'", "ai", "pas encore", "l'", "ourang-outan", "." };
  int position = 0;
  for (Token token : newTokenSequence) {
    // Tokens beyond the expected ones are not checked here; the size
    // assertion below catches any surplus.
    if (position < expectedTexts.length) {
      assertEquals(expectedTexts[position], token.getAnalyisText());
    }
    position++;
  }
  assertEquals(7, newTokenSequence.size());
}
Aggregations