use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.
the class LinearSVMModelTrainer method getFeatureMatrix.
/**
 * Drains the event stream, turning each event into one row of {@code Feature}
 * objects and recording the index of its outcome.
 *
 * @param corpusEventStream the events to read until {@code hasNext()} is false
 * @param featureIndexMap   handed to {@code addFeatureResult}; its size is logged as the feature count
 * @param outcomeIndexMap   classification to outcome index; a classification whose lookup
 *                          yields a negative value is registered with the next free index
 * @param outcomeList       receives one outcome index per event, in stream order
 * @param featureCountMap   handed to {@code addFeatureResult}
 * @param countingInfo      running counters (next outcome index, number of events)
 * @return an array with {@code countingInfo.numEvents} rows, filled from the first row onwards
 * @throws RuntimeException wrapping any {@code TalismaneException} or {@code IOException}
 */
private Feature[][] getFeatureMatrix(ClassificationEventStream corpusEventStream, TObjectIntMap<String> featureIndexMap, TObjectIntMap<String> outcomeIndexMap, TIntList outcomeList, TIntIntMap featureCountMap, CountingInfo countingInfo) {
  try {
    // largest row seen so far; tracked but not otherwise used in this method
    int maxFeatureCount = 0;
    List<Feature[]> rows = new ArrayList<Feature[]>();

    while (corpusEventStream.hasNext()) {
      ClassificationEvent corpusEvent = corpusEventStream.next();

      int outcomeIndex = outcomeIndexMap.get(corpusEvent.getClassification());
      if (outcomeIndex < 0) {
        // first time this classification is seen: give it the next free index
        outcomeIndex = countingInfo.currentOutcomeIndex++;
        outcomeIndexMap.put(corpusEvent.getClassification(), outcomeIndex);
      }
      outcomeList.add(outcomeIndex);

      // a TreeMap, so the row below comes out ordered by the integer key
      Map<Integer, Feature> sortedFeatures = new TreeMap<Integer, Feature>();
      for (FeatureResult<?> featureResult : corpusEvent.getFeatureResults()) {
        Object outcome = featureResult.getOutcome();

        if (!(outcome instanceof List)) {
          // single-valued result: weight 1.0 unless the outcome itself is a Double
          double weight = 1.0;
          if (outcome instanceof Double) {
            weight = ((Double) outcome).doubleValue();
          }
          this.addFeatureResult(featureResult.getTrainingName(), weight, sortedFeatures, featureIndexMap, featureCountMap, countingInfo);
          continue;
        }

        // list-valued result: one feature per weighted string outcome
        @SuppressWarnings("unchecked")
        List<WeightedOutcome<String>> weightedStrings = (List<WeightedOutcome<String>>) outcome;
        for (WeightedOutcome<String> weightedString : weightedStrings) {
          String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(weightedString.getOutcome());
          this.addFeatureResult(featureName, weightedString.getWeight(), sortedFeatures, featureIndexMap, featureCountMap, countingInfo);
        }
      }

      maxFeatureCount = Math.max(maxFeatureCount, sortedFeatures.size());

      // convert to array immediately, to avoid double storage
      rows.add(sortedFeatures.values().toArray(new Feature[sortedFeatures.size()]));

      countingInfo.numEvents++;
      if (countingInfo.numEvents % 1000 == 0) {
        LOG.debug("Processed " + countingInfo.numEvents + " events.");
      }
    }

    Feature[][] featureMatrix = new Feature[countingInfo.numEvents][];
    for (int row = 0; row < rows.size(); row++) {
      featureMatrix[row] = rows.get(row);
    }

    LOG.debug("Event count: " + countingInfo.numEvents);
    LOG.debug("Feature count: " + featureIndexMap.size());
    return featureMatrix;
  } catch (TalismaneException e) {
    LOG.error(e.getMessage(), e);
    throw new RuntimeException(e);
  } catch (IOException e) {
    LOG.error(e.getMessage(), e);
    throw new RuntimeException(e);
  }
}
use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.
the class PatternEventStream method next.
/**
 * Builds the classification event for the pattern match at the current
 * index, then advances the index.
 *
 * @return the event, or {@code null} when {@code hasNext()} reports that nothing is left
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
  if (!this.hasNext()) {
    return null;
  }

  TokenPatternMatch tokenPatternMatch = currentPatternMatches.get(currentIndex);
  String classification = currentOutcomes.get(currentIndex).name();
  LOG.debug("next event, pattern match: " + tokenPatternMatch.toString() + ", outcome:" + classification);

  // run every feature with its own fresh environment; null results are dropped
  List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
  for (TokenPatternMatchFeature<?> feature : tokenPatternMatchFeatures) {
    FeatureResult<?> result = feature.check(tokenPatternMatch, new RuntimeEnvironment());
    if (result == null) {
      continue;
    }
    results.add(result);
  }

  if (LOG.isTraceEnabled()) {
    // trace the results in sorted order, without duplicates
    SortedSet<String> sortedDescriptions = new TreeSet<String>();
    for (FeatureResult<?> result : results) {
      sortedDescriptions.add(result.toString());
    }
    for (String description : sortedDescriptions) {
      LOG.trace(description);
    }
  }

  ClassificationEvent event = new ClassificationEvent(results, classification);

  currentIndex++;
  if (currentIndex == currentPatternMatches.size()) {
    // the current batch of matches has been fully consumed
    currentPatternMatches = null;
  }
  return event;
}
use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.
the class PerceptronDetailedAnalysisWriter method onAnalyse.
/*
 * (non-Javadoc)
 *
 * @see com.joliciel.talismane.maxent.MaxentObserver#onAnalyse(java.util.List,
 * java.util.Collection)
 *
 * Writes a report for one analysed event to the writer: the event itself,
 * each feature result (via writeFeatureResult, which also receives the
 * per-outcome totals map), a table of per-outcome totals next to the values
 * returned by decisionMaker.predict, and finally the decisions' probabilities
 * listed in the iteration order of a TreeSet of WeightedOutcome. The writer
 * is flushed at the end.
 *
 * The numeric columns use "%1$15s". The earlier "%1$#15s" is rejected by
 * java.util.Formatter: the '#' flag is only accepted for %s when the
 * argument implements Formattable, and with a String argument
 * String.format throws FormatFlagsConversionMismatchException.
 */
@Override
public void onAnalyse(Object event, List<FeatureResult<?>> featureResults, Collection<Decision> decisions) throws IOException {
  // left-aligned label column, right-aligned value columns
  final String labelColumn = "%1$-30s";
  final String valueColumn = "%1$15s";

  Map<String, Double> outcomeTotals = new TreeMap<String, Double>();
  for (String outcome : modelParams.getOutcomes()) {
    outcomeTotals.put(outcome, 0.0);
  }

  writer.append("####### Event: " + event.toString() + "\n");
  writer.append("### Feature results:\n");
  for (FeatureResult<?> featureResult : featureResults) {
    if (featureResult.getOutcome() instanceof List) {
      // list-valued result: one line per weighted string outcome
      @SuppressWarnings("unchecked")
      FeatureResult<List<WeightedOutcome<String>>> stringCollectionResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
      for (WeightedOutcome<String> stringOutcome : stringCollectionResult.getOutcome()) {
        String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(stringOutcome.getOutcome());
        String featureOutcome = stringOutcome.getOutcome();
        double value = stringOutcome.getWeight();
        this.writeFeatureResult(featureName, featureOutcome, value, outcomeTotals);
      }
    } else {
      // single-valued result: weight 1.0 unless it comes from a DoubleFeature
      double value = 1.0;
      if (featureResult.getFeature() instanceof DoubleFeature) {
        value = (Double) featureResult.getOutcome();
      }
      this.writeFeatureResult(featureResult.getTrainingName(), featureResult.getOutcome().toString(), value, outcomeTotals);
    }
  }

  List<Integer> featureIndexList = new ArrayList<Integer>();
  List<Double> featureValueList = new ArrayList<Double>();
  modelParams.prepareData(featureResults, featureIndexList, featureValueList);
  double[] results = decisionMaker.predict(featureIndexList, featureValueList);

  writer.append("### Outcome totals:\n");
  writer.append(String.format(labelColumn, "outcome") + String.format(valueColumn, "total") + String.format(valueColumn, "normalised") + "\n");

  // results is read positionally, in the order of modelParams.getOutcomes()
  int j = 0;
  for (String outcome : modelParams.getOutcomes()) {
    double total = outcomeTotals.get(outcome);
    double normalised = results[j++];
    writer.append(String.format(labelColumn, outcome) + String.format(valueColumn, decFormat.format(total)) + String.format(valueColumn, decFormat.format(normalised)) + "\n");
  }
  writer.append("\n");

  Map<String, Double> outcomeWeights = new TreeMap<String, Double>();
  for (Decision decision : decisions) {
    outcomeWeights.put(decision.getOutcome(), decision.getProbability());
  }

  writer.append("### Outcome list:\n");
  Set<WeightedOutcome<String>> weightedOutcomes = new TreeSet<WeightedOutcome<String>>();
  for (String outcome : modelParams.getOutcomes()) {
    // outcomes without a decision are listed with weight 0.0
    Double weightObj = outcomeWeights.get(outcome);
    double weight = (weightObj == null ? 0.0 : weightObj.doubleValue());
    weightedOutcomes.add(new WeightedOutcome<String>(outcome, weight));
  }
  for (WeightedOutcome<String> weightedOutcome : weightedOutcomes) {
    writer.append(String.format(labelColumn, weightedOutcome.getOutcome()) + String.format(valueColumn, decFormat.format(weightedOutcome.getWeight())) + "\n");
  }
  writer.append("\n");
  writer.flush();
}
use of com.joliciel.talismane.machineLearning.features.FeatureResult in project jochre by urieli.
the class JochreLetterEventStream method next.
/**
 * Produces the classification event for the current shape: every letter
 * feature is checked against a single shared context, and the shape's
 * letter is used as the outcome and appended to the history.
 *
 * @return the event, or {@code null} when {@code hasNext()} reports that nothing is left
 */
@Override
public ClassificationEvent next() {
  if (!this.hasNext()) {
    return null;
  }

  Shape shape = shapeInSequence.getShape();
  LOG.debug("next event, shape: " + shape);

  LetterGuesserContext context = new LetterGuesserContext(shapeInSequence, history);

  // analyse features, each with its own fresh environment; null results are dropped
  List<FeatureResult<?>> results = new ArrayList<>();
  for (LetterFeature<?> feature : features) {
    FeatureResult<?> result = feature.check(context, new RuntimeEnvironment());
    if (result == null) {
      continue;
    }
    results.add(result);
    if (LOG.isTraceEnabled()) {
      LOG.trace(result.toString());
    }
  }

  String letter = shape.getLetter();
  ClassificationEvent event = new ClassificationEvent(results, letter);
  history.getLetters().add(letter);

  // set shape to null so that hasNext can retrieve the next one.
  this.shapeInSequence = null;
  return event;
}
use of com.joliciel.talismane.machineLearning.features.FeatureResult in project jochre by urieli.
the class LetterGuesser method guessLetter.
/**
 * Analyses this shape, using the context provided for features that are not
 * intrinsic. Replaces the contents of {@code shape.getLetterGuesses()} with
 * all decisions whose probability is at least {@code MIN_PROB_TO_STORE}.
 *
 * @param shapeInSequence the shape to analyse, together with its position in the sequence
 * @param history         the letter sequence passed to the feature context
 * @return the outcome of the first stored guess, or {@code null} if no
 *         decision reached the minimum probability (previously this case
 *         failed with a {@code NoSuchElementException})
 */
public String guessLetter(ShapeInSequence shapeInSequence, LetterSequence history) {
  Shape shape = shapeInSequence.getShape();
  if (LOG.isTraceEnabled()) {
    LOG.trace("guessLetter, shape: " + shape);
  }

  // The context depends only on the method arguments, so it is built once
  // and shared by all features instead of being rebuilt per feature.
  LetterGuesserContext context = new LetterGuesserContext(shapeInSequence, history);

  // analyse features
  List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
  for (LetterFeature<?> feature : features) {
    RuntimeEnvironment env = new RuntimeEnvironment();
    FeatureResult<?> featureResult = feature.check(context, env);
    if (featureResult != null) {
      featureResults.add(featureResult);
      if (LOG.isTraceEnabled()) {
        LOG.trace(featureResult.toString());
      }
    }
  }

  List<Decision> letterGuesses = decisionMaker.decide(featureResults);

  // store outcomes
  shape.getLetterGuesses().clear();
  for (Decision letterGuess : letterGuesses) {
    if (letterGuess.getProbability() >= MIN_PROB_TO_STORE) {
      shape.getLetterGuesses().add(letterGuess);
    }
  }

  // Nothing may have been stored (no decisions, or all below the threshold);
  // only read the first guess when there is one.
  String bestOutcome = null;
  if (!shape.getLetterGuesses().isEmpty()) {
    bestOutcome = shape.getLetterGuesses().iterator().next().getOutcome();
  } else {
    LOG.debug("No letter guess stored for shape: " + shape);
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Shape: " + shape);
    LOG.trace("Letter: " + shape.getLetter());
    LOG.trace("Best outcome: " + bestOutcome);
  }
  return bestOutcome;
}
Aggregations