Use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
The class PosTagEventStream, method next.
/**
 * Builds the classification event for the next pos-tagged token of the current sentence.
 * <p>
 * Each feature in {@code posTaggerFeatures} is checked against a context built from the token and
 * {@code currentHistory}; null results are dropped. The token's tag code becomes the event's
 * classification. Afterwards the token is added to {@code currentHistory}, and
 * {@code currentSentence} is cleared once its last token has been consumed.
 *
 * @return the event for the next token, or {@code null} when {@code hasNext()} returns false
 * @throws TalismaneException declared by the signature; the raising call is not visible in this block
 * @throws IOException declared by the signature; the raising call is not visible in this block
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
  if (!this.hasNext()) {
    return null;
  }

  PosTaggedToken posTaggedToken = currentSentence.get(currentIndex++);
  String tagCode = posTaggedToken.getTag().getCode();
  if (LOG.isDebugEnabled()) {
    LOG.debug("next event, token: " + posTaggedToken.getToken().getAnalyisText() + " : " + tagCode);
  }

  PosTaggerContext context = new PosTaggerContextImpl(posTaggedToken.getToken(), currentHistory);
  List<FeatureResult<?>> results = new ArrayList<>();
  for (PosTaggerFeature<?> feature : posTaggerFeatures) {
    FeatureResult<?> result = feature.check(context, new RuntimeEnvironment());
    if (result != null) {
      results.add(result);
    }
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Token: " + posTaggedToken.getToken().getAnalyisText());
    // Sort (and de-duplicate) the textual results so the trace output is stable.
    SortedSet<String> sortedResults = new TreeSet<>();
    for (FeatureResult<?> result : results) {
      sortedResults.add(result.toString());
    }
    for (String line : sortedResults) {
      LOG.trace(line);
    }
  }

  ClassificationEvent event = new ClassificationEvent(results, tagCode);
  currentHistory.addPosTaggedToken(posTaggedToken);

  // Dropping the sentence once it is exhausted; presumably hasNext() loads the next one — confirm.
  if (currentIndex == currentSentence.size()) {
    currentSentence = null;
  }
  return event;
}
Use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
The class LanguageDetectorEventStream, method next.
/**
 * Reads the next language-tagged text from {@code corpusReader} and converts it into a
 * classification event.
 * <p>
 * Each feature in {@code features} is checked against the text; null results are dropped. The
 * classification is the language tag of the text's language.
 *
 * @return an event pairing the collected feature results with the language tag
 * @throws TalismaneException declared by the signature; the raising call is not visible in this block
 */
@Override
public ClassificationEvent next() throws TalismaneException {
  LanguageTaggedText taggedText = this.corpusReader.nextText();

  List<FeatureResult<?>> results = new ArrayList<>();
  for (LanguageDetectorFeature<?> detectorFeature : features) {
    FeatureResult<?> result = detectorFeature.check(taggedText.getText(), new RuntimeEnvironment());
    if (result != null) {
      results.add(result);
    }
  }

  // Resolved after the feature checks, exactly as before.
  String languageTag = taggedText.getLanguage().toLanguageTag();

  if (LOG.isTraceEnabled()) {
    for (FeatureResult<?> result : results) {
      LOG.trace(result.toString());
    }
    LOG.trace("classification: " + languageTag);
  }

  return new ClassificationEvent(results, languageTag);
}
Use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
The class LinearSVMModelTrainer, method getFeatureMatrix.
/**
 * Drains the event stream and builds one {@code Feature[]} row per event.
 * <p>
 * For every event:
 * <ul>
 * <li>its classification is looked up in {@code outcomeIndexMap}; a negative lookup result is
 * treated as "not yet known", in which case the next index from {@code countingInfo} is assigned
 * and stored;</li>
 * <li>the outcome index is appended to {@code outcomeList};</li>
 * <li>each feature result is passed to {@code addFeatureResult}: list outcomes contribute one
 * entry per weighted string outcome (named {@code trainingName|trainingOutcome}, valued by the
 * weight), Double outcomes contribute their value, anything else contributes 1.0;</li>
 * <li>{@code countingInfo.numEvents} is incremented.</li>
 * </ul>
 * Each row lists the event's features in ascending order of their Integer key in the per-event
 * map ({@code addFeatureResult}, defined elsewhere, is expected to populate that map).
 *
 * @param corpusEventStream the events to consume
 * @param featureIndexMap feature name to index map, handed to {@code addFeatureResult}
 * @param outcomeIndexMap classification to outcome index map, extended with new classifications
 * @param outcomeList receives one outcome index per event, in stream order
 * @param featureCountMap handed to {@code addFeatureResult}
 * @param countingInfo running counters ({@code currentOutcomeIndex}, {@code numEvents}) updated here
 * @return a matrix with {@code countingInfo.numEvents} rows, filled from the events read
 * @throws RuntimeException wrapping any TalismaneException or IOException, after logging it
 */
private Feature[][] getFeatureMatrix(ClassificationEventStream corpusEventStream, TObjectIntMap<String> featureIndexMap, TObjectIntMap<String> outcomeIndexMap, TIntList outcomeList, TIntIntMap featureCountMap, CountingInfo countingInfo) {
  try {
    List<Feature[]> rows = new ArrayList<>();

    while (corpusEventStream.hasNext()) {
      ClassificationEvent event = corpusEventStream.next();

      int outcomeIndex = outcomeIndexMap.get(event.getClassification());
      if (outcomeIndex < 0) {
        // First time this classification is seen: allocate the next outcome index.
        outcomeIndex = countingInfo.currentOutcomeIndex++;
        outcomeIndexMap.put(event.getClassification(), outcomeIndex);
      }
      outcomeList.add(outcomeIndex);

      // Sorted map so that the row below comes out ordered by key.
      Map<Integer, Feature> featuresByKey = new TreeMap<>();
      for (FeatureResult<?> featureResult : event.getFeatureResults()) {
        if (featureResult.getOutcome() instanceof List) {
          @SuppressWarnings("unchecked")
          FeatureResult<List<WeightedOutcome<String>>> listResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
          for (WeightedOutcome<String> weighted : listResult.getOutcome()) {
            String name = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(weighted.getOutcome());
            this.addFeatureResult(name, weighted.getWeight(), featuresByKey, featureIndexMap, featureCountMap, countingInfo);
          }
          continue;
        }

        double weight;
        if (featureResult.getOutcome() instanceof Double) {
          @SuppressWarnings("unchecked")
          FeatureResult<Double> doubleResult = (FeatureResult<Double>) featureResult;
          weight = doubleResult.getOutcome().doubleValue();
        } else {
          weight = 1.0;
        }
        this.addFeatureResult(featureResult.getTrainingName(), weight, featuresByKey, featureIndexMap, featureCountMap, countingInfo);
      }

      // Convert to an array immediately, to avoid double storage.
      rows.add(featuresByKey.values().toArray(new Feature[0]));

      countingInfo.numEvents++;
      if (countingInfo.numEvents % 1000 == 0) {
        LOG.debug("Processed " + countingInfo.numEvents + " events.");
      }
    }

    // Sized by the running event counter, as before, rather than by the number of rows read.
    Feature[][] featureMatrix = new Feature[countingInfo.numEvents][];
    for (int row = 0; row < rows.size(); row++) {
      featureMatrix[row] = rows.get(row);
    }

    LOG.debug("Event count: " + countingInfo.numEvents);
    LOG.debug("Feature count: " + featureIndexMap.size());
    return featureMatrix;
  } catch (TalismaneException e) {
    LOG.error(e.getMessage(), e);
    throw new RuntimeException(e);
  } catch (IOException e) {
    LOG.error(e.getMessage(), e);
    throw new RuntimeException(e);
  }
}
Use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
The class PatternEventStream, method next.
/**
 * Builds the classification event for the pattern match at {@code currentIndex}.
 * <p>
 * Each feature in {@code tokenPatternMatchFeatures} is checked against the match; null results
 * are dropped. The classification is the name of the outcome stored at the same index in
 * {@code currentOutcomes}. The index is then advanced, and {@code currentPatternMatches} is
 * cleared once its last match has been consumed.
 *
 * @return the event for the current pattern match, or {@code null} when {@code hasNext()} returns false
 * @throws TalismaneException declared by the signature; the raising call is not visible in this block
 * @throws IOException declared by the signature; the raising call is not visible in this block
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
  if (!this.hasNext()) {
    return null;
  }

  TokenPatternMatch patternMatch = currentPatternMatches.get(currentIndex);
  TokeniserOutcome tokeniserOutcome = currentOutcomes.get(currentIndex);
  String outcomeName = tokeniserOutcome.name();
  LOG.debug("next event, pattern match: " + patternMatch.toString() + ", outcome:" + outcomeName);

  List<FeatureResult<?>> results = new ArrayList<>();
  for (TokenPatternMatchFeature<?> matchFeature : tokenPatternMatchFeatures) {
    FeatureResult<?> result = matchFeature.check(patternMatch, new RuntimeEnvironment());
    if (result != null) {
      results.add(result);
    }
  }

  if (LOG.isTraceEnabled()) {
    // Sort (and de-duplicate) the textual results so the trace output is stable.
    SortedSet<String> sortedResults = new TreeSet<>();
    for (FeatureResult<?> result : results) {
      sortedResults.add(result.toString());
    }
    for (String line : sortedResults) {
      LOG.trace(line);
    }
  }

  ClassificationEvent event = new ClassificationEvent(results, outcomeName);

  currentIndex++;
  // Dropping the matches once exhausted; presumably hasNext() loads the next batch — confirm.
  if (currentIndex == currentPatternMatches.size()) {
    currentPatternMatches = null;
  }
  return event;
}
Use of com.joliciel.talismane.machineLearning.ClassificationEvent in project jochre by urieli.
The class JochreLetterEventStream, method next.
/**
 * Builds the classification event for the shape currently held in {@code shapeInSequence}.
 * <p>
 * Each feature in {@code features} is checked against a context built from the shape-in-sequence
 * and {@code history}; null results are dropped, and each kept result is traced as soon as it is
 * produced. The classification is the shape's letter, which is also appended to the history's
 * letters.
 *
 * @return the event for the current shape, or {@code null} when {@code hasNext()} returns false
 */
@Override
public ClassificationEvent next() {
  if (!this.hasNext()) {
    return null;
  }

  Shape currentShape = shapeInSequence.getShape();
  LOG.debug("next event, shape: " + currentShape);

  LetterGuesserContext context = new LetterGuesserContext(shapeInSequence, history);
  List<FeatureResult<?>> results = new ArrayList<>();

  // analyse features
  for (LetterFeature<?> letterFeature : features) {
    FeatureResult<?> result = letterFeature.check(context, new RuntimeEnvironment());
    if (result == null) {
      continue;
    }
    results.add(result);
    if (LOG.isTraceEnabled()) {
      LOG.trace(result.toString());
    }
  }

  String letter = currentShape.getLetter();
  ClassificationEvent event = new ClassificationEvent(results, letter);
  history.getLetters().add(letter);

  // set shape to null so that hasNext can retrieve the next one.
  this.shapeInSequence = null;
  return event;
}
Aggregations