use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
the class ParseEventStream method next.
/**
 * Produces the next training event for the parser.
 * <p>
 * Every parse feature is checked against the current configuration, the
 * non-null results are paired with the code of the transition found at
 * {@code currentIndex} in the target configuration, and that transition is
 * then applied to a new {@code ParseConfiguration} constructed from the
 * current one.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} is false
 * @throws TalismaneException
 *             if raised while checking a feature or applying the transition
 * @throws IOException
 *             if raised by an underlying call
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    eventCount++;
    LOG.debug("Event " + eventCount + ": " + currentConfiguration.toString());

    // Check each feature with its own runtime environment; null results are dropped.
    List<FeatureResult<?>> results = new ArrayList<>();
    for (ParseConfigurationFeature<?> feature : parseFeatures) {
        FeatureResult<?> result = feature.check(currentConfiguration, new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
    }

    if (LOG.isTraceEnabled()) {
        // Sort (and de-duplicate) the textual form of the results so the trace output is stable.
        SortedSet<String> sortedResults = new TreeSet<>();
        for (FeatureResult<?> result : results) {
            sortedResults.add(result.toString());
        }
        for (String resultString : sortedResults) {
            LOG.trace(resultString);
        }
    }

    // The expected outcome is the code of the transition at the current index.
    Transition transition = targetConfiguration.getTransitions().get(currentIndex);
    ClassificationEvent event = new ClassificationEvent(results, transition.getCode());

    // Apply the transition to a fresh configuration and advance the index.
    currentConfiguration = new ParseConfiguration(currentConfiguration);
    transition.apply(currentConfiguration);
    currentIndex++;

    // Once every transition has been consumed, clear the target configuration.
    if (currentIndex == targetConfiguration.getTransitions().size()) {
        targetConfiguration = null;
    }
    return event;
}
use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
the class SentenceDetectorEventStream method next.
/**
 * Produces the next training event for the sentence detector.
 * <p>
 * Takes the next possible boundary, gathers following sentences (from the
 * {@code sentences} buffer first, then from the corpus reader) until at
 * least {@code minCharactersAfterBoundary} characters of look-ahead text are
 * available or no sentence is left, checks every feature against the
 * resulting boundary, and labels the event according to whether the
 * possible boundary equals {@code realBoundary}.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} is false
 * @throws TalismaneException
 *             if raised while reading the corpus or checking a feature
 * @throws IOException
 *             if raised by an underlying call
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    int possibleBoundary = possibleBoundaries.get(currentIndex++);

    // Collect look-ahead text following the current sentence.
    StringBuilder lookAhead = new StringBuilder();
    for (int sentenceIndex = 0; lookAhead.length() < minCharactersAfterBoundary; sentenceIndex++) {
        String nextSentence;
        if (sentenceIndex < sentences.size()) {
            // already buffered
            nextSentence = sentences.get(sentenceIndex);
        } else if (corpusReader.hasNextSentence()) {
            // read one more sentence from the corpus and buffer it
            nextSentence = corpusReader.nextSentence().getText().toString();
            sentences.add(nextSentence);
        } else {
            // nothing left to read
            break;
        }

        // Insert a separating space unless the sentence already starts with whitespace.
        boolean startsWithWhitespace = nextSentence.startsWith(" ") || nextSentence.startsWith("\n");
        if (!startsWithWhitespace) {
            lookAhead.append(" ");
        }
        lookAhead.append(sentences.get(sentenceIndex));
    }

    String text = previousSentence + currentSentence + lookAhead;
    PossibleSentenceBoundary boundary = new PossibleSentenceBoundary(text, possibleBoundary, sessionId);
    LOG.debug("next event, boundary: " + boundary);

    // Check each feature with its own runtime environment; null results are dropped.
    List<FeatureResult<?>> featureResults = new ArrayList<>();
    for (SentenceDetectorFeature<?> feature : features) {
        FeatureResult<?> featureResult = feature.check(boundary, new RuntimeEnvironment());
        if (featureResult == null) {
            continue;
        }
        featureResults.add(featureResult);
    }

    if (LOG.isTraceEnabled()) {
        // Sort (and de-duplicate) the textual form of the results so the trace output is stable.
        SortedSet<String> sortedResults = featureResults.stream().map(Object::toString).collect(Collectors.toCollection(TreeSet::new));
        sortedResults.forEach(resultString -> LOG.trace(resultString));
    }

    String classification = possibleBoundary == realBoundary
            ? SentenceDetectorOutcome.IS_BOUNDARY.name()
            : SentenceDetectorOutcome.IS_NOT_BOUNDARY.name();
    ClassificationEvent event = new ClassificationEvent(featureResults, classification);

    // After the last possible boundary of this sentence, keep the sentence
    // (with a trailing space) as context and clear the current sentence.
    if (currentIndex == possibleBoundaries.size()) {
        previousSentence = currentSentence.endsWith(" ") ? currentSentence : currentSentence + " ";
        currentSentence = null;
    }
    return event;
}
use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
the class OpenNLPEventStream method next.
/**
 * Converts the next corpus event into an OpenNLP {@code Event}.
 * <p>
 * The feature results of the corpus event are turned into parallel context
 * and weight lists by {@code OpenNLPDecisionMaker.prepareData}, then copied
 * into the arrays expected by the {@code Event} constructor.
 *
 * @return the converted event, or {@code null} when the corpus event stream
 *         has no further event
 * @throws IOException
 *             declared by the overridden method
 * @throws RuntimeException
 *             wrapping any {@code TalismaneException} raised by the corpus
 *             event stream, after logging it
 */
@Override
public Event next() throws IOException {
    try {
        if (!this.corpusEventStream.hasNext()) {
            return null;
        }

        ClassificationEvent corpusEvent = this.corpusEventStream.next();
        List<String> contextList = new ArrayList<>();
        List<Float> weightList = new ArrayList<>();
        OpenNLPDecisionMaker.prepareData(corpusEvent.getFeatureResults(), contextList, weightList);

        String[] contexts = contextList.toArray(new String[0]);

        // No standard-library shortcut from List<Float> to float[]: unbox element by element.
        float[] weights = new float[weightList.size()];
        for (int index = 0; index < weights.length; index++) {
            weights[index] = weightList.get(index);
        }

        return new Event(corpusEvent.getClassification(), contexts, weights);
    } catch (TalismaneException e) {
        LOG.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.
the class PerceptronClassificationModelTrainer method prepareData.
/**
 * Serialises all events of the stream to a temporary file and prepares the
 * model parameters for training.
 * <p>
 * Steps:
 * <ol>
 * <li>Each event is converted to a {@code PerceptronEvent} and written to a
 * temporary event file.</li>
 * <li>If {@code cutoff > 1}: feature occurrences are counted by re-reading
 * the event file, new parameters are initialised from the old ones with the
 * cutoff, the decision maker is rebuilt on the new parameters, and the
 * events are rewritten to a second temporary file using the new feature
 * indexes. The first file is then deleted.</li>
 * <li>The weights are initialised and {@code totalFeatureWeights} is
 * allocated as {@code [featureCount][outcomeCount]}.</li>
 * </ol>
 *
 * @param eventStream
 *            the events to train on; consumed entirely
 * @throws TalismaneException
 *             if raised by the event stream
 * @throws RuntimeException
 *             wrapping any {@code IOException} raised while reading or
 *             writing the temporary files, after logging it
 */
void prepareData(ClassificationEventStream eventStream) throws TalismaneException {
    try {
        eventFile = File.createTempFile("events", "txt");
        eventFile.deleteOnExit();

        // try-with-resources so the writer is closed even when reading the
        // stream or writing an event throws.
        try (Writer eventWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(eventFile), "UTF-8"))) {
            while (eventStream.hasNext()) {
                ClassificationEvent corpusEvent = eventStream.next();
                PerceptronEvent event = new PerceptronEvent(corpusEvent, params);
                event.write(eventWriter);
            }
            eventWriter.flush();
        }

        if (cutoff > 1) {
            params.initialiseCounts();
            File originalEventFile = eventFile;

            // First pass: count how many events each feature occurs in.
            try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(originalEventFile), "UTF-8")))) {
                while (scanner.hasNextLine()) {
                    String line = scanner.nextLine();
                    PerceptronEvent event = new PerceptronEvent(line);
                    for (int featureIndex : event.getFeatureIndexes()) {
                        params.getFeatureCounts()[featureIndex]++;
                    }
                }
            }

            if (LOG.isDebugEnabled()) {
                // cutoffCounts[i] = number of features whose count is at least i, for i in 1..20
                int[] cutoffCounts = new int[21];
                for (int count : params.getFeatureCounts()) {
                    for (int i = 1; i < 21; i++) {
                        if (count >= i) {
                            cutoffCounts[i]++;
                        }
                    }
                }
                LOG.debug("Feature counts:");
                for (int i = 1; i < 21; i++) {
                    LOG.debug("Cutoff " + i + ": " + cutoffCounts[i]);
                }
            }

            PerceptronModelParameters cutoffParams = new PerceptronModelParameters();
            int[] newIndexes = cutoffParams.initialise(params, cutoff);
            decisionMaker = new PerceptronDecisionMaker(cutoffParams, this.scoring);

            // Second pass: rewrite every event against the new feature indexes.
            try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(originalEventFile), "UTF-8")))) {
                eventFile = File.createTempFile("eventsCutoff", "txt");
                eventFile.deleteOnExit();
                try (Writer eventCutoffWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(eventFile), "UTF-8"))) {
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        PerceptronEvent oldEvent = new PerceptronEvent(line);
                        PerceptronEvent newEvent = new PerceptronEvent(oldEvent, newIndexes);
                        newEvent.write(eventCutoffWriter);
                    }
                    eventCutoffWriter.flush();
                }
            }

            params = cutoffParams;
            // Delete only after the scanner on this file has been closed:
            // deleting a file that is still open can fail on some platforms.
            // deleteOnExit() registered above remains the fallback.
            originalEventFile.delete();
        }

        params.initialiseWeights();
        totalFeatureWeights = new double[params.getFeatureCount()][params.getOutcomeCount()];
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
use of com.joliciel.talismane.machineLearning.ClassificationEvent in project jochre by urieli.
the class JochreMergeEventStream method next.
/**
 * Produces the next training event for the merge classifier.
 * <p>
 * Every merge feature is checked against the current merge candidate, and
 * the expected outcome is derived from the letters of the candidate's two
 * shapes. The {@code yesCount} / {@code noCount} counters are updated and
 * the merge candidate is cleared so that {@link #hasNext()} can load the
 * following one.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} is false
 */
@Override
public ClassificationEvent next() {
    if (!this.hasNext()) {
        return null;
    }

    LOG.debug("next event, " + mergeCandidate.getFirstShape() + ", " + mergeCandidate.getSecondShape());

    // analyse features, each with its own runtime environment; null results are dropped
    List<FeatureResult<?>> featureResults = new ArrayList<>();
    for (MergeFeature<?> feature : mergeFeatures) {
        FeatureResult<?> featureResult = feature.check(mergeCandidate, new RuntimeEnvironment());
        if (featureResult == null) {
            continue;
        }
        featureResults.add(featureResult);
        if (LOG.isTraceEnabled()) {
            LOG.trace(featureResult.toString());
        }
    }

    String firstLetter = mergeCandidate.getFirstShape().getLetter();
    String secondLetter = mergeCandidate.getSecondShape().getLetter();

    // NOTE(review): "|" presumably marks a letter split across shapes - confirm.
    // Merge when the first letter starts with "|" and the second is empty or
    // ends with "|", or when the first is empty and the second ends with "|".
    boolean shouldMerge;
    if (firstLetter.startsWith("|")) {
        shouldMerge = secondLetter.length() == 0 || secondLetter.endsWith("|");
    } else {
        shouldMerge = secondLetter.endsWith("|") && firstLetter.length() == 0;
    }

    MergeOutcome outcome = shouldMerge ? MergeOutcome.DO_MERGE : MergeOutcome.DO_NOT_MERGE;
    if (shouldMerge) {
        yesCount++;
    } else {
        noCount++;
    }
    LOG.debug("Outcome: " + outcome);

    ClassificationEvent event = new ClassificationEvent(featureResults, outcome.name());

    // set mergeCandidate to null so that hasNext can retrieve the next one.
    this.mergeCandidate = null;
    return event;
}
Aggregations