Search in sources :

Example 1 with ClassificationEvent

use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.

the class ParseEventStream method next.

/**
 * Produces the next training event for the parser.
 *
 * <p>Every configured parse feature is checked against the current configuration;
 * non-null results become the event's features. The event's classification is the
 * code of the transition found at {@code currentIndex} in the target configuration.
 * After the event is built, that transition is applied to a new
 * {@code ParseConfiguration} built from the current one, and the index is advanced.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} reports none
 * @throws TalismaneException propagated from {@code hasNext()}, feature checks or
 *         transition application
 * @throws IOException propagated from the underlying calls
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    eventCount++;
    LOG.debug("Event " + eventCount + ": " + currentConfiguration.toString());

    // Gather the non-null feature results, each checked in its own environment.
    List<FeatureResult<?>> results = new ArrayList<>();
    for (ParseConfigurationFeature<?> feature : parseFeatures) {
        FeatureResult<?> result = feature.check(currentConfiguration, new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
    }

    if (LOG.isTraceEnabled()) {
        // Trace output is emitted sorted (and de-duplicated) by string form.
        SortedSet<String> sorted = new TreeSet<>();
        for (FeatureResult<?> result : results) {
            sorted.add(result.toString());
        }
        for (String line : sorted) {
            LOG.trace(line);
        }
    }

    Transition transition = targetConfiguration.getTransitions().get(currentIndex);
    ClassificationEvent event = new ClassificationEvent(results, transition.getCode());

    // Advance: apply the transition to a fresh configuration and move the index on.
    currentConfiguration = new ParseConfiguration(currentConfiguration);
    transition.apply(currentConfiguration);
    currentIndex++;

    // All transitions of the target have been consumed; drop the target.
    // NOTE(review): presumably hasNext() then loads the next target - confirm.
    if (currentIndex == targetConfiguration.getTransitions().size()) {
        targetConfiguration = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ParseConfigurationFeature(com.joliciel.talismane.parser.features.ParseConfigurationFeature) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 2 with ClassificationEvent

use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.

the class SentenceDetectorEventStream method next.

/**
 * Produces the next training event for the sentence detector.
 *
 * <p>Takes the next possible boundary, assembles the text around it (previous
 * sentence + current sentence + look-ahead text drawn from the cached
 * {@code sentences} list or, failing that, the corpus reader), checks every feature
 * against that boundary, and labels the event {@code IS_BOUNDARY} when the possible
 * boundary equals {@code realBoundary}, otherwise {@code IS_NOT_BOUNDARY}.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} reports none
 * @throws TalismaneException propagated from the underlying calls
 * @throws IOException propagated from the underlying calls
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    int possibleBoundary = possibleBoundaries.get(currentIndex++);

    // Accumulate following sentences until there is enough look-ahead text,
    // or until neither the cache nor the corpus reader has any more.
    StringBuilder lookahead = new StringBuilder();
    int lookaheadIndex = 0;
    while (lookahead.length() < minCharactersAfterBoundary) {
        String candidate;
        if (lookaheadIndex < sentences.size()) {
            candidate = sentences.get(lookaheadIndex);
        } else if (corpusReader.hasNextSentence()) {
            candidate = corpusReader.nextSentence().getText().toString();
            sentences.add(candidate);
        } else {
            break;
        }

        // Insert a separating space unless the sentence already starts with whitespace.
        boolean startsWithSeparator = candidate.startsWith(" ") || candidate.startsWith("\n");
        if (!startsWithSeparator) {
            lookahead.append(" ");
        }
        lookahead.append(sentences.get(lookaheadIndex));
        lookaheadIndex++;
    }

    String text = previousSentence + currentSentence + lookahead.toString();
    PossibleSentenceBoundary boundary = new PossibleSentenceBoundary(text, possibleBoundary, sessionId);
    LOG.debug("next event, boundary: " + boundary);

    // Gather the non-null feature results, each checked in its own environment.
    List<FeatureResult<?>> results = new ArrayList<>();
    for (SentenceDetectorFeature<?> feature : features) {
        FeatureResult<?> result = feature.check(boundary, new RuntimeEnvironment());
        if (result != null) {
            results.add(result);
        }
    }

    if (LOG.isTraceEnabled()) {
        // Trace output is emitted sorted (and de-duplicated) by string form.
        SortedSet<String> sorted = new TreeSet<>();
        for (FeatureResult<?> result : results) {
            sorted.add(result.toString());
        }
        for (String line : sorted) {
            LOG.trace(line);
        }
    }

    String classification = possibleBoundary == realBoundary
            ? SentenceDetectorOutcome.IS_BOUNDARY.name()
            : SentenceDetectorOutcome.IS_NOT_BOUNDARY.name();
    ClassificationEvent event = new ClassificationEvent(results, classification);

    // Last boundary of this sentence: it becomes the (space-terminated) previous
    // sentence and the current sentence is cleared.
    if (currentIndex == possibleBoundaries.size()) {
        previousSentence = currentSentence.endsWith(" ") ? currentSentence : currentSentence + " ";
        currentSentence = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) Matcher(java.util.regex.Matcher) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) Pattern(java.util.regex.Pattern) LinkedList(java.util.LinkedList) SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 3 with ClassificationEvent

use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.

the class OpenNLPEventStream method next.

/**
 * Converts the next corpus event into an OpenNLP {@code Event}.
 *
 * <p>The corpus event's feature results are turned into parallel context/weight
 * lists by {@code OpenNLPDecisionMaker.prepareData}, which are then copied into the
 * arrays the {@code Event} constructor takes.
 *
 * @return the converted event, or {@code null} when the corpus stream has no more events
 * @throws IOException propagated from the corpus event stream
 * @throws RuntimeException wrapping any {@code TalismaneException} raised while
 *         reading or converting the event (the exception is logged first)
 */
@Override
public Event next() throws IOException {
    try {
        if (!this.corpusEventStream.hasNext()) {
            return null;
        }

        ClassificationEvent source = this.corpusEventStream.next();
        List<String> contextList = new ArrayList<>();
        List<Float> weightList = new ArrayList<>();
        OpenNLPDecisionMaker.prepareData(source.getFeatureResults(), contextList, weightList);

        String[] contexts = contextList.toArray(new String[contextList.size()]);

        // Unbox the weights into a primitive array.
        float[] weights = new float[weightList.size()];
        for (int idx = 0; idx < weights.length; idx++) {
            weights[idx] = weightList.get(idx);
        }

        return new Event(source.getClassification(), contexts, weights);
    } catch (TalismaneException failure) {
        LOG.error(failure.getMessage(), failure);
        throw new RuntimeException(failure);
    }
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) Event(opennlp.model.Event) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent)

Example 4 with ClassificationEvent

use of com.joliciel.talismane.machineLearning.ClassificationEvent in project talismane by joliciel-informatique.

the class PerceptronClassificationModelTrainer method prepareData.

/**
 * Serialises the event stream to a temporary file and, when a cutoff is configured,
 * rewrites it with only the features retained by the cutoff.
 *
 * <p>Steps:
 * <ol>
 * <li>Each event in {@code eventStream} is converted to a {@code PerceptronEvent}
 * and written to a temp file ({@code eventFile}).</li>
 * <li>If {@code cutoff > 1}: feature occurrences are counted from that file, new
 * cutoff parameters are initialised from the counts, the events are rewritten with
 * the re-mapped feature indexes to a second temp file (which becomes
 * {@code eventFile}), {@code params} is replaced by the cutoff parameters, and the
 * first file is deleted.</li>
 * <li>Weights are initialised and {@code totalFeatureWeights} is allocated.</li>
 * </ol>
 *
 * @param eventStream the source of training events
 * @throws TalismaneException propagated from the event stream
 * @throws RuntimeException wrapping any {@code IOException} (logged first)
 */
void prepareData(ClassificationEventStream eventStream) throws TalismaneException {
    try {
        eventFile = File.createTempFile("events", "txt");
        eventFile.deleteOnExit();
        // try-with-resources so the writer is closed even if reading or writing an event fails.
        try (Writer eventWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(eventFile), "UTF-8"))) {
            while (eventStream.hasNext()) {
                ClassificationEvent corpusEvent = eventStream.next();
                PerceptronEvent event = new PerceptronEvent(corpusEvent, params);
                event.write(eventWriter);
            }
            eventWriter.flush();
        }
        if (cutoff > 1) {
            params.initialiseCounts();
            File originalEventFile = eventFile;
            // First pass: count how often each feature index occurs.
            try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(eventFile), "UTF-8")))) {
                while (scanner.hasNextLine()) {
                    String line = scanner.nextLine();
                    PerceptronEvent event = new PerceptronEvent(line);
                    for (int featureIndex : event.getFeatureIndexes()) {
                        params.getFeatureCounts()[featureIndex]++;
                    }
                }
            }
            if (LOG.isDebugEnabled()) {
                // cutoffCounts[i] = number of features occurring at least i times (i in 1..20).
                int[] cutoffCounts = new int[21];
                for (int count : params.getFeatureCounts()) {
                    for (int i = 1; i < 21; i++) {
                        if (count >= i) {
                            cutoffCounts[i]++;
                        }
                    }
                }
                LOG.debug("Feature counts:");
                for (int i = 1; i < 21; i++) {
                    LOG.debug("Cutoff " + i + ": " + cutoffCounts[i]);
                }
            }
            PerceptronModelParameters cutoffParams = new PerceptronModelParameters();
            int[] newIndexes = cutoffParams.initialise(params, cutoff);
            decisionMaker = new PerceptronDecisionMaker(cutoffParams, this.scoring);
            // Second pass: rewrite every event using the re-mapped feature indexes.
            try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(eventFile), "UTF-8")))) {
                eventFile = File.createTempFile("eventsCutoff", "txt");
                eventFile.deleteOnExit();
                try (Writer eventCutoffWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(eventFile), "UTF-8"))) {
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        PerceptronEvent oldEvent = new PerceptronEvent(line);
                        PerceptronEvent newEvent = new PerceptronEvent(oldEvent, newIndexes);
                        newEvent.write(eventCutoffWriter);
                    }
                    eventCutoffWriter.flush();
                }
            }
            params = cutoffParams;
            // Delete only after the scanner above has been closed: deleting a file that
            // is still open fails on some platforms. If the delete still fails, the
            // deleteOnExit() registered earlier remains as a fallback.
            originalEventFile.delete();
        }
        params.initialiseWeights();
        totalFeatureWeights = new double[params.getFeatureCount()][params.getOutcomeCount()];
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
Also used : Scanner(java.util.Scanner) InputStreamReader(java.io.InputStreamReader) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedWriter(java.io.BufferedWriter) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) File(java.io.File) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter)

Example 5 with ClassificationEvent

use of com.joliciel.talismane.machineLearning.ClassificationEvent in project jochre by urieli.

the class JochreMergeEventStream method next.

/**
 * Produces the next training event for the shape-merge classifier.
 *
 * <p>All merge features are checked against the current merge candidate. The
 * expected outcome is derived from the letters of the two shapes:
 * <ul>
 * <li>if the first letter starts with {@code "|"}, merge when the second letter is
 * empty or ends with {@code "|"};</li>
 * <li>otherwise, merge when the second letter ends with {@code "|"} and the first
 * letter is empty.</li>
 * </ul>
 * The yes/no counters are updated accordingly and the candidate is cleared.
 *
 * @return the next event, or {@code null} when {@link #hasNext()} reports none
 */
@Override
public ClassificationEvent next() {
    if (!this.hasNext()) {
        return null;
    }

    LOG.debug("next event, " + mergeCandidate.getFirstShape() + ", " + mergeCandidate.getSecondShape());

    // Gather the non-null feature results, tracing each one as it is found.
    List<FeatureResult<?>> results = new ArrayList<>();
    for (MergeFeature<?> feature : mergeFeatures) {
        FeatureResult<?> result = feature.check(mergeCandidate, new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
        if (LOG.isTraceEnabled()) {
            LOG.trace(result.toString());
        }
    }

    String firstLetter = mergeCandidate.getFirstShape().getLetter();
    String secondLetter = mergeCandidate.getSecondShape().getLetter();

    boolean shouldMerge = firstLetter.startsWith("|")
            ? (secondLetter.isEmpty() || secondLetter.endsWith("|"))
            : (secondLetter.endsWith("|") && firstLetter.isEmpty());

    MergeOutcome outcome = shouldMerge ? MergeOutcome.DO_MERGE : MergeOutcome.DO_NOT_MERGE;
    if (shouldMerge) {
        yesCount++;
    } else {
        noCount++;
    }
    LOG.debug("Outcome: " + outcome);

    ClassificationEvent event = new ClassificationEvent(results, outcome.name());

    // Clear the candidate so that hasNext() can retrieve the next one.
    this.mergeCandidate = null;
    return event;
}
Also used : RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Aggregations

ClassificationEvent (com.joliciel.talismane.machineLearning.ClassificationEvent)11 ArrayList (java.util.ArrayList)10 FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult)9 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)8 TalismaneException (com.joliciel.talismane.TalismaneException)6 IOException (java.io.IOException)6 List (java.util.List)5 ClassificationEventStream (com.joliciel.talismane.machineLearning.ClassificationEventStream)4 LinkedHashMap (java.util.LinkedHashMap)4 Map (java.util.Map)4 Set (java.util.Set)4 SortedSet (java.util.SortedSet)4 TreeSet (java.util.TreeSet)4 Collectors (java.util.stream.Collectors)4 Logger (org.slf4j.Logger)4 LoggerFactory (org.slf4j.LoggerFactory)4 Shape (com.joliciel.jochre.graphics.Shape)1 TalismaneSession (com.joliciel.talismane.TalismaneSession)1 Decision (com.joliciel.talismane.machineLearning.Decision)1 ParseConfigurationFeature (com.joliciel.talismane.parser.features.ParseConfigurationFeature)1