Search in sources :

Example 6 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class ParseEventStream method next.

/**
 * Builds the classification event for the current parse configuration and
 * then advances to the next transition of the target configuration.
 * <br>
 * Every parse feature is checked against the current configuration; non-null
 * results are collected as the event's features. The event's classification
 * is the code of the transition found at the current index of the target
 * configuration. That transition is then applied to a new
 * {@link ParseConfiguration} built from the current one.
 *
 * @return the next event, or <code>null</code> if {@link #hasNext()} returns
 *         false
 * @throws TalismaneException
 *           propagated from the calls made while building the event
 * @throws IOException
 *           propagated from the calls made while building the event
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    eventCount++;
    // Parameterized logging: the configuration is only rendered to a string
    // when debug logging is actually enabled.
    LOG.debug("Event {}: {}", eventCount, currentConfiguration);

    List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>();
    for (ParseConfigurationFeature<?> parseFeature : parseFeatures) {
        // a fresh runtime environment is used for each feature check
        RuntimeEnvironment env = new RuntimeEnvironment();
        FeatureResult<?> featureResult = parseFeature.check(currentConfiguration, env);
        if (featureResult != null) {
            parseFeatureResults.add(featureResult);
        }
    }

    if (LOG.isTraceEnabled()) {
        // sort (and de-duplicate) the feature results so the trace output is stable
        SortedSet<String> sortedResults = new TreeSet<String>();
        for (FeatureResult<?> featureResult : parseFeatureResults) {
            sortedResults.add(featureResult.toString());
        }
        for (String featureResultString : sortedResults) {
            LOG.trace(featureResultString);
        }
    }

    Transition transition = targetConfiguration.getTransitions().get(currentIndex);
    ClassificationEvent event = new ClassificationEvent(parseFeatureResults, transition.getCode());

    // apply the transition to a new configuration and up the index
    currentConfiguration = new ParseConfiguration(currentConfiguration);
    transition.apply(currentConfiguration);
    currentIndex++;

    // all transitions of the target configuration have been consumed
    if (currentIndex == targetConfiguration.getTransitions().size()) {
        targetConfiguration = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ParseConfigurationFeature(com.joliciel.talismane.parser.features.ParseConfigurationFeature) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 7 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class ParserRegexBasedCorpusReader method processSentence.

/**
 * Processes one sentence's corpus lines: after the superclass processing, a
 * root token is prepended to the pos-tag sequence and registered under index
 * 0, one {@link DependencyArc} is built per corpus line, and a
 * {@link ParseConfiguration} is created from these arcs (either by predicting
 * transitions or by adding the dependencies directly). If the corpus line
 * reader has a placeholder for non-projective governors, manual
 * non-projective dependencies are added as well.
 * <br>
 * If a {@link TalismaneException} is thrown, the sentence is cleared before
 * the exception is rethrown.
 *
 * @throws TalismaneException
 *           in particular an {@link UnknownDependencyLabelException} if a
 *           non-empty label is not among the transition system's dependency
 *           labels (only checked when there is more than one such label)
 */
@Override
protected void processSentence(Sentence sentence, List<CorpusLine> corpusLines) throws TalismaneException, IOException {
    try {
        super.processSentence(sentence, corpusLines);

        // the artificial root is always reachable under index 0
        PosTaggedToken root = posTagSequence.prependRoot();
        idTokenMap.put(0, root);

        TransitionSystem transitionSystem = TalismaneSession.get(sessionId).getTransitionSystem();
        Set<DependencyArc> arcs = new TreeSet<>();
        for (CorpusLine line : corpusLines) {
            // lines without a governor are attached to index 0
            int governorIndex = line.hasElement(CorpusElement.GOVERNOR) ? Integer.parseInt(line.getElement(CorpusElement.GOVERNOR)) : 0;
            PosTaggedToken governor = idTokenMap.get(governorIndex);
            PosTaggedToken dependent = idTokenMap.get(line.getIndex());
            String label = line.getElement(CorpusElement.LABEL);

            if (transitionSystem.getDependencyLabels().size() > 1) {
                boolean unknownLabel = label.length() > 0 && !transitionSystem.getDependencyLabels().contains(label);
                if (unknownLabel) {
                    String path = this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath();
                    throw new UnknownDependencyLabelException(path, line.getLineNumber(), label);
                }
                String nonProjLabel = line.getElement(CorpusElement.NON_PROJ_LABEL);
                boolean unknownNonProjLabel = nonProjLabel != null && nonProjLabel.length() > 0
                        && !transitionSystem.getDependencyLabels().contains(nonProjLabel);
                if (unknownNonProjLabel) {
                    String path = this.getCurrentFile() == null ? "" : this.getCurrentFile().getPath();
                    throw new UnknownDependencyLabelException(path, line.getLineNumber(), nonProjLabel);
                }
            }

            DependencyArc arc = new DependencyArc(governor, dependent, label);
            if (LOG.isTraceEnabled()) {
                LOG.trace(arc.toString());
            }
            arcs.add(arc);
            if (line.hasElement(CorpusElement.DEP_COMMENT)) {
                arc.setComment(line.getElement(CorpusElement.DEP_COMMENT));
            }
        }

        configuration = new ParseConfiguration(posTagSequence);
        if (!this.predictTransitions) {
            for (DependencyArc arc : arcs) {
                configuration.addDependency(arc.getHead(), arc.getDependent(), arc.getLabel(), null);
            }
        } else {
            transitionSystem.predictTransitions(configuration, arcs);
        }

        // add manual non-projective dependencies, if there are any
        if (this.getCorpusLineReader().hasPlaceholder(CorpusElement.NON_PROJ_GOVERNOR)) {
            Set<DependencyArc> nonProjArcs = new TreeSet<>();
            if (LOG.isTraceEnabled()) {
                LOG.trace("Non projective dependencies: ");
            }
            for (CorpusLine line : corpusLines) {
                int governorIndex = line.hasElement(CorpusElement.NON_PROJ_GOVERNOR)
                        ? Integer.parseInt(line.getElement(CorpusElement.NON_PROJ_GOVERNOR))
                        : 0;
                PosTaggedToken governor = idTokenMap.get(governorIndex);
                PosTaggedToken dependent = idTokenMap.get(line.getIndex());
                DependencyArc nonProjArc = new DependencyArc(governor, dependent, line.getElement(CorpusElement.NON_PROJ_LABEL));
                if (LOG.isTraceEnabled()) {
                    LOG.trace(nonProjArc.toString());
                }
                nonProjArcs.add(nonProjArc);
                if (line.hasElement(CorpusElement.DEP_COMMENT)) {
                    nonProjArc.setComment(line.getElement(CorpusElement.DEP_COMMENT));
                }
            }
            for (DependencyArc nonProjArc : nonProjArcs) {
                configuration.addManualNonProjectiveDependency(nonProjArc.getHead(), nonProjArc.getDependent(), nonProjArc.getLabel());
            }
        }
    } catch (TalismaneException e) {
        // do not leave a half-built sentence behind
        this.clearSentence();
        throw e;
    }
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) TalismaneException(com.joliciel.talismane.TalismaneException) TreeSet(java.util.TreeSet) CorpusLine(com.joliciel.talismane.corpus.CorpusLine)

Example 8 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class SentenceDetector method detectSentences.

/**
 * Detect sentences within an annotated text. Sentences are added in the form
 * of an Annotation around a {@link SentenceBoundary}, with the start position
 * (relative to the start of the annotated text) at the start of the sentence
 * and the end position immediately after the end of the sentence. <br>
 * <br>
 * Sentence boundaries will not be detected within any annotation of type
 * {@link RawTextNoSentenceBreakMarker}, nor will they be detected before or
 * after the {@link AnnotatedText#getAnalysisStart()} and
 * {@link AnnotatedText#getAnalysisEnd()} respectively. <br>
 * <br>
 * If the text contained existing {@link SentenceBoundary} annotations before
 * analysis start, the first sentence will begin where the last existing
 * annotation ended. Otherwise, the first boundary will begin at position 0.
 * <br>
 * <br>
 * If the text's analysis end is equal to the text length, it is assumed that
 * the text end is a sentence boundary. In this case, an additional sentence
 * is added starting at the final detected boundary and ending at text end.
 *
 * @param text
 *          the annotated text in which we need to detect sentences.
 * @param labels
 *          labels passed on to every {@link SentenceBoundary} annotation
 *          created by this method.
 * @return in addition to the annotations added, we return a List of integers
 *         marking the end position of each sentence boundary.
 * @throws TalismaneException
 *           propagated from the calls made while testing possible boundaries
 */
public List<Integer> detectSentences(AnnotatedText text, String... labels) throws TalismaneException {
    LOG.debug("detectSentences");
    List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = text.getAnnotations(RawTextNoSentenceBreakMarker.class);

    // collect all possible boundaries inside the analysis window which are
    // not covered by a no-sentence-break marker
    Matcher matcher = possibleBoundaryPattern.matcher(text.getText());
    List<Integer> possibleBoundaries = new ArrayList<>();
    while (matcher.find()) {
        if (matcher.start() >= text.getAnalysisStart() && matcher.start() < text.getAnalysisEnd()) {
            boolean noSentences = false;
            int position = matcher.start();
            for (Annotation<RawTextNoSentenceBreakMarker> noSentenceBreakMarker : noSentenceBreakMarkers) {
                if (noSentenceBreakMarker.getStart() <= position && position < noSentenceBreakMarker.getEnd()) {
                    noSentences = true;
                    break;
                }
            }
            if (!noSentences) {
                possibleBoundaries.add(position);
            }
        }
    }

    // collect all deterministic sentence boundaries
    List<Annotation<RawTextSentenceBreakMarker>> sentenceBreakMarkers = text.getAnnotations(RawTextSentenceBreakMarker.class);
    Set<Integer> guessedBoundaries = new TreeSet<>(
            sentenceBreakMarkers.stream().filter(f -> f.getEnd() >= text.getAnalysisStart()).map(f -> f.getEnd()).collect(Collectors.toList()));

    // Share one token sequence for all possible boundaries, to avoid tokenising
    // multiple times
    Sentence sentence = new Sentence(text.getText(), sessionId);
    TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);

    List<PossibleSentenceBoundary> boundaries = new ArrayList<>();
    for (int possibleBoundary : possibleBoundaries) {
        PossibleSentenceBoundary boundary = new PossibleSentenceBoundary(tokenSequence, possibleBoundary);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Testing boundary: " + boundary);
            LOG.trace(" at position: " + possibleBoundary);
        }

        List<FeatureResult<?>> featureResults = new ArrayList<>();
        for (SentenceDetectorFeature<?> feature : features) {
            RuntimeEnvironment env = new RuntimeEnvironment();
            FeatureResult<?> featureResult = feature.check(boundary, env);
            if (featureResult != null) {
                featureResults.add(featureResult);
            }
        }
        if (LOG.isTraceEnabled()) {
            SortedSet<String> featureResultSet = featureResults.stream().map(f -> f.toString()).collect(Collectors.toCollection(() -> new TreeSet<String>()));
            for (String featureResultString : featureResultSet) {
                LOG.trace(featureResultString);
            }
        }

        List<Decision> decisions = this.decisionMaker.decide(featureResults);
        if (LOG.isTraceEnabled()) {
            for (Decision decision : decisions) {
                LOG.trace(decision.getOutcome() + ": " + decision.getProbability());
            }
        }

        // only the first decision in the list is considered
        if (decisions.get(0).getOutcome().equals(SentenceDetectorOutcome.IS_BOUNDARY.name())) {
            // the sentence ends immediately after the boundary character
            int boundaryEnd = possibleBoundary + 1;
            if (LOG.isTraceEnabled()) {
                // log the computed position: concatenating "+ 1" directly onto
                // the string would append the digit "1" instead of adding one
                LOG.trace("Adding boundary: " + boundaryEnd);
            }
            guessedBoundaries.add(boundaryEnd);
            boundaries.add(boundary);
        }
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("context: " + text.getText().toString().replace('\n', '¶').replace('\r', '¶'));
        for (PossibleSentenceBoundary boundary : boundaries) {
            LOG.trace("boundary: " + boundary.toString());
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("guessedBoundaries : " + guessedBoundaries.toString());
    }

    List<Annotation<SentenceBoundary>> newBoundaries = new ArrayList<>();

    // the first new sentence starts where the last existing one ended, if any
    int lastBoundary = 0;
    List<Annotation<SentenceBoundary>> existingBoundaries = text.getAnnotations(SentenceBoundary.class);
    if (existingBoundaries.size() > 0) {
        lastBoundary = existingBoundaries.get(existingBoundaries.size() - 1).getEnd();
    }

    // advance boundary start until a non space character is encountered
    while (lastBoundary < text.getAnalysisEnd() && Character.isWhitespace(text.getText().charAt(lastBoundary))) {
        lastBoundary++;
    }

    // guessedBoundaries is sorted, so sentences are added in text order
    for (int guessedBoundary : guessedBoundaries) {
        if (guessedBoundary > lastBoundary) {
            Annotation<SentenceBoundary> sentenceBoundary = new Annotation<>(lastBoundary, guessedBoundary, new SentenceBoundary(), labels);
            newBoundaries.add(sentenceBoundary);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Added boundary: " + sentenceBoundary);
            }
            lastBoundary = guessedBoundary;
        }
    }

    // when the analysis reaches the end of the text, the text end is assumed
    // to close a final sentence
    if (text.getAnalysisEnd() == text.getText().length()) {
        if (text.getAnalysisEnd() > lastBoundary) {
            Annotation<SentenceBoundary> sentenceBoundary = new Annotation<>(lastBoundary, text.getAnalysisEnd(), new SentenceBoundary(), labels);
            newBoundaries.add(sentenceBoundary);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Added final boundary: " + sentenceBoundary);
            }
        }
    }

    text.addAnnotations(newBoundaries);
    return new ArrayList<>(guessedBoundaries);
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) MachineLearningModelFactory(com.joliciel.talismane.machineLearning.MachineLearningModelFactory) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) RawTextNoSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker) ArrayList(java.util.ArrayList) ClassificationModel(com.joliciel.talismane.machineLearning.ClassificationModel) HashSet(java.util.HashSet) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) SentenceDetectorFeatureParser(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeatureParser) Matcher(java.util.regex.Matcher) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) ConfigUtils(com.joliciel.talismane.utils.ConfigUtils) ConfigFactory(com.typesafe.config.ConfigFactory) ExternalResourceFinder(com.joliciel.talismane.machineLearning.ExternalResourceFinder) AnnotatedText(com.joliciel.talismane.AnnotatedText) ExternalResource(com.joliciel.talismane.machineLearning.ExternalResource) SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) Logger(org.slf4j.Logger) Config(com.typesafe.config.Config) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) Decision(com.joliciel.talismane.machineLearning.Decision) Collectors(java.util.stream.Collectors) RawTextSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker) List(java.util.List) Annotation(com.joliciel.talismane.Annotation) Annotator(com.joliciel.talismane.Annotator) Pattern(java.util.regex.Pattern) Sentence(com.joliciel.talismane.rawText.Sentence) 
InputStream(java.io.InputStream) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) RawTextNoSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker) TreeSet(java.util.TreeSet) Sentence(com.joliciel.talismane.rawText.Sentence) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Annotation(com.joliciel.talismane.Annotation) Decision(com.joliciel.talismane.machineLearning.Decision) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 9 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class SentenceDetectorEventStream method next.

/**
 * Builds the classification event for the next possible sentence boundary of
 * the current sentence.
 * <br>
 * The boundary is evaluated in a text made of the previous sentence, the
 * current sentence, and enough following sentences to provide at least
 * <code>minCharactersAfterBoundary</code> characters of look-ahead (fewer if
 * the corpus reader runs out of sentences). Sentences read ahead from the
 * corpus reader are appended to <code>sentences</code>.
 * <br>
 * The event is classified as a boundary only if the possible boundary equals
 * <code>realBoundary</code>. Once the last possible boundary of the current
 * sentence has been consumed, the current sentence becomes the previous
 * sentence (with a trailing space) and is reset to <code>null</code>.
 *
 * @return the next event, or <code>null</code> if {@link #hasNext()} returns
 *         false
 * @throws TalismaneException
 *           propagated from the calls made while building the event
 * @throws IOException
 *           propagated from the calls made while building the event
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    int possibleBoundary = possibleBoundaries.get(currentIndex++);

    // gather look-ahead text following the current sentence
    StringBuilder moreText = new StringBuilder();
    int sentenceIndex = 0;
    while (moreText.length() < minCharactersAfterBoundary) {
        String nextSentence;
        if (sentenceIndex < sentences.size()) {
            nextSentence = sentences.get(sentenceIndex);
        } else if (corpusReader.hasNextSentence()) {
            // read ahead, and remember the sentence at position sentenceIndex
            nextSentence = corpusReader.nextSentence().getText().toString();
            sentences.add(nextSentence);
        } else {
            break;
        }
        // make sure sentences are separated by whitespace
        if (!nextSentence.startsWith(" ") && !nextSentence.startsWith("\n")) {
            moreText.append(' ');
        }
        moreText.append(nextSentence);
        sentenceIndex++;
    }

    String text = previousSentence + currentSentence + moreText;
    PossibleSentenceBoundary boundary = new PossibleSentenceBoundary(text, possibleBoundary, sessionId);
    // Parameterized logging: the boundary is only rendered to a string when
    // debug logging is actually enabled.
    LOG.debug("next event, boundary: {}", boundary);

    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
    for (SentenceDetectorFeature<?> feature : features) {
        // a fresh runtime environment is used for each feature check
        RuntimeEnvironment env = new RuntimeEnvironment();
        FeatureResult<?> featureResult = feature.check(boundary, env);
        if (featureResult != null) {
            featureResults.add(featureResult);
        }
    }

    if (LOG.isTraceEnabled()) {
        // sort (and de-duplicate) the feature results so the trace output is stable
        SortedSet<String> sortedResults = new TreeSet<String>();
        for (FeatureResult<?> featureResult : featureResults) {
            sortedResults.add(featureResult.toString());
        }
        for (String featureResultString : sortedResults) {
            LOG.trace(featureResultString);
        }
    }

    String classification = possibleBoundary == realBoundary ? SentenceDetectorOutcome.IS_BOUNDARY.name() : SentenceDetectorOutcome.IS_NOT_BOUNDARY.name();
    ClassificationEvent event = new ClassificationEvent(featureResults, classification);

    // last possible boundary of the current sentence: shift the sentence window
    if (currentIndex == possibleBoundaries.size()) {
        previousSentence = currentSentence.endsWith(" ") ? currentSentence : currentSentence + " ";
        currentSentence = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) Matcher(java.util.regex.Matcher) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) Pattern(java.util.regex.Pattern) LinkedList(java.util.LinkedList) SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 10 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class RawTextAnnotatorFactory method getAnnotator.

/**
 * Builds a {@link RawTextAnnotator} from a tab-delimited descriptor. The
 * first tab-delimited part is the annotator name.
 * <br>
 * <br>
 * If the name is "RegexMarkerFilter" (kept for historical reasons) or the
 * simple class name of {@link RawTextRegexAnnotator}, the remaining parts
 * are, in order:
 * <ul>
 * <li>a comma-separated list of {@link RawTextMarkType} names</li>
 * <li>the second constructor argument of {@link RawTextRegexAnnotator}
 * (presumably the regular expression)</li>
 * <li>optionally, an integer passed as the third constructor argument
 * (presumably the group index), 0 if omitted</li>
 * <li>if the mark types contain {@link RawTextMarkType#REPLACE}: the
 * replacement; otherwise, if they contain {@link RawTextMarkType#TAG}: the
 * attribute in the form <code>name=value</code>, or <code>name</code> alone
 * for an empty value</li>
 * </ul>
 * Otherwise the name must be the simple class name of one of the known
 * annotator classes, which is then instantiated by reflection with the block
 * size.
 *
 * @param descriptor
 *          the tab-delimited annotator descriptor
 * @param blockSize
 *          the block size passed on to the annotator's constructor
 * @return the annotator described, never null
 * @throws TalismaneException
 *           if a descriptor is incorrectly configured
 */
public RawTextAnnotator getAnnotator(String descriptor, int blockSize) throws TalismaneException {
    RawTextAnnotator filter = null;
    List<Class<? extends RawTextAnnotator>> classes = new ArrayListNoNulls<Class<? extends RawTextAnnotator>>();
    classes.add(DuplicateWhiteSpaceFilter.class);
    classes.add(NewlineEndOfSentenceMarker.class);
    classes.add(NewlineSpaceMarker.class);
    String[] parts = descriptor.split("\t");
    String filterName = parts[0];
    // add equality to RegexMarkerFilter for historical reasons
    if (filterName.equals("RegexMarkerFilter") || filterName.equals(RawTextRegexAnnotator.class.getSimpleName())) {
        // the mark types are read before the argument count can be validated,
        // so make sure they are present at all
        if (parts.length < 2) {
            throw new TalismaneException(
                    "Wrong number of arguments for " + RawTextRegexAnnotator.class.getSimpleName() + ". Expected at least 3, but was " + parts.length);
        }
        String[] filterTypeStrings = parts[1].split(",");
        List<RawTextMarkType> filterTypes = new ArrayListNoNulls<RawTextMarkType>();
        for (String filterTypeString : filterTypeStrings) {
            filterTypes.add(RawTextMarkType.valueOf(filterTypeString));
        }
        boolean needsReplacement = false;
        boolean needsTag = false;
        int minParams = 3;
        if (filterTypes.contains(RawTextMarkType.REPLACE)) {
            needsReplacement = true;
            minParams = 4;
        } else if (filterTypes.contains(RawTextMarkType.TAG)) {
            needsTag = true;
            minParams = 4;
        }

        // the integer argument at index 3 is optional: it is present only
        // when there is exactly one part more than the minimum
        int groupIndex;
        if (parts.length == minParams + 1) {
            groupIndex = Integer.parseInt(parts[3]);
        } else if (parts.length == minParams) {
            groupIndex = 0;
        } else {
            throw new TalismaneException("Wrong number of arguments for " + RawTextRegexAnnotator.class.getSimpleName() + ". Expected " + minParams + " or "
                    + (minParams + 1) + ", but was " + parts.length);
        }
        filter = new RawTextRegexAnnotator(filterTypes, parts[2], groupIndex, blockSize);

        // the replacement or tag, when required, is always the last part,
        // whether or not the optional integer argument was given
        String lastPart = parts[parts.length - 1];
        if (needsReplacement) {
            filter.setReplacement(lastPart);
        }
        if (needsTag) {
            int equalsIndex = lastPart.indexOf('=');
            if (equalsIndex >= 0) {
                String attribute = lastPart.substring(0, equalsIndex);
                String value = lastPart.substring(equalsIndex + 1);
                filter.setAttribute(new StringAttribute(attribute, value));
            } else {
                filter.setAttribute(new StringAttribute(lastPart, ""));
            }
        }
    } else {
        for (Class<? extends RawTextAnnotator> clazz : classes) {
            if (filterName.equals(clazz.getSimpleName())) {
                try {
                    // NOTE(review): this looks up a constructor whose single
                    // parameter is java.lang.Integer; a constructor declared
                    // with a primitive int would not match - confirm against
                    // the annotator classes.
                    Constructor<? extends RawTextAnnotator> constructor = clazz.getConstructor(Integer.class);
                    filter = constructor.newInstance(blockSize);
                } catch (ReflectiveOperationException e) {
                    throw new TalismaneException("Problem building class: " + filterName, e);
                }
            }
        }
        if (filter == null) {
            throw new TalismaneException("Unknown text filter class: " + filterName);
        }
    }
    return filter;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) StringAttribute(com.joliciel.talismane.tokeniser.StringAttribute) ArrayListNoNulls(com.joliciel.talismane.utils.ArrayListNoNulls)

Aggregations

TalismaneException (com.joliciel.talismane.TalismaneException)47 ArrayList (java.util.ArrayList)27 Config (com.typesafe.config.Config)14 File (java.io.File)11 List (java.util.List)10 TreeSet (java.util.TreeSet)10 FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult)9 IOException (java.io.IOException)9 HashMap (java.util.HashMap)9 Set (java.util.Set)9 Decision (com.joliciel.talismane.machineLearning.Decision)8 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)8 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)8 Token (com.joliciel.talismane.tokeniser.Token)8 Map (java.util.Map)8 SortedSet (java.util.SortedSet)8 Collectors (java.util.stream.Collectors)8 Logger (org.slf4j.Logger)8 LoggerFactory (org.slf4j.LoggerFactory)8 Sentence (com.joliciel.talismane.rawText.Sentence)7