Search in sources :

Example 16 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class StackSearchFeature method check.

/**
 * Looks through the parse configuration's stack for the first token that
 * satisfies the criterion feature, starting at a given position.
 * <p>
 * Positions are counted from 0 in the order returned by the stack's
 * iterator. The start position defaults to 1 and is overridden by the
 * outcome of {@code indexFeature} when that feature is set.
 *
 * @param wrapper
 *          supplies the parse configuration whose stack is searched
 * @param env
 *          the runtime environment handed on to the nested features
 * @return a result wrapping the first matching token, or {@code null} if the
 *         index feature yields no result or no token on the stack matches
 * @throws TalismaneException
 *           if one of the nested feature checks throws it
 */
@Override
public FeatureResult<PosTaggedTokenWrapper> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    ParseConfiguration parseConfig = wrapper.getParseConfiguration();

    // Position on the stack from which the search begins.
    int startPosition = 1;
    if (indexFeature != null) {
        FeatureResult<Integer> startPositionResult = indexFeature.check(wrapper, env);
        if (startPositionResult == null) {
            return null;
        }
        startPosition = startPositionResult.getOutcome();
    }

    Iterator<PosTaggedToken> stackTokens = parseConfig.getStack().iterator();

    // A single address object is reused for every candidate token.
    ParseConfigurationAddress address = new ParseConfigurationAddress(env);
    address.setParseConfiguration(parseConfig);

    for (int position = 0; stackTokens.hasNext(); position++) {
        PosTaggedToken candidate = stackTokens.next();
        if (position >= startPosition) {
            address.setPosTaggedToken(candidate);
            FeatureResult<Boolean> criterionResult = criterionFeature.check(address, env);
            // A null criterion result is treated as "no match" for this token.
            if (criterionResult != null) {
                boolean matches = criterionResult.getOutcome();
                if (matches) {
                    return this.generateResult(candidate);
                }
            }
        }
    }
    return null;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 17 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class Talismane method analyse.

/**
 * Analyse the data provided by this reader, as specified by the
 * configuration.
 * <p>
 * How the input is consumed depends on the start module:
 * <ul>
 * <li>sentence detector or tokeniser: the reader is read one character at a
 * time, the text is cut into segments and rolled through a
 * {@link RollingTextBlock}, from which detected sentences are collected;</li>
 * <li>pos-tagger: token sequences are read from a
 * {@link TokeniserAnnotatedCorpusReader};</li>
 * <li>parser: pos-tag sequences are read from a
 * {@link PosTagAnnotatedCorpusReader}.</li>
 * </ul>
 * Each unit read is then passed through the tokeniser, pos-tagger and parser
 * as indicated by {@code needsTokeniser()}, {@code needsPosTagger()} and
 * {@code needsParser()}, and the result of each stage is handed to the
 * corresponding registered processors.
 * <p>
 * On exit (normal or exceptional) the reader is closed, the writer is
 * flushed, all processors are closed and finally the writer is closed.
 *
 * @param reader
 *          the source of the text or annotated corpus to analyse; it is
 *          closed by this method
 * @throws IOException
 *           if writing to the writer fails, or if flushing/closing the
 *           reader, the writer or a processor fails (in the latter case the
 *           last such exception is rethrown after clean-up). Errors while
 *           reading a character are only logged and treated as end of input.
 * @throws ReflectiveOperationException
 *           declared by this method; the code raising it is not visible here
 *           (presumably one of the modules or corpus reader factories - to
 *           be confirmed)
 * @throws TalismaneException
 *           if it's impossible to read a sentence from an annotated corpus
 */
public void analyse(Reader reader) throws IOException, ReflectiveOperationException, TalismaneException {
    long startTime = System.currentTimeMillis();
    try {
        // Annotated corpus readers are only created for the start module that
        // needs them; the other one stays null.
        TokeniserAnnotatedCorpusReader tokenCorpusReader = null;
        PosTagAnnotatedCorpusReader posTagCorpusReader = null;
        if (this.startModule.equals(Module.posTagger)) {
            tokenCorpusReader = TokeniserAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".tokeniser.input"), sessionId);
        }
        if (this.startModule.equals(Module.parser)) {
            posTagCorpusReader = PosTagAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".pos-tagger.input"), sessionId);
        }
        // Queue of raw text segments waiting to be rolled into the text block.
        LinkedList<String> textSegments = new LinkedList<String>();
        // Queue of detected sentences waiting to be processed.
        LinkedList<Sentence> sentences = new LinkedList<Sentence>();
        TokenSequence tokenSequence = null;
        PosTagSequence posTagSequence = null;
        // Accumulates the characters of the text segment currently being read.
        StringBuilder stringBuilder = new StringBuilder();
        boolean finished = false;
        // Number of sentences detected so far; note that this local shadows
        // the field this.sentenceCount, which is compared against it below.
        int sentenceCount = 0;
        CurrentFileProvider currentFileProvider = reader instanceof CurrentFileProvider ? (CurrentFileProvider) reader : null;
        RollingTextBlock rollingTextBlock = new RollingTextBlock(this.processByDefault, currentFileProvider, sessionId);
        int endBlockCharacterCount = 0;
        URI currentURI = null;
        File currentFile = null;
        // Main loop: each pass reads one unit of input (a character, a token
        // sequence or a pos-tag sequence, depending on the start module) and
        // then processes whatever is ready.
        while (!finished) {
            if (this.startModule.equals(Module.sentenceDetector) || this.startModule.equals(Module.tokeniser)) {
                // Note SentenceDetector and Tokeniser start modules treated
                // identically,
                // except that for SentenceDetector we apply a probabilistic
                // sentence detector
                // whereas for Tokeniser we assume all sentence breaks are
                // marked by filters

                // read characters from the reader, one at a time
                char c;
                int r = -1;
                try {
                    r = reader.read();
                } catch (IOException e) {
                    // A read error is logged only; r stays -1, so it is
                    // handled below exactly like end of input.
                    LogUtils.logError(LOG, e);
                }
                if (r == -1) {
                    // End of input: finish, using a newline as the final
                    // character.
                    finished = true;
                    c = '\n';
                } else {
                    c = (char) r;
                }
                // Jump out if we have 3 consecutive end-block characters.
                if (c == TalismaneSession.get(sessionId).getEndBlockCharacter()) {
                    endBlockCharacterCount++;
                    if (endBlockCharacterCount == 3) {
                        LOG.info("Three consecutive end-block characters. Exiting.");
                        finished = true;
                    }
                } else {
                    endBlockCharacterCount = 0;
                }
                // Close the current text segment when we have finished, when
                // we hit whitespace (other than CR/LF) once the buffer has
                // grown beyond the block size, or when we hit an end-block
                // character.
                if (finished || (Character.isWhitespace(c) && c != '\r' && c != '\n' && stringBuilder.length() > TalismaneSession.get(sessionId).getBlockSize()) || c == TalismaneSession.get(sessionId).getEndBlockCharacter()) {
                    // the end-block character itself is kept at the end of
                    // the segment it terminates
                    if (c == TalismaneSession.get(sessionId).getEndBlockCharacter())
                        stringBuilder.append(c);
                    // is the current block > 0 characters? If so, queue it
                    // and start a fresh buffer.
                    if (stringBuilder.length() > 0) {
                        String textSegment = stringBuilder.toString();
                        stringBuilder = new StringBuilder();
                        textSegments.add(textSegment);
                    }
                    // an end-block character additionally queues an empty
                    // segment
                    if (c == TalismaneSession.get(sessionId).getEndBlockCharacter()) {
                        textSegments.addLast("");
                    }
                }
                if (finished) {
                    // queue anything still left in the buffer
                    if (stringBuilder.length() > 0) {
                        textSegments.addLast(stringBuilder.toString());
                        stringBuilder = new StringBuilder();
                    }
                    // add three final text segments to roll everything
                    // through processing
                    textSegments.addLast("");
                    textSegments.addLast("");
                    textSegments.addLast("");
                }
                // end-block characters were already appended above; any other
                // character goes into the (possibly fresh) buffer
                if (c != TalismaneSession.get(sessionId).getEndBlockCharacter())
                    stringBuilder.append(c);
                // process every text segment queued so far
                while (textSegments.size() > 0) {
                    // roll in a new block 4, and roll the other blocks
                    // leftwards
                    String nextText = textSegments.removeFirst();
                    rollingTextBlock = rollingTextBlock.roll(nextText);
                    // annotate block 3 with raw text filters
                    AnnotatedText rawTextBlock = rollingTextBlock.getRawTextBlock();
                    for (RawTextAnnotator textAnnotator : TalismaneSession.get(sessionId).getTextAnnotators()) {
                        textAnnotator.annotate(rawTextBlock);
                    }
                    // detect sentences in block 2 using the sentence
                    // detector
                    AnnotatedText processedText = rollingTextBlock.getProcessedText();
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("processedText: " + processedText.getText().toString().replace('\n', '¶').replace('\r', '¶'));
                    }
                    // the probabilistic detector is only applied when starting
                    // at the sentence detector module
                    if (this.startModule.equals(Module.sentenceDetector)) {
                        sentenceDetector.detectSentences(processedText);
                    }
                    // get the sentences detected in block 2
                    List<Sentence> theSentences = rollingTextBlock.getDetectedSentences();
                    for (Sentence sentence : theSentences) {
                        sentences.add(sentence);
                        sentenceCount++;
                    }
                    // stop reading once the limit held in the field
                    // this.sentenceCount (when positive) has been reached
                    if (this.sentenceCount > 0 && sentenceCount >= this.sentenceCount) {
                        finished = true;
                    }
                }
            } else if (this.startModule.equals(Module.posTagger)) {
                // starting at the pos-tagger: read the next token sequence
                // from the annotated corpus, or finish if there is none
                if (tokenCorpusReader.hasNextSentence()) {
                    tokenSequence = tokenCorpusReader.nextTokenSequence();
                } else {
                    tokenSequence = null;
                    finished = true;
                }
            } else if (this.startModule.equals(Module.parser)) {
                // starting at the parser: read the next pos-tag sequence
                // from the annotated corpus, or finish if there is none
                if (posTagCorpusReader.hasNextSentence()) {
                    posTagSequence = posTagCorpusReader.nextPosTagSequence();
                } else {
                    posTagSequence = null;
                    finished = true;
                }
            }
            // Is there anything ready to process? What counts as "ready"
            // depends on the start module.
            boolean needToProcess = false;
            if (this.startModule.equals(Module.sentenceDetector) || this.startModule.equals(Module.tokeniser))
                needToProcess = !sentences.isEmpty();
            else if (this.startModule.equals(Module.posTagger))
                needToProcess = tokenSequence != null;
            else if (this.startModule.equals(Module.parser))
                needToProcess = posTagSequence != null;
            while (needToProcess) {
                Sentence sentence = null;
                // NOTE(review): relies on the ordering of the Module enum
                // (presumably sentenceDetector < tokeniser < posTagger <
                // parser) - confirm against the enum declaration.
                if (this.startModule.compareTo(Module.tokeniser) <= 0 && this.endModule.compareTo(Module.sentenceDetector) >= 0) {
                    // take the next detected sentence and annotate it
                    sentence = sentences.poll();
                    LOG.debug("Sentence: " + sentence);
                    for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) annotator.annotate(sentence);
                    // when the sentence comes from a different file than the
                    // previous one, notify the writer and every processor
                    // that observes the current file
                    if (sentence.getFileURI() != null && !sentence.getFileURI().equals(currentURI)) {
                        currentURI = sentence.getFileURI();
                        currentFile = sentence.getFile();
                        LOG.debug("Setting current file to " + currentFile.getPath());
                        if (writer instanceof CurrentFileObserver)
                            ((CurrentFileObserver) writer).onNextFile(currentFile);
                        for (SentenceProcessor processor : sentenceProcessors) if (processor instanceof CurrentFileObserver)
                            ((CurrentFileObserver) processor).onNextFile(currentFile);
                        for (TokenSequenceProcessor processor : tokenSequenceProcessors) if (processor instanceof CurrentFileObserver)
                            ((CurrentFileObserver) processor).onNextFile(currentFile);
                        for (PosTagSequenceProcessor processor : posTagSequenceProcessors) if (processor instanceof CurrentFileObserver)
                            ((CurrentFileObserver) processor).onNextFile(currentFile);
                        for (ParseConfigurationProcessor processor : parseConfigurationProcessors) if (processor instanceof CurrentFileObserver)
                            ((CurrentFileObserver) processor).onNextFile(currentFile);
                    }
                    // write out the leftover original text attached to the
                    // sentence, followed by a newline
                    if (sentence.getLeftoverOriginalText().length() > 0) {
                        writer.append(sentence.getLeftoverOriginalText() + "\n");
                    }
                    for (SentenceProcessor sentenceProcessor : sentenceProcessors) {
                        sentenceProcessor.onNextSentence(sentence);
                    }
                }
                // need to tokenise ?
                List<TokenSequence> tokenSequences = null;
                if (this.needsTokeniser()) {
                    tokenSequences = tokeniser.tokenise(sentence);
                    // only the first token sequence returned is passed to the
                    // token sequence processors
                    tokenSequence = tokenSequences.get(0);
                    for (TokenSequenceProcessor tokenSequenceProcessor : tokenSequenceProcessors) {
                        tokenSequenceProcessor.onNextTokenSequence(tokenSequence);
                    }
                }
                // need to pos-tag ?
                List<PosTagSequence> posTagSequences = null;
                if (this.needsPosTagger()) {
                    posTagSequence = null;
                    // no tokeniser was run: wrap the single token sequence
                    // read from the corpus in a list
                    if (tokenSequences == null) {
                        tokenSequences = new ArrayListNoNulls<>();
                        tokenSequences.add(tokenSequence);
                    }
                    if (posTagger instanceof NonDeterministicPosTagger) {
                        // a non-deterministic tagger receives all token
                        // sequences; the first sequence it returns is used
                        NonDeterministicPosTagger nonDeterministicPosTagger = (NonDeterministicPosTagger) posTagger;
                        posTagSequences = nonDeterministicPosTagger.tagSentence(tokenSequences);
                        posTagSequence = posTagSequences.get(0);
                    } else {
                        posTagSequence = posTagger.tagSentence(tokenSequence);
                    }
                    for (PosTagSequenceProcessor posTagSequenceProcessor : this.posTagSequenceProcessors) {
                        posTagSequenceProcessor.onNextPosTagSequence(posTagSequence);
                    }
                    // reset so that the loop condition below does not see
                    // this token sequence again
                    tokenSequence = null;
                }
                // need to parse ?
                if (this.needsParser()) {
                    // no pos-tagger was run, or it was deterministic: wrap
                    // the single pos-tag sequence in a list
                    if (posTagSequences == null) {
                        posTagSequences = new ArrayListNoNulls<>();
                        posTagSequences.add(posTagSequence);
                    }
                    ParseConfiguration parseConfiguration = null;
                    List<ParseConfiguration> parseConfigurations = null;
                    try {
                        if (parser instanceof NonDeterministicParser) {
                            // a non-deterministic parser receives all pos-tag
                            // sequences; the first configuration it returns
                            // is used
                            NonDeterministicParser nonDeterministicParser = (NonDeterministicParser) parser;
                            parseConfigurations = nonDeterministicParser.parseSentence(posTagSequences);
                            parseConfiguration = parseConfigurations.get(0);
                        } else {
                            parseConfiguration = parser.parseSentence(posTagSequence);
                        }
                        for (ParseConfigurationProcessor parseConfigurationProcessor : this.parseConfigurationProcessors) {
                            parseConfigurationProcessor.onNextParseConfiguration(parseConfiguration);
                        }
                    } catch (Exception e) {
                        // parsing and parse-processing errors are logged, and
                        // only abort the analysis when stopOnError is set
                        LogUtils.logError(LOG, e);
                        if (stopOnError)
                            throw new RuntimeException(e);
                    }
                    // reset so that the loop condition below does not see
                    // this pos-tag sequence again
                    posTagSequence = null;
                }
                // anything else ready to process? (same test as before the
                // loop)
                if (this.startModule.equals(Module.sentenceDetector) || this.startModule.equals(Module.tokeniser))
                    needToProcess = !sentences.isEmpty();
                else if (this.startModule.equals(Module.posTagger))
                    needToProcess = tokenSequence != null;
                else if (this.startModule.equals(Module.parser))
                    needToProcess = posTagSequence != null;
            }
        // back to the top of the main loop to read the next unit of input
        }
        // Check if there's any leftover output to output!
        if (rollingTextBlock.getLeftoverOriginalText().length() > 0)
            writer.append(rollingTextBlock.getLeftoverOriginalText());
    } finally {
        // Clean-up: every close is attempted; IOExceptions are logged and the
        // last one seen is remembered so it can be rethrown at the end.
        IOException exception = null;
        try {
            // NOTE(review): if reader.close() throws, writer.flush() is
            // skipped.
            reader.close();
            writer.flush();
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            exception = e;
        }
        for (SentenceProcessor processor : this.sentenceProcessors) try {
            processor.close();
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            exception = e;
        }
        for (TokenSequenceProcessor processor : this.tokenSequenceProcessors) try {
            processor.close();
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            exception = e;
        }
        // pos-tag and parse processors are notified of completion before
        // being closed
        for (PosTagSequenceProcessor processor : this.posTagSequenceProcessors) {
            try {
                processor.onCompleteAnalysis();
                processor.close();
            } catch (IOException e) {
                LogUtils.logError(LOG, e);
                exception = e;
            }
        }
        for (ParseConfigurationProcessor processor : this.parseConfigurationProcessors) {
            try {
                processor.onCompleteParse();
                processor.close();
            } catch (IOException e) {
                LogUtils.logError(LOG, e);
                exception = e;
            }
        }
        // elapsed wall-clock time in milliseconds since the method started
        long endTime = System.currentTimeMillis();
        long totalTime = endTime - startTime;
        LOG.debug("Total time for Talismane.process(): " + totalTime);
        try {
            writer.close();
        } catch (IOException e) {
            LogUtils.logError(LOG, e);
            exception = e;
        }
        // NOTE(review): throwing from a finally block replaces any exception
        // that was already propagating out of the try block above.
        if (exception != null)
            throw exception;
    }
}
Also used : TokenSequenceProcessor(com.joliciel.talismane.tokeniser.output.TokenSequenceProcessor) SentenceProcessor(com.joliciel.talismane.sentenceDetector.SentenceProcessor) RawTextAnnotator(com.joliciel.talismane.rawText.RawTextAnnotator) URI(java.net.URI) NonDeterministicParser(com.joliciel.talismane.parser.NonDeterministicParser) NonDeterministicPosTagger(com.joliciel.talismane.posTagger.NonDeterministicPosTagger) Sentence(com.joliciel.talismane.rawText.Sentence) ParseConfigurationProcessor(com.joliciel.talismane.parser.output.ParseConfigurationProcessor) PosTagAnnotatedCorpusReader(com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader) RollingTextBlock(com.joliciel.talismane.rawText.RollingTextBlock) IOException(java.io.IOException) PosTagSequenceProcessor(com.joliciel.talismane.posTagger.output.PosTagSequenceProcessor) LinkedList(java.util.LinkedList) IOException(java.io.IOException) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) CurrentFileProvider(com.joliciel.talismane.utils.io.CurrentFileProvider) TokeniserAnnotatedCorpusReader(com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader) SentenceAnnotator(com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) CurrentFileObserver(com.joliciel.talismane.utils.io.CurrentFileObserver) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) File(java.io.File)

Example 18 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class ParseOutputRewriterTest method testGetCorpusLines.

/**
 * Reads a ten-line parsed sentence using the configuration in
 * {@code testWithOutputRules.conf}, asks the {@link ParseOutputRewriter} for
 * its corpus lines and checks that thirteen lines come back with the
 * expected index, token, governor index and label (and, for some lines,
 * lemma and pos-tag).
 */
@Test
public void testGetCorpusLines() throws Exception {
    TalismaneSession.clearSessions();
    System.setProperty("config.file", "src/test/resources/testWithOutputRules.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    // Tab-separated input, one token per line.
    StringBuilder corpus = new StringBuilder();
    corpus.append("1\tAu\tau\tADP+DET\t0\troot\n");
    corpus.append("2\tsein\tsein\tNOUN\t1\tfixed\n");
    corpus.append("3\tmême\tmême\tADV\t1\tadvmod\n");
    corpus.append("4\tdu\tdu\tADP+DET\t5\tcase\n");
    corpus.append("5\tParti\tParti\tPROPN\t1\tnmod\n");
    corpus.append("6\tsocialiste\tsocialiste\tADJ\t5\tfixed\n");
    corpus.append("7\tauquel\tauquel\tADP+PRON\t8\tobl\n");
    corpus.append("8\tappartient\tappartenir\tVERB\t5\tacl:relcl\n");
    corpus.append("9\tM.\tmonsieur\tNOUN\t8\tnsubj\n");
    corpus.append("10\tDupont\tDupont\tPROPN\t9\tflat:name\n");

    StringReader stringReader = new StringReader(corpus.toString());
    ParserRegexBasedCorpusReader reader = new ParserRegexBasedCorpusReader(stringReader, config.getConfig("talismane.core.test.parser.input"), sessionId);
    ParseConfiguration parseConfiguration = reader.nextConfiguration();

    final StringWriter writer = new StringWriter();
    try (ParseOutputRewriter rewriter = new ParseOutputRewriter(writer, sessionId)) {
        List<CorpusLine> corpusLines = rewriter.getCorpusLines(parseConfiguration);

        // 1-based position of the corpus line currently being checked.
        int position = 0;
        for (CorpusLine corpusLine : corpusLines) {
            position++;
            LOG.debug("line " + corpusLine.getIndex() + ": " + corpusLine.getElements());
            switch (position) {
            case 1:
                assertEquals(1, corpusLine.getIndex());
                assertEquals("à", corpusLine.getToken());
                assertEquals("à", corpusLine.getLemma());
                assertEquals("ADP", corpusLine.getPosTag());
                assertEquals(0, corpusLine.getGovernorIndex());
                assertEquals("root", corpusLine.getLabel());
                break;
            case 2:
                assertEquals(2, corpusLine.getIndex());
                assertEquals("le", corpusLine.getToken());
                assertEquals("le", corpusLine.getLemma());
                assertEquals("DET", corpusLine.getPosTag());
                assertEquals(1, corpusLine.getGovernorIndex());
                assertEquals("fixed", corpusLine.getLabel());
                break;
            case 3:
                assertEquals(3, corpusLine.getIndex());
                assertEquals("sein", corpusLine.getToken());
                assertEquals(1, corpusLine.getGovernorIndex());
                assertEquals("fixed", corpusLine.getLabel());
                break;
            case 4:
                assertEquals(4, corpusLine.getIndex());
                assertEquals("même", corpusLine.getToken());
                assertEquals(1, corpusLine.getGovernorIndex());
                assertEquals("advmod", corpusLine.getLabel());
                break;
            case 5:
                assertEquals(5, corpusLine.getIndex());
                assertEquals("de", corpusLine.getToken());
                assertEquals("de", corpusLine.getLemma());
                assertEquals("ADP", corpusLine.getPosTag());
                assertEquals(7, corpusLine.getGovernorIndex());
                assertEquals("case", corpusLine.getLabel());
                break;
            case 6:
                assertEquals(6, corpusLine.getIndex());
                assertEquals("le", corpusLine.getToken());
                assertEquals("le", corpusLine.getLemma());
                assertEquals("DET", corpusLine.getPosTag());
                assertEquals(7, corpusLine.getGovernorIndex());
                assertEquals("det", corpusLine.getLabel());
                break;
            case 7:
                assertEquals(7, corpusLine.getIndex());
                assertEquals("Parti", corpusLine.getToken());
                assertEquals(1, corpusLine.getGovernorIndex());
                assertEquals("nmod", corpusLine.getLabel());
                break;
            case 8:
                assertEquals(8, corpusLine.getIndex());
                assertEquals("socialiste", corpusLine.getToken());
                assertEquals(7, corpusLine.getGovernorIndex());
                assertEquals("fixed", corpusLine.getLabel());
                break;
            case 9:
                assertEquals(9, corpusLine.getIndex());
                assertEquals("à", corpusLine.getToken());
                assertEquals("à", corpusLine.getLemma());
                assertEquals("ADP", corpusLine.getPosTag());
                assertEquals(10, corpusLine.getGovernorIndex());
                assertEquals("case", corpusLine.getLabel());
                break;
            case 10:
                assertEquals(10, corpusLine.getIndex());
                assertEquals("lequel", corpusLine.getToken());
                assertEquals("lequel", corpusLine.getLemma());
                assertEquals("PRON", corpusLine.getPosTag());
                assertEquals(11, corpusLine.getGovernorIndex());
                assertEquals("obl", corpusLine.getLabel());
                break;
            case 11:
                assertEquals(11, corpusLine.getIndex());
                assertEquals("appartient", corpusLine.getToken());
                assertEquals("VERB", corpusLine.getPosTag());
                assertEquals(7, corpusLine.getGovernorIndex());
                assertEquals("acl:relcl", corpusLine.getLabel());
                break;
            case 12:
                assertEquals(12, corpusLine.getIndex());
                assertEquals("M.", corpusLine.getToken());
                assertEquals("NOUN", corpusLine.getPosTag());
                assertEquals(11, corpusLine.getGovernorIndex());
                assertEquals("nsubj", corpusLine.getLabel());
                break;
            case 13:
                assertEquals(13, corpusLine.getIndex());
                assertEquals("Dupont", corpusLine.getToken());
                assertEquals("PROPN", corpusLine.getPosTag());
                assertEquals(12, corpusLine.getGovernorIndex());
                assertEquals("flat:name", corpusLine.getLabel());
                break;
            default:
                // Lines beyond the thirteenth are not checked individually;
                // the size assertion below catches them.
                break;
            }
        }
        assertEquals(13, corpusLines.size());
    }
}
Also used : StringWriter(java.io.StringWriter) Config(com.typesafe.config.Config) StringReader(java.io.StringReader) CorpusLine(com.joliciel.talismane.corpus.CorpusLine) ParserRegexBasedCorpusReader(com.joliciel.talismane.parser.ParserRegexBasedCorpusReader) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 19 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class StandoffReader method nextConfiguration.

/**
 * Hands out the pending parse configuration, if there is one, and clears it
 * so that it is returned only once.
 *
 * @return the pending configuration, or {@code null} when
 *         {@code hasNextSentence()} reports that there is no further
 *         sentence
 */
@Override
public ParseConfiguration nextConfiguration() throws TalismaneException, IOException {
    if (!this.hasNextSentence()) {
        return null;
    }
    // Take the pending configuration and clear the field.
    ParseConfiguration pending = configuration;
    configuration = null;
    return pending;
}
Also used : ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Example 20 with ParseConfiguration

use of com.joliciel.talismane.parser.ParseConfiguration in project talismane by joliciel-informatique.

the class ValencyByLabelFeature method check.

/**
 * Counts the dependents that the addressed token has for a given dependency
 * label in the current parse configuration.
 *
 * @param wrapper
 *          supplies the parse configuration to inspect
 * @param env
 *          the runtime environment handed on to the nested features
 * @return a result holding the number of dependents, or {@code null} when
 *         either the address function or the dependency label feature yields
 *         no result
 * @throws TalismaneException
 *           if one of the nested feature checks throws it
 */
@Override
public FeatureResult<Integer> check(ParseConfigurationWrapper wrapper, RuntimeEnvironment env) throws TalismaneException {
    ParseConfiguration parseConfig = wrapper.getParseConfiguration();

    FeatureResult<PosTaggedTokenWrapper> addressedToken = addressFunction.check(wrapper, env);
    if (addressedToken == null) {
        return null;
    }

    // The label feature is only evaluated once a token has been found.
    FeatureResult<String> labelResult = dependencyLabelFeature.check(wrapper, env);
    if (labelResult == null) {
        return null;
    }

    PosTaggedToken governor = addressedToken.getOutcome().getPosTaggedToken();
    String dependencyLabel = labelResult.getOutcome();
    int dependentCount = parseConfig.getDependents(governor, dependencyLabel).size();
    return this.generateResult(dependentCount);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) PosTaggedTokenWrapper(com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration)

Aggregations

ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration) 31, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken) 24, PosTaggedTokenWrapper (com.joliciel.talismane.posTagger.features.PosTaggedTokenWrapper) 18, DependencyArc (com.joliciel.talismane.parser.DependencyArc) 7, PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence) 7, TalismaneException (com.joliciel.talismane.TalismaneException) 4, Sentence (com.joliciel.talismane.rawText.Sentence) 4, Transition (com.joliciel.talismane.parser.Transition) 3, SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) 3, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence) 3, NonDeterministicParser (com.joliciel.talismane.parser.NonDeterministicParser) 2, ParseConfigurationProcessor (com.joliciel.talismane.parser.output.ParseConfigurationProcessor) 2, NonDeterministicPosTagger (com.joliciel.talismane.posTagger.NonDeterministicPosTagger) 2, PosTagAnnotatedCorpusReader (com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader) 2, PosTagSequenceProcessor (com.joliciel.talismane.posTagger.output.PosTagSequenceProcessor) 2, SentenceProcessor (com.joliciel.talismane.sentenceDetector.SentenceProcessor) 2, Token (com.joliciel.talismane.tokeniser.Token) 2, TokeniserAnnotatedCorpusReader (com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader) 2, ArrayList (java.util.ArrayList) 2, HashMap (java.util.HashMap) 2