Search in sources :

Example 16 with PosTagSequence

use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.

the class LexicalAttributeFeatureTest method testCheckInternalMultipleEntries.

/**
 * Checks that a {@code LexicalAttributeFeature} queried for the
 * {@code Person} attribute of the token "demande" (pos-tagged "V") in the
 * sentence "je demande" produces exactly two outcomes, each of which is
 * either "1" or "3".
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    // Select the lexicon-enabled test configuration; cached configuration is
    // invalidated before the (re)load.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    final String text = "je demande";

    // Build the second token ("demande") by hand and tag it as a verb.
    Sentence sentence = new Sentence(text, sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);
    Token verbToken = new Token("demande", tokens, 1, "je ".length(), text.length(), sessionId);
    final PosTaggedToken taggedVerb = new PosTaggedToken(verbToken, new Decision("V", 1.0), sessionId);

    // Address function which always resolves to the hand-built tagged token.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(taggedVerb);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(fixedAddress, personAttribute);

    PosTaggerContext context = new PosTaggerContextImpl(verbToken, new PosTagSequence(tokens));
    FeatureResult<List<WeightedOutcome<String>>> result = feature.checkInternal(context, new RuntimeEnvironment());
    List<WeightedOutcome<String>> outcomes = result.getOutcome();
    System.out.println(outcomes);

    // Every returned person value must be "1" or "3" ...
    for (WeightedOutcome<String> weightedOutcome : outcomes) {
        final String value = weightedOutcome.getOutcome();
        assertTrue("1".equals(value) || "3".equals(value));
    }
    // ... and there must be exactly two of them.
    assertEquals(2, outcomes.size());
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) List(java.util.List) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 17 with PosTagSequence

use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.

the class TalismaneAPIExamples method example1.

/**
 * Runs a fixed French sentence through tokenisation, pos-tagging and
 * parsing, printing the resulting pos-tag sequence and parse tree to
 * standard output.
 *
 * @param sessionId
 *          the session id used to obtain the tokeniser, pos-tagger and parser
 * @throws Exception
 *           if any stage of the pipeline fails
 */
public static void example1(String sessionId) throws Exception {
    final String sentenceText = "Les amoureux qui se bécotent sur les bancs publics ont des petites gueules bien sympathiques.";

    // Stage 1: split the raw text into tokens.
    final TokenSequence tokens = Tokeniser.getInstance(sessionId).tokeniseText(sentenceText);

    // Stage 2: assign a pos-tag to each token and show the result.
    final PosTagSequence taggedTokens = PosTaggers.getPosTagger(sessionId).tagSentence(tokens);
    System.out.println(taggedTokens);

    // Stage 3: parse the tagged sentence and show the resulting tree.
    final ParseConfiguration parse = Parsers.getParser(sessionId).parseSentence(taggedTokens);
    System.out.println(new ParseTree(parse, true));
}
Also used : PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) Tokeniser(com.joliciel.talismane.tokeniser.Tokeniser) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) PosTagger(com.joliciel.talismane.posTagger.PosTagger) OptionParser(joptsimple.OptionParser)

Example 18 with PosTagSequence

use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.

the class PosTagEvaluationSentenceWriter method onNextPosTagSequence.

/**
 * Writes one block of CSV rows comparing a reference pos-tag sequence with
 * up to {@code guessCount} guessed sequences.
 * <p>
 * Rows written, in order:
 * <ol>
 * <li>the analysis text of each reference token;</li>
 * <li>the pos-tag code of each reference token;</li>
 * <li>for each guess index below {@code guessCount}: a row of guessed tag
 * codes aligned on the reference tokens ({@code BAD_TOKEN} where the token
 * boundaries differ), followed by a row of the corresponding decision
 * probabilities. If fewer guesses than {@code guessCount} are available,
 * two empty rows are written in their place.</li>
 * </ol>
 * The writer is flushed at the end.
 *
 * @param realSequence
 *          the reference pos-tag sequence
 * @param guessedSequences
 *          the guessed sequences, in the order they should be written
 * @throws IOException
 *           if writing to the underlying writer fails
 */
@Override
public void onNextPosTagSequence(PosTagSequence realSequence, List<PosTagSequence> guessedSequences) throws IOException {
    // Row 1: reference tokens.
    for (int i = 0; i < realSequence.size(); i++) {
        String token = realSequence.get(i).getToken().getAnalyisText();
        writer.write(CSV.format(token));
    }
    writer.write("\n");

    // Row 2: reference pos-tags.
    for (int i = 0; i < realSequence.size(); i++) {
        writer.write(CSV.format(realSequence.get(i).getTag().getCode()));
    }
    writer.write("\n");

    for (int k = 0; k < guessCount; k++) {
        if (k >= guessedSequences.size()) {
            // No guess at this rank: keep the row layout with two empty rows.
            writer.write("\n");
            writer.write("\n");
            continue;
        }
        // NOTE(review): assumes each guessed sequence contains at least one
        // token; get(0) below would fail on an empty sequence - confirm.
        PosTagSequence posTagSequence = guessedSequences.get(k);

        // j walks the guessed sequence while i walks the reference sequence.
        int j = 0;
        // Probabilities are accumulated and written as their own row after
        // the tag row; a StringBuilder avoids repeated string copying.
        StringBuilder probs = new StringBuilder();
        for (int i = 0; i < realSequence.size(); i++) {
            TaggedToken<PosTag> realToken = realSequence.get(i);
            TaggedToken<PosTag> testToken = posTagSequence.get(j);
            boolean tokenError = false;
            if (realToken.getToken().getStartIndex() == testToken.getToken().getStartIndex() && realToken.getToken().getEndIndex() == testToken.getToken().getEndIndex()) {
                // no token error: advance to the next guessed token, clamping
                // at the last one
                j++;
                if (j == posTagSequence.size()) {
                    j--;
                }
            } else {
                tokenError = true;
                // Skip guessed tokens ending at or before the end of the
                // reference token, clamping at the last guessed token.
                while (realToken.getToken().getEndIndex() >= testToken.getToken().getEndIndex()) {
                    j++;
                    if (j == posTagSequence.size()) {
                        j--;
                        break;
                    }
                    testToken = posTagSequence.get(j);
                }
            }
            if (tokenError) {
                writer.write(CSV.format("BAD_TOKEN"));
            } else {
                writer.write(CSV.format(testToken.getTag().getCode()));
            }
            probs.append(CSV.format(testToken.getDecision().getProbability()));
        }
        writer.write("\n");
        probs.append("\n");
        writer.write(probs.toString());
    }
    writer.flush();
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence)

Example 19 with PosTagSequence

use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.

the class ParseEvaluationSentenceWriter method onParseEnd.

/**
 * Writes one block of CSV rows comparing the reference parse of a sentence
 * with up to {@code guessCount} guessed parses.
 * <p>
 * Columns are keyed by token start index: the start indexes of all non-root
 * tokens in the reference configuration (plus, when {@code hasTokeniser} or
 * {@code hasPosTagger} is set, those of the first {@code guessCount} guessed
 * configurations) are sorted and mapped to consecutive column numbers.
 * Columns with no token in a given configuration are left empty.
 * <p>
 * Rows written for the reference configuration: original token text,
 * pos-tag code, governing dependency label, and governor column label (or
 * {@code ROOT}). For each guess index below {@code guessCount}: optionally
 * the token text (if {@code hasTokeniser}) and pos-tag code (if
 * {@code hasPosTagger}), then dependency label, governor column and
 * transition probability. If fewer guesses than {@code guessCount} exist,
 * two empty rows are written for each missing guess. The writer is flushed
 * at the end.
 *
 * @param realConfiguration
 *          the reference parse configuration
 * @param guessedConfigurations
 *          the guessed parse configurations, in the order they should be
 *          written
 * @throws IOException
 *           if writing to the underlying writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
    // Collect every distinct token start index, sorted, to define the columns.
    TreeSet<Integer> startIndexes = new TreeSet<Integer>();
    for (PosTaggedToken posTaggedToken : realConfiguration.getPosTagSequence()) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            Token token = posTaggedToken.getToken();
            startIndexes.add(token.getStartIndex());
        }
    }
    if (hasTokeniser || hasPosTagger) {
        // Guessed configurations contribute their own start indexes as well,
        // but only for the guesses that will actually be written.
        int i = 0;
        for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
            for (PosTaggedToken posTaggedToken : guessedConfiguration.getPosTagSequence()) {
                if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    Token token = posTaggedToken.getToken();
                    startIndexes.add(token.getStartIndex());
                }
            }
            i++;
            if (i == guessCount)
                break;
        }
    }
    // Map each start index to its column number.
    Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
    int j = 0;
    for (int startIndex : startIndexes) {
        startIndexMap.put(startIndex, j++);
    }
    // Place the reference tokens in their columns; unfilled columns stay null.
    PosTagSequence posTagSequence = realConfiguration.getPosTagSequence();
    PosTaggedToken[] realTokens = new PosTaggedToken[startIndexes.size()];
    for (PosTaggedToken posTaggedToken : posTagSequence) {
        if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            realTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
        }
    }
    // Reference row: original token text.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
    // Reference row: pos-tag codes.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
    // Reference row: governing dependency labels.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
            // A token may have no governing arc (the governor row below
            // already allows for this); write an empty label in that case,
            // as is done for guessed configurations.
            String realLabel = "";
            if (realArc != null) {
                realLabel = realArc.getLabel() == null ? "null" : realArc.getLabel();
            }
            csvFileWriter.write(CSV.format(realLabel));
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
    // Reference row: governor column label, or ROOT when there is no
    // governing arc or the governor is the artificial root.
    for (PosTaggedToken posTaggedToken : realTokens) {
        if (posTaggedToken != null) {
            DependencyArc realArc = realConfiguration.getGoverningDependency(posTaggedToken);
            int startIndex = -1;
            if (realArc != null) {
                PosTaggedToken head = realArc.getHead();
                if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    startIndex = head.getToken().getStartIndex();
                }
            }
            if (startIndex < 0)
                csvFileWriter.write(CSV.format("ROOT"));
            else
                csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
        } else {
            csvFileWriter.write(CSV.getCsvSeparator());
        }
    }
    csvFileWriter.write("\n");
    for (int i = 0; i < guessCount; i++) {
        if (i < guessedConfigurations.size()) {
            ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
            // Place the guessed tokens in their columns.
            PosTaggedToken[] guessedTokens = new PosTaggedToken[startIndexes.size()];
            for (PosTaggedToken posTaggedToken : guessedConfiguration.getPosTagSequence()) {
                if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
                    guessedTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
                }
            }
            if (hasTokeniser) {
                // Guessed row: original token text.
                for (PosTaggedToken posTaggedToken : guessedTokens) {
                    if (posTaggedToken != null) {
                        csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
                    } else {
                        csvFileWriter.write(CSV.getCsvSeparator());
                    }
                }
                csvFileWriter.write("\n");
            }
            if (hasPosTagger) {
                // Guessed row: pos-tag codes.
                for (PosTaggedToken posTaggedToken : guessedTokens) {
                    if (posTaggedToken != null) {
                        csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
                    } else {
                        csvFileWriter.write(CSV.getCsvSeparator());
                    }
                }
                csvFileWriter.write("\n");
            }
            // Guessed row: governing dependency labels.
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    String guessedLabel = "";
                    if (guessedArc != null) {
                        guessedLabel = guessedArc.getLabel() == null ? "null" : guessedArc.getLabel();
                    }
                    csvFileWriter.write(CSV.format(guessedLabel));
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");
            // Guessed row: governor column label, or ROOT.
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    int startIndex = -1;
                    if (guessedArc != null) {
                        PosTaggedToken head = guessedArc.getHead();
                        if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
                            startIndex = head.getToken().getStartIndex();
                        }
                    }
                    if (startIndex < 0)
                        csvFileWriter.write(CSV.format("ROOT"));
                    else
                        csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");
            // Guessed row: probability of the transition that created the
            // governing arc; 1.0 when there is no arc or no transition.
            for (PosTaggedToken posTaggedToken : guessedTokens) {
                if (posTaggedToken != null) {
                    DependencyArc guessedArc = guessedConfiguration.getGoverningDependency(posTaggedToken);
                    double prob = 1.0;
                    if (guessedArc != null) {
                        Transition transition = guessedConfiguration.getTransition(guessedArc);
                        if (transition != null)
                            prob = transition.getDecision().getProbability();
                    }
                    csvFileWriter.write(CSV.format(prob));
                } else {
                    csvFileWriter.write(CSV.getCsvSeparator());
                }
            }
            csvFileWriter.write("\n");
        } else {
            // No guessed configuration at this rank: write two empty rows.
            csvFileWriter.write("\n");
            csvFileWriter.write("\n");
        }
        // have more configurations
    }
    // next guessed configuration
    csvFileWriter.flush();
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) TreeSet(java.util.TreeSet) Transition(com.joliciel.talismane.parser.Transition) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc)

Example 20 with PosTagSequence

use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.

the class TalismaneMain method execute.

/**
 * Execute Talismane based on the configuration provided.
 *
 * @param sessionId
 *          The current session's id
 * @param inFile
 *          The file or directory to analyse
 * @param outFile
 *          The file or directory to write the analysis.
 * @param outDir
 *          The directory for writing additional output files (other than the
 *          main analysis).
 * @param keepDirectoryStructure
 *          For analyse and process: if true, and inFile is a directory,
 *          outFile will be interpreted as a directory and the inFile
 *          directory structure will be maintained
 * @param evalFile
 * @throws IOException
 * @throws ReflectiveOperationException
 * @throws TalismaneException
 *           if attempt is made to start and end on two unsupported modules.
 * @throws SentenceAnnotatorLoadException
 */
public void execute(String sessionId, File inFile, File outFile, File outDir, File evalFile, boolean keepDirectoryStructure) throws IOException, ReflectiveOperationException, TalismaneException, SentenceAnnotatorLoadException {
    long startTime = System.currentTimeMillis();
    TalismaneSession session = TalismaneSession.get(sessionId);
    session.setFileForBasename(inFile);
    Config config = ConfigFactory.load();
    try {
        switch(session.getCommand()) {
            case analyse:
                {
                    Module startModule = Module.valueOf(config.getString("talismane.core." + sessionId + ".analysis.start-module"));
                    Module endModule = Module.valueOf(config.getString("talismane.core." + sessionId + ".analysis.end-module"));
                    Reader reader = getReader(inFile, true, sessionId);
                    Writer writer = getWriter(outFile, inFile, keepDirectoryStructure, reader, sessionId);
                    if (startModule == Module.languageDetector) {
                        if (endModule != Module.languageDetector)
                            throw new TalismaneException("Talismane does not currently support analysis starting with " + startModule.name() + " and ending with another module.");
                        LanguageDetector languageDetector = LanguageDetector.getInstance(sessionId);
                        LanguageDetectorProcessor processor = LanguageDetectorProcessor.getProcessor(writer, sessionId);
                        SentenceDetectorAnnotatedCorpusReader corpusReader = SentenceDetectorAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".language-detector.input"), sessionId);
                        while (corpusReader.hasNextSentence()) {
                            String sentence = corpusReader.nextSentence().getText().toString();
                            List<WeightedOutcome<Locale>> results = languageDetector.detectLanguages(sentence);
                            processor.onNextText(sentence, results);
                        }
                    } else {
                        Mode mode = Mode.valueOf(config.getString("talismane.core." + sessionId + ".mode"));
                        switch(mode) {
                            case normal:
                                Talismane talismane = new Talismane(writer, outDir, sessionId);
                                talismane.analyse(reader);
                                break;
                            case server:
                                TalismaneServer talismaneServer = new TalismaneServer(sessionId);
                                talismaneServer.analyse();
                                break;
                        }
                    }
                    break;
                }
            case train:
                {
                    Reader reader = getReader(inFile, false, sessionId);
                    switch(session.getModule()) {
                        case languageDetector:
                            {
                                LanguageDetectorTrainer trainer = new LanguageDetectorTrainer(sessionId);
                                trainer.train();
                                break;
                            }
                        case sentenceDetector:
                            {
                                SentenceDetectorTrainer trainer = new SentenceDetectorTrainer(reader, sessionId);
                                trainer.train();
                                break;
                            }
                        case tokeniser:
                            {
                                PatternTokeniserTrainer trainer = new PatternTokeniserTrainer(reader, sessionId);
                                trainer.train();
                                break;
                            }
                        case posTagger:
                            {
                                PosTaggerTrainer trainer = new PosTaggerTrainer(reader, sessionId);
                                trainer.train();
                                break;
                            }
                        case parser:
                            {
                                ParserTrainer trainer = new ParserTrainer(reader, sessionId);
                                trainer.train();
                                break;
                            }
                    }
                    break;
                }
            case evaluate:
                {
                    Reader reader = getReader(inFile, false, sessionId);
                    switch(session.getModule()) {
                        case sentenceDetector:
                            {
                                SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(reader, outDir, sessionId);
                                evaluator.evaluate();
                                break;
                            }
                        case tokeniser:
                            {
                                TokeniserEvaluator evaluator = new TokeniserEvaluator(reader, outDir, sessionId);
                                evaluator.evaluate();
                                break;
                            }
                        case posTagger:
                            {
                                PosTaggerEvaluator evaluator = new PosTaggerEvaluator(reader, outDir, sessionId);
                                evaluator.evaluate();
                                break;
                            }
                        case parser:
                            {
                                ParserEvaluator evaluator = new ParserEvaluator(reader, outDir, sessionId);
                                evaluator.evaluate();
                                break;
                            }
                        default:
                            throw new TalismaneException("Command '" + session.getCommand() + "' does not yet support module: " + session.getModule());
                    }
                    break;
                }
            case compare:
                {
                    Reader reader = getReader(inFile, false, sessionId);
                    Reader evalReader = getReader(evalFile, false, sessionId);
                    switch(session.getModule()) {
                        case tokeniser:
                            {
                                TokenComparator comparator = new TokenComparator(reader, evalReader, outDir, sessionId);
                                comparator.compare();
                                break;
                            }
                        case posTagger:
                            {
                                PosTagComparator comparator = new PosTagComparator(reader, evalReader, outDir, sessionId);
                                comparator.evaluate();
                                break;
                            }
                        case parser:
                            {
                                ParseComparator comparator = new ParseComparator(reader, evalReader, outDir, sessionId);
                                comparator.evaluate();
                                break;
                            }
                        default:
                            throw new TalismaneException("Command '" + session.getCommand() + "' does not yet support module: " + session.getModule());
                    }
                    break;
                }
            case process:
                {
                    Reader reader = getReader(inFile, false, sessionId);
                    Writer writer = getWriter(outFile, inFile, keepDirectoryStructure, reader, sessionId);
                    File currentFile = null;
                    URI currentURI = null;
                    IOException ioException = null;
                    switch(session.getModule()) {
                        case sentenceDetector:
                            {
                                List<SentenceProcessor> processors = SentenceProcessor.getProcessors(writer, outDir, sessionId);
                                try {
                                    SentenceDetectorAnnotatedCorpusReader corpusReader = SentenceDetectorAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".sentence-detector.input"), sessionId);
                                    while (corpusReader.hasNextSentence()) {
                                        Sentence sentence = corpusReader.nextSentence();
                                        if (sentence.getFileURI() != null && !sentence.getFileURI().equals(currentURI)) {
                                            currentURI = sentence.getFileURI();
                                            currentFile = sentence.getFile();
                                            if (writer instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) writer).onNextFile(currentFile);
                                            for (SentenceProcessor processor : processors) if (processor instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) processor).onNextFile(currentFile);
                                        }
                                        for (SentenceProcessor processor : processors) processor.onNextSentence(sentence);
                                    }
                                } finally {
                                    for (SentenceProcessor processor : processors) {
                                        try {
                                            processor.close();
                                        } catch (IOException e) {
                                            LogUtils.logError(LOG, e);
                                            ioException = e;
                                        }
                                    }
                                }
                                break;
                            }
                        case tokeniser:
                            {
                                List<TokenSequenceProcessor> processors = TokenSequenceProcessor.getProcessors(writer, outDir, sessionId);
                                try {
                                    TokeniserAnnotatedCorpusReader corpusReader = TokeniserAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".tokeniser.input"), sessionId);
                                    while (corpusReader.hasNextSentence()) {
                                        TokenSequence tokenSequence = corpusReader.nextTokenSequence();
                                        Sentence sentence = tokenSequence.getSentence();
                                        if (sentence.getFileURI() != null && !sentence.getFileURI().equals(currentFile)) {
                                            currentFile = sentence.getFile();
                                            if (writer instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) writer).onNextFile(currentFile);
                                            for (TokenSequenceProcessor processor : processors) if (processor instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) processor).onNextFile(currentFile);
                                        }
                                        for (TokenSequenceProcessor processor : processors) processor.onNextTokenSequence(tokenSequence);
                                    }
                                } finally {
                                    for (TokenSequenceProcessor processor : processors) {
                                        try {
                                            processor.close();
                                        } catch (IOException e) {
                                            LogUtils.logError(LOG, e);
                                            ioException = e;
                                        }
                                    }
                                }
                                break;
                            }
                        case posTagger:
                            {
                                List<PosTagSequenceProcessor> processors = PosTagSequenceProcessor.getProcessors(writer, outDir, sessionId);
                                try {
                                    PosTagAnnotatedCorpusReader corpusReader = PosTagAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".pos-tagger.input"), sessionId);
                                    while (corpusReader.hasNextSentence()) {
                                        PosTagSequence posTagSequence = corpusReader.nextPosTagSequence();
                                        Sentence sentence = posTagSequence.getTokenSequence().getSentence();
                                        if (sentence.getFile() != null && !sentence.getFile().equals(currentFile)) {
                                            currentFile = sentence.getFile();
                                            if (writer instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) writer).onNextFile(currentFile);
                                            for (PosTagSequenceProcessor processor : processors) if (processor instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) processor).onNextFile(currentFile);
                                        }
                                        for (PosTagSequenceProcessor processor : processors) processor.onNextPosTagSequence(posTagSequence);
                                    }
                                } finally {
                                    for (PosTagSequenceProcessor processor : processors) {
                                        try {
                                            processor.onCompleteAnalysis();
                                            processor.close();
                                        } catch (IOException e) {
                                            LogUtils.logError(LOG, e);
                                            ioException = e;
                                        }
                                    }
                                }
                                break;
                            }
                        case parser:
                            {
                                List<ParseConfigurationProcessor> processors = ParseConfigurationProcessor.getProcessors(writer, outDir, sessionId);
                                try {
                                    ParserAnnotatedCorpusReader corpusReader = ParserAnnotatedCorpusReader.getCorpusReader(reader, config.getConfig("talismane.core." + sessionId + ".parser.input"), sessionId);
                                    while (corpusReader.hasNextSentence()) {
                                        ParseConfiguration configuration = corpusReader.nextConfiguration();
                                        Sentence sentence = configuration.getSentence();
                                        if (sentence.getFile() != null && !sentence.getFile().equals(currentFile)) {
                                            currentFile = sentence.getFile();
                                            if (writer instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) writer).onNextFile(currentFile);
                                            for (ParseConfigurationProcessor processor : processors) if (processor instanceof CurrentFileObserver)
                                                ((CurrentFileObserver) processor).onNextFile(currentFile);
                                        }
                                        for (ParseConfigurationProcessor processor : processors) processor.onNextParseConfiguration(configuration);
                                    }
                                } finally {
                                    for (ParseConfigurationProcessor processor : processors) {
                                        try {
                                            processor.onCompleteParse();
                                            processor.close();
                                        } catch (IOException e) {
                                            LogUtils.logError(LOG, e);
                                            ioException = e;
                                        }
                                    }
                                }
                                break;
                            }
                        default:
                            throw new TalismaneException("Command '" + session.getCommand() + "' does not yet support module: " + session.getModule());
                    }
                    if (ioException != null)
                        throw ioException;
                    break;
                }
        }
    } finally {
        long endTime = System.currentTimeMillis();
        long totalTime = endTime - startTime;
        LOG.debug("Total time for Talismane.process(): " + totalTime);
        if (config.getBoolean("talismane.core." + sessionId + ".output.log-execution-time")) {
            try {
                CSVFormatter CSV = new CSVFormatter();
                Writer csvFileWriter = null;
                File csvFile = new File(outDir, session.getBaseName() + ".stats.csv");
                csvFile.delete();
                csvFile.createNewFile();
                csvFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
                csvFileWriter.write(CSV.format("total time") + CSV.format(totalTime) + "\n");
                csvFileWriter.flush();
                csvFileWriter.close();
            } catch (Exception e) {
                LogUtils.logError(LOG, e);
            }
        }
    }
}
Also used : Locale(java.util.Locale) SentenceDetectorAnnotatedCorpusReader(com.joliciel.talismane.sentenceDetector.SentenceDetectorAnnotatedCorpusReader) SentenceDetectorTrainer(com.joliciel.talismane.sentenceDetector.SentenceDetectorTrainer) ParserTrainer(com.joliciel.talismane.parser.ParserTrainer) PosTagComparator(com.joliciel.talismane.posTagger.evaluate.PosTagComparator) List(java.util.List) ParserAnnotatedCorpusReader(com.joliciel.talismane.parser.ParserAnnotatedCorpusReader) PosTaggerTrainer(com.joliciel.talismane.posTagger.PosTaggerTrainer) Sentence(com.joliciel.talismane.rawText.Sentence) ParserEvaluator(com.joliciel.talismane.parser.evaluate.ParserEvaluator) PosTagSequenceProcessor(com.joliciel.talismane.posTagger.output.PosTagSequenceProcessor) TokeniserAnnotatedCorpusReader(com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader) PatternTokeniserTrainer(com.joliciel.talismane.tokeniser.patterns.PatternTokeniserTrainer) FileOutputStream(java.io.FileOutputStream) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) CSVFormatter(com.joliciel.talismane.utils.CSVFormatter) Module(com.joliciel.talismane.Talismane.Module) LanguageDetectorTrainer(com.joliciel.talismane.languageDetector.LanguageDetectorTrainer) ParseComparator(com.joliciel.talismane.parser.evaluate.ParseComparator) File(java.io.File) SentenceDetectorEvaluator(com.joliciel.talismane.sentenceDetector.SentenceDetectorEvaluator) TokenSequenceProcessor(com.joliciel.talismane.tokeniser.output.TokenSequenceProcessor) Config(com.typesafe.config.Config) SentenceProcessor(com.joliciel.talismane.sentenceDetector.SentenceProcessor) LexiconReader(com.joliciel.talismane.lexicon.LexiconReader) PosTagAnnotatedCorpusReader(com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader) SingleFileReader(com.joliciel.talismane.utils.io.SingleFileReader) ParserAnnotatedCorpusReader(com.joliciel.talismane.parser.ParserAnnotatedCorpusReader) Reader(java.io.Reader) 
SentenceDetectorAnnotatedCorpusReader(com.joliciel.talismane.sentenceDetector.SentenceDetectorAnnotatedCorpusReader) TokeniserAnnotatedCorpusReader(com.joliciel.talismane.tokeniser.TokeniserAnnotatedCorpusReader) DirectoryReader(com.joliciel.talismane.utils.io.DirectoryReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) URI(java.net.URI) PosTaggerEvaluator(com.joliciel.talismane.posTagger.evaluate.PosTaggerEvaluator) TokenComparator(com.joliciel.talismane.tokeniser.evaluate.TokenComparator) BufferedWriter(java.io.BufferedWriter) ParseConfigurationProcessor(com.joliciel.talismane.parser.output.ParseConfigurationProcessor) PosTagAnnotatedCorpusReader(com.joliciel.talismane.posTagger.PosTagAnnotatedCorpusReader) LanguageDetectorProcessor(com.joliciel.talismane.languageDetector.LanguageDetectorProcessor) Mode(com.joliciel.talismane.Talismane.Mode) TokeniserEvaluator(com.joliciel.talismane.tokeniser.evaluate.TokeniserEvaluator) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) SentenceAnnotatorLoadException(com.joliciel.talismane.sentenceAnnotators.SentenceAnnotatorLoadException) IOException(java.io.IOException) JoranException(ch.qos.logback.core.joran.spi.JoranException) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) LanguageDetector(com.joliciel.talismane.languageDetector.LanguageDetector) OutputStreamWriter(java.io.OutputStreamWriter) CurrentFileObserver(com.joliciel.talismane.utils.io.CurrentFileObserver) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter) DirectoryWriter(com.joliciel.talismane.utils.io.DirectoryWriter)

Aggregations

PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence) 23, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken) 14, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence) 13, Sentence (com.joliciel.talismane.rawText.Sentence) 12, Decision (com.joliciel.talismane.machineLearning.Decision) 8, RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) 7, ParseConfiguration (com.joliciel.talismane.parser.ParseConfiguration) 7, Token (com.joliciel.talismane.tokeniser.Token) 7, TalismaneTest (com.joliciel.talismane.TalismaneTest) 6, PosTaggerContext (com.joliciel.talismane.posTagger.PosTaggerContext) 6, PosTaggerContextImpl (com.joliciel.talismane.posTagger.PosTaggerContextImpl) 6, Config (com.typesafe.config.Config) 6, StringLiteralFeature (com.joliciel.talismane.machineLearning.features.StringLiteralFeature) 5, List (java.util.List) 5, Test (org.junit.Test) 5, TalismaneException (com.joliciel.talismane.TalismaneException) 4, DependencyArc (com.joliciel.talismane.parser.DependencyArc) 4, SentenceAnnotator (com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) 4, NonDeterministicPosTagger (com.joliciel.talismane.posTagger.NonDeterministicPosTagger) 3, PosTag (com.joliciel.talismane.posTagger.PosTag) 3