Search in sources :

Example 1 with TalismaneSession

use of com.joliciel.talismane.TalismaneSession in project talismane by joliciel-informatique.

The method main of the class LexiconDeserializer.

/**
 * Command-line entry point that looks up words in the merged lexicon of a Talismane session and
 * logs every lexical entry found for each word.
 *
 * <p>Recognised options: {@code sessionId} (required unless help is requested), {@code lexicon}
 * (semi-colon delimited), {@code words} (comma delimited, required), {@code testLexicon} and
 * {@code ?}/{@code help}. When fewer than two arguments are supplied the usage text is printed to
 * standard output and nothing else happens.
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the original signature (e.g. while printing the help text)
 * @throws SentenceAnnotatorLoadException declared by the original signature
 * @throws TalismaneException declared by the original signature
 * @throws ReflectiveOperationException declared by the original signature
 */
public static void main(String[] args) throws IOException, SentenceAnnotatorLoadException, TalismaneException, ReflectiveOperationException {
    OptionParser parser = new OptionParser();
    parser.accepts("testLexicon", "test lexicon");
    parser.acceptsAll(Arrays.asList("?", "help"), "show help")
            .availableUnless("testLexicon")
            .forHelp();

    OptionSpec<String> sessionIdOption = parser
            .accepts("sessionId", "the current session id - configuration read as talismane.core.[sessionId]")
            .requiredUnless("?", "help")
            .withRequiredArg()
            .ofType(String.class);
    OptionSpec<String> lexiconFilesOption = parser
            .accepts("lexicon", "lexicon(s), semi-colon delimited")
            .withRequiredArg()
            .ofType(String.class)
            .withValuesSeparatedBy(';');
    OptionSpec<String> wordsOption = parser
            .accepts("words", "comma-delimited list of words to test")
            .withRequiredArg()
            .required()
            .ofType(String.class)
            .withValuesSeparatedBy(',');

    // Zero or one argument cannot form a usable command line: show usage and stop.
    if (args.length < 2) {
        parser.printHelpOn(System.out);
        return;
    }

    OptionSet options = parser.parse(args);
    String sessionId = options.valueOf(sessionIdOption);

    // NOTE(review): the resulting config is never read afterwards in this method, so the
    // "lexicon" override has no visible effect here - confirm whether the session lookup
    // below is expected to pick it up some other way.
    Config config;
    if (!options.has(lexiconFilesOption)) {
        config = ConfigFactory.load();
    } else {
        Map<String, Object> overrides = new HashMap<>();
        overrides.put("talismane.core." + sessionId + ".lexicons", options.valuesOf(lexiconFilesOption));
        config = ConfigFactory.parseMap(overrides).withFallback(ConfigFactory.load());
    }

    TalismaneSession talismaneSession = TalismaneSession.get(sessionId);
    List<String> wordsToTest = options.valuesOf(wordsOption);
    PosTaggerLexicon lexicon = talismaneSession.getMergedLexicon();

    for (String candidate : wordsToTest) {
        LOG.info("################");
        LOG.info("Word: " + candidate);
        for (LexicalEntry lexicalEntry : lexicon.getEntries(candidate)) {
            LOG.info(lexicalEntry + ", Full morph: " + lexicalEntry.getMorphology() + ", PosTags: " + lexicon.findPossiblePosTags(candidate));
        }
    }
}
Also used : HashMap(java.util.HashMap) Config(com.typesafe.config.Config) OptionParser(joptsimple.OptionParser) TalismaneSession(com.joliciel.talismane.TalismaneSession) OptionSet(joptsimple.OptionSet)

Example 2 with TalismaneSession

use of com.joliciel.talismane.TalismaneSession in project talismane by joliciel-informatique.

The method main of the class Diacriticizer.

/**
 * Command-line entry point for building or testing a serialized diacriticizer.
 *
 * <p>Two commands are supported:
 * <ul>
 * <li>{@code serializeDiacriticizer}: builds a {@code Diacriticizer} from the session's merged
 * lexicon and writes it as the single entry {@code diacriticizer.obj} of a zip file at the
 * location given by {@code diacriticizer}.</li>
 * <li>{@code testDiacriticizer}: deserializes the diacriticizer from that file and logs the
 * results of {@code diacriticize} for every word given via {@code words}.</li>
 * </ul>
 * When neither command is present, a message and the usage text are printed to standard output.
 * When fewer than two arguments are supplied, only the usage text is printed.
 *
 * @param args raw command-line arguments
 * @throws IOException if printing help or writing the diacriticizer file fails
 * @throws SentenceAnnotatorLoadException declared by the original signature
 * @throws TalismaneException declared by the original signature
 * @throws ReflectiveOperationException declared by the original signature
 */
public static void main(String[] args) throws IOException, SentenceAnnotatorLoadException, TalismaneException, ReflectiveOperationException {
    OptionParser parser = new OptionParser();
    parser.accepts("serializeDiacriticizer", "serialize diacriticizer from lexicon");
    parser.accepts("testDiacriticizer", "test serialized diacriticizer").availableUnless("serializeDiacriticizer");
    parser.acceptsAll(Arrays.asList("?", "help"), "show help").availableUnless("serializeDiacriticizer", "testDiacriticizer").forHelp();
    OptionSpec<String> sessionIdOption = parser.accepts("sessionId", "the current session id - configuration read as talismane.core.[sessionId]").requiredUnless("?", "help").withRequiredArg().ofType(String.class);
    OptionSpec<String> lexiconFilesOption = parser.accepts("lexicon", "lexicon(s), semi-colon delimited").withRequiredArg().ofType(String.class).withValuesSeparatedBy(';');
    OptionSpec<File> diacriticizerOption = parser.accepts("diacriticizer", "diacriticizer file location (in or out)").withRequiredArg().required().ofType(File.class);
    OptionSpec<String> wordsOption = parser.accepts("words", "comma-delimited list of words to test").requiredIf("testDiacriticizer").withRequiredArg().ofType(String.class).withValuesSeparatedBy(',');

    // Zero or one argument cannot form a usable command line: show usage and stop.
    if (args.length <= 1) {
        parser.printHelpOn(System.out);
        return;
    }

    OptionSet options = parser.parse(args);
    String sessionId = options.valueOf(sessionIdOption);

    // NOTE(review): the resulting config is never read afterwards in this method, so the
    // "lexicon" override has no visible effect here - confirm whether the session lookup
    // below is expected to pick it up some other way.
    Config config = null;
    if (options.has(lexiconFilesOption)) {
        List<String> lexiconFiles = options.valuesOf(lexiconFilesOption);
        Map<String, Object> values = new HashMap<>();
        values.put("talismane.core." + sessionId + ".lexicons", lexiconFiles);
        config = ConfigFactory.parseMap(values).withFallback(ConfigFactory.load());
    } else {
        config = ConfigFactory.load();
    }

    TalismaneSession talismaneSession = TalismaneSession.get(sessionId);
    File diacriticizerFile = options.valueOf(diacriticizerOption);

    if (options.has("serializeDiacriticizer")) {
        Diacriticizer diacriticizer = new Diacriticizer(talismaneSession.getMergedLexicon());
        File outDir = diacriticizerFile.getParentFile();
        if (outDir != null) {
            // A failure to create the directory surfaces when the file is opened below.
            outDir.mkdirs();
        }
        // try-with-resources guarantees both streams are closed even when writing fails,
        // so the file handle is never leaked and the original exception is not masked.
        try (FileOutputStream fos = new FileOutputStream(diacriticizerFile);
                ZipOutputStream zos = new ZipOutputStream(fos)) {
            // The entry must be open before the ObjectOutputStream is created, because its
            // constructor immediately writes the serialization stream header.
            zos.putNextEntry(new ZipEntry("diacriticizer.obj"));
            ObjectOutputStream out = new ObjectOutputStream(zos);
            out.writeObject(diacriticizer);
            // Push any data buffered by the object stream into the zip entry before it is closed.
            out.flush();
            zos.closeEntry();
        }
    } else if (options.has("testDiacriticizer")) {
        List<String> words = options.valuesOf(wordsOption);
        Diacriticizer diacriticizer = Diacriticizer.deserialize(diacriticizerFile);
        for (String word : words) {
            LOG.info("################");
            LOG.info("Word: " + word);
            Set<String> entries = diacriticizer.diacriticize(word);
            for (String entry : entries) {
                LOG.info(entry);
            }
        }
    } else {
        System.out.println("No command provided.");
        parser.printHelpOn(System.out);
    }
}
Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) OptionSet(joptsimple.OptionSet) Set(java.util.Set) HashMap(java.util.HashMap) Config(com.typesafe.config.Config) ZipEntry(java.util.zip.ZipEntry) ObjectOutputStream(java.io.ObjectOutputStream) OptionParser(joptsimple.OptionParser) TalismaneSession(com.joliciel.talismane.TalismaneSession) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) List(java.util.List) OptionSet(joptsimple.OptionSet) File(java.io.File)

Aggregations

TalismaneSession (com.joliciel.talismane.TalismaneSession)2 Config (com.typesafe.config.Config)2 HashMap (java.util.HashMap)2 OptionParser (joptsimple.OptionParser)2 OptionSet (joptsimple.OptionSet)2 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 Set (java.util.Set)1 TreeSet (java.util.TreeSet)1 ZipEntry (java.util.zip.ZipEntry)1 ZipOutputStream (java.util.zip.ZipOutputStream)1