Search in sources :

Example 11 with LanguageResult

use of org.apache.tika.language.detect.LanguageResult in project tika by apache.

the class CachedTranslator method translate.

/**
 * Translates the text into the target language without an explicit source language.
 * The source language is taken from {@code detectLanguage(text)} and the work is then
 * handed to the three-argument {@code translate} overload.
 *
 * @param text The text to translate.
 * @param targetLanguage The language to translate into.
 * @return whatever the three-argument {@code translate} returns for the detected source language.
 * @throws TikaException propagated from detection or from the delegated translation.
 * @throws IOException propagated from detection or from the delegated translation.
 */
@Override
public String translate(String text, String targetLanguage) throws TikaException, IOException {
    final String detectedSource = detectLanguage(text).getLanguage();
    return translate(text, detectedSource, targetLanguage);
}
Also used : LanguageResult(org.apache.tika.language.detect.LanguageResult)

Example 12 with LanguageResult

use of org.apache.tika.language.detect.LanguageResult in project tika by apache.

the class CachedTranslator method contains.

/**
 * Reports whether this CachedTranslator's cache holds a translation of the text into the
 * target language. The caller does not supply the source language; it is obtained from
 * {@code detectLanguage(text)} before delegating to the three-argument {@code contains}.
 *
 * @param text What string to check for.
 * @param targetLanguage The target language of translation.
 * @return the result of the three-argument {@code contains} lookup, or false if an
 *         {@link IOException} is raised while detecting the language or performing the lookup.
 */
public boolean contains(String text, String targetLanguage) {
    try {
        final String detectedSource = detectLanguage(text).getLanguage();
        return contains(text, detectedSource, targetLanguage);
    } catch (IOException e) {
        // TODO what to do if we get an error?
        return false;
    }
}
Also used : LanguageResult(org.apache.tika.language.detect.LanguageResult) IOException(java.io.IOException)

Example 13 with LanguageResult

use of org.apache.tika.language.detect.LanguageResult in project tika by apache.

the class TextLangDetectorTest method test.

/**
 * Runs TextLangDetector over every sample in the {@code text-test.tsv} resource and checks
 * that the detected language matches the expected one.
 *
 * Each usable line has exactly two tab-separated fields: the expected language (first field)
 * and the sample text (second field). Lines with any other field count are skipped.
 * The test is skipped entirely when {@code TextLangDetector.canRun()} is false.
 *
 * @throws Exception if reading the resource or running the detector fails.
 */
@Test
public void test() throws Exception {
    assumeTrue(TextLangDetector.canRun());
    LanguageDetector detector = new TextLangDetector();
    LanguageWriter writer = new LanguageWriter(detector);
    try {
        List<String> lines = IOUtils.readLines(TextLangDetectorTest.class.getResourceAsStream("text-test.tsv"));
        for (String line : lines) {
            String[] data = line.split("\t");
            if (data.length != 2) {
                // Not an "<expected language>\t<text>" pair; nothing to check.
                continue;
            }
            // Clear state left over from the previous sample before feeding the next one.
            writer.reset();
            writer.append(data[1]);
            LanguageResult result = detector.detect();
            assertNotNull(result);
            assertEquals(data[0], result.getLanguage());
        }
    } finally {
        // Close the writer even when an assertion or a read fails part-way through the samples.
        writer.close();
    }
}
Also used : LanguageDetector(org.apache.tika.language.detect.LanguageDetector) LanguageResult(org.apache.tika.language.detect.LanguageResult) LanguageWriter(org.apache.tika.language.detect.LanguageWriter) Test(org.junit.Test)

Example 14 with LanguageResult

use of org.apache.tika.language.detect.LanguageResult in project tika by apache.

the class Language method languageDetectionWithWriter.

/**
 * Example: feeds a sentence to a LanguageWriter in several pieces, then prints the
 * language reported by the writer to standard output.
 *
 * @throws IOException propagated from loading the models or from using the writer.
 */
public static void languageDetectionWithWriter() throws IOException {
    // TODO support version of LanguageWriter that doesn't need a detector.
    LanguageDetector detector = new OptimaizeLangDetector().loadModels();
    LanguageWriter writer = new LanguageWriter(detector);

    // The sample text is appended piecewise, in order, to show incremental use of the writer.
    String[] fragments = {
        "Minden emberi lény",
        " szabadon születik és",
        " egyenlő méltósága és",
        " joga van."
    };
    for (String fragment : fragments) {
        writer.append(fragment);
    }

    LanguageResult detected = writer.getLanguage();
    System.out.println(detected.getLanguage());
    writer.close();
}
Also used : LanguageDetector(org.apache.tika.language.detect.LanguageDetector) LanguageResult(org.apache.tika.language.detect.LanguageResult) OptimaizeLangDetector(org.apache.tika.langdetect.OptimaizeLangDetector) LanguageWriter(org.apache.tika.language.detect.LanguageWriter)

Example 15 with LanguageResult

use of org.apache.tika.language.detect.LanguageResult in project tika by apache.

the class LanguageDetectingParser method parse.

/**
 * Delegates parsing to the superclass while also routing the content events to a
 * LanguageHandler, then records the detected language in the metadata.
 *
 * The caller's handler and a fresh LanguageHandler are combined in a TeeContentHandler, which
 * is what the superclass parse receives. After parsing, {@code TikaCoreProperties.LANGUAGE}
 * is set only if the language result reports {@code isReasonablyCertain()}.
 *
 * @param stream The document stream passed on to the superclass parse.
 * @param handler The caller's content handler.
 * @param metadata The metadata passed to the superclass parse and updated with the language.
 * @param context The parse context passed on to the superclass parse.
 * @throws SAXException propagated from the superclass parse.
 * @throws IOException propagated from the superclass parse.
 * @throws TikaException propagated from the superclass parse.
 */
public void parse(InputStream stream, ContentHandler handler, final Metadata metadata, ParseContext context) throws SAXException, IOException, TikaException {
    LanguageHandler languageSink = new LanguageHandler();
    ContentHandler combined = new TeeContentHandler(handler, languageSink);
    super.parse(stream, combined, metadata, context);

    LanguageResult detected = languageSink.getLanguage();
    if (!detected.isReasonablyCertain()) {
        // Leave the metadata untouched rather than record an uncertain guess.
        return;
    }
    metadata.set(TikaCoreProperties.LANGUAGE, detected.getLanguage());
}
Also used : LanguageHandler(org.apache.tika.language.detect.LanguageHandler) LanguageResult(org.apache.tika.language.detect.LanguageResult) TeeContentHandler(org.apache.tika.sax.TeeContentHandler) ContentHandler(org.xml.sax.ContentHandler)

Aggregations

LanguageResult (org.apache.tika.language.detect.LanguageResult)20 LanguageDetector (org.apache.tika.language.detect.LanguageDetector)10 OptimaizeLangDetector (org.apache.tika.langdetect.OptimaizeLangDetector)7 LanguageWriter (org.apache.tika.language.detect.LanguageWriter)7 Test (org.junit.Test)6 Consumes (javax.ws.rs.Consumes)3 POST (javax.ws.rs.POST)3 PUT (javax.ws.rs.PUT)3 Path (javax.ws.rs.Path)3 Produces (javax.ws.rs.Produces)3 ArrayList (java.util.ArrayList)2 LanguageHandler (org.apache.tika.language.detect.LanguageHandler)2 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)2 ParseContext (org.apache.tika.parser.ParseContext)2 ContentHandler (org.xml.sax.ContentHandler)2 DetectedLanguage (com.optimaize.langdetect.DetectedLanguage)1 File (java.io.File)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Detector (org.apache.tika.detect.Detector)1