Search in sources :

Example 1 with NetworkParser

Use of org.apache.tika.parser.NetworkParser in the Apache Tika project.

Class TikaCLI, method process.

/**
 * Handles a single command-line argument.
 *
 * <p>The argument is compared against the known options in a fixed order and
 * the first match wins. An option either runs an action immediately (help,
 * version, listings, config dumps, GUI) or records a setting on this instance
 * (output type, encoding, password, parser, and so on). Actions that produce
 * their own output also clear {@code pipeMode}.
 *
 * <p>An argument that matches no option is treated as input: the port number
 * to start a server on when {@code serverMode} is set, standard input when it
 * is {@code "-"}, and otherwise the path of an existing file or else a URL.
 *
 * <p>The order of the checks matters: {@code -e}, {@code -p} and {@code -c}
 * are prefix matches whose remainder is the option value, and a bare
 * {@code -p} selects server mode rather than an empty password.
 *
 * @param arg the argument to handle
 * @throws Exception if the requested action or the processing of the input fails
 */
public void process(String arg) throws Exception {
    // ---- help, version, logging, GUI ----
    if (arg.equals("-?") || arg.equals("--help")) {
        pipeMode = false;
        usage();
        return;
    }
    if (arg.equals("-V") || arg.equals("--version")) {
        pipeMode = false;
        version();
        return;
    }
    if (arg.equals("-v") || arg.equals("--verbose")) {
        org.apache.log4j.Logger.getRootLogger().setLevel(Level.DEBUG);
        return;
    }
    if (arg.equals("-g") || arg.equals("--gui")) {
        pipeMode = false;
        // Hand the config file to the GUI only when one has been set.
        TikaGUI.main(configFilePath != null ? new String[] { configFilePath } : new String[0]);
        return;
    }

    // ---- listings ----
    if (arg.equals("--list-parser") || arg.equals("--list-parsers")) {
        pipeMode = false;
        displayParsers(false, false);
        return;
    }
    if (arg.equals("--list-detector") || arg.equals("--list-detectors")) {
        pipeMode = false;
        displayDetectors();
        return;
    }
    if (arg.equals("--list-parser-detail") || arg.equals("--list-parser-details")) {
        pipeMode = false;
        displayParsers(true, false);
        return;
    }
    if (arg.equals("--list-parser-detail-apt") || arg.equals("--list-parser-details-apt")) {
        pipeMode = false;
        displayParsers(true, true);
        return;
    }
    if (arg.equals("--list-met-models")) {
        pipeMode = false;
        displayMetModels();
        return;
    }
    if (arg.equals("--list-supported-types")) {
        pipeMode = false;
        displaySupportedTypes();
        return;
    }
    if (arg.startsWith("--compare-file-magic=")) {
        pipeMode = false;
        // Everything after the first '=' is passed on as the value.
        compareFileMagic(arg.substring(arg.indexOf('=') + 1));
        return;
    }

    // ---- configuration dumps ----
    if (arg.equals("--dump-minimal-config")) {
        pipeMode = false;
        dumpConfig(TikaConfigSerializer.Mode.MINIMAL);
        return;
    }
    if (arg.equals("--dump-current-config")) {
        pipeMode = false;
        dumpConfig(TikaConfigSerializer.Mode.CURRENT);
        return;
    }
    if (arg.equals("--dump-static-config")) {
        pipeMode = false;
        dumpConfig(TikaConfigSerializer.Mode.STATIC);
        return;
    }
    if (arg.equals("--dump-static-full-config")) {
        pipeMode = false;
        dumpConfig(TikaConfigSerializer.Mode.STATIC_FULL);
        return;
    }

    // ---- parser and runtime settings ----
    if (arg.equals("--container-aware") || arg.equals("--container-aware-detector")) {
        // Accepted but ignored: container-aware detectors are now always used.
        return;
    }
    if (arg.equals("-f") || arg.equals("--fork")) {
        fork = true;
        return;
    }
    if (arg.startsWith("--config=")) {
        configure(arg.substring("--config=".length()));
        return;
    }
    if (arg.startsWith("--digest=")) {
        String algorithmSpec = arg.substring("--digest=".length());
        CommonsDigester.DigestAlgorithm[] algorithms = CommonsDigester.parse(algorithmSpec);
        digester = new CommonsDigester(MAX_MARK, algorithms);
        // Wrap the current parser so that it also computes the digests.
        parser = new DigestingParser(parser, digester);
        return;
    }
    if (arg.startsWith("-e")) {
        // Short form: the value follows the flag directly, e.g. "-eUTF-8".
        encoding = arg.substring("-e".length());
        return;
    }
    if (arg.startsWith("--encoding=")) {
        encoding = arg.substring("--encoding=".length());
        return;
    }
    if (arg.startsWith("-p") && !arg.equals("-p")) {
        // "-p<value>" is a password; a bare "-p" is handled as the port flag below.
        password = arg.substring("-p".length());
        return;
    }
    if (arg.startsWith("--password=")) {
        password = arg.substring("--password=".length());
        return;
    }

    // ---- output selection ----
    if (arg.equals("-j") || arg.equals("--json")) {
        type = JSON;
        return;
    }
    if (arg.equals("-J") || arg.equals("--jsonRecursive")) {
        recursiveJSON = true;
        return;
    }
    if (arg.equals("-y") || arg.equals("--xmp")) {
        type = XMP;
        return;
    }
    if (arg.equals("-x") || arg.equals("--xml")) {
        type = XML;
        return;
    }
    if (arg.equals("-h") || arg.equals("--html")) {
        type = HTML;
        return;
    }
    if (arg.equals("-t") || arg.equals("--text")) {
        type = TEXT;
        return;
    }
    if (arg.equals("-T") || arg.equals("--text-main")) {
        type = TEXT_MAIN;
        return;
    }
    if (arg.equals("-m") || arg.equals("--metadata")) {
        type = METADATA;
        return;
    }
    if (arg.equals("-l") || arg.equals("--language")) {
        type = LANGUAGE;
        return;
    }
    if (arg.equals("-d") || arg.equals("--detect")) {
        type = DETECT;
        return;
    }
    if (arg.startsWith("--extract-dir=")) {
        extractDir = new File(arg.substring("--extract-dir=".length()));
        return;
    }
    if (arg.equals("-z") || arg.equals("--extract")) {
        type = NO_OUTPUT;
        context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor());
        return;
    }
    if (arg.equals("-r") || arg.equals("--pretty-print")) {
        prettyPrint = true;
        return;
    }

    // ---- server / client ----
    if (arg.equals("-p") || arg.equals("--port") || arg.equals("-s") || arg.equals("--server")) {
        serverMode = true;
        pipeMode = false;
        return;
    }
    if (arg.startsWith("-c")) {
        // Short form: the URI follows the flag directly.
        parser = new NetworkParser(new URI(arg.substring("-c".length())));
        return;
    }
    if (arg.startsWith("--client=")) {
        parser = new NetworkParser(new URI(arg.substring("--client=".length())));
        return;
    }

    // ---- not an option: treat the argument as input ----
    pipeMode = false;

    if (serverMode) {
        // After a server flag, the argument is parsed as the port number.
        new TikaServer(Integer.parseInt(arg)).start();
        return;
    }

    if (arg.equals("-")) {
        // Read from standard input; the shield keeps System.in open on close.
        try (InputStream stdin = TikaInputStream.get(new CloseShieldInputStream(System.in))) {
            type.process(stdin, System.out, new Metadata());
        }
        return;
    }

    // An existing regular file takes precedence; anything else must parse as a URL.
    File candidate = new File(arg);
    URL source = candidate.isFile() ? candidate.toURI().toURL() : new URL(arg);

    if (recursiveJSON) {
        handleRecursiveJson(source, System.out);
        return;
    }

    Metadata meta = new Metadata();
    try (InputStream in = TikaInputStream.get(source, meta)) {
        type.process(in, System.out, meta);
    } finally {
        System.out.flush();
    }
}
Also used : CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) TikaInputStream(org.apache.tika.io.TikaInputStream) FileInputStream(java.io.FileInputStream) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) InputStream(java.io.InputStream) JsonMetadata(org.apache.tika.metadata.serialization.JsonMetadata) Metadata(org.apache.tika.metadata.Metadata) XMPMetadata(org.apache.tika.xmp.XMPMetadata) DigestingParser(org.apache.tika.parser.DigestingParser) URI(java.net.URI) NetworkParser(org.apache.tika.parser.NetworkParser) URL(java.net.URL) CommonsDigester(org.apache.tika.parser.utils.CommonsDigester) File(java.io.File) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Aggregations

File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 URI (java.net.URI)1 URL (java.net.URL)1 CloseShieldInputStream (org.apache.commons.io.input.CloseShieldInputStream)1 DocumentInputStream (org.apache.poi.poifs.filesystem.DocumentInputStream)1 TikaInputStream (org.apache.tika.io.TikaInputStream)1 Metadata (org.apache.tika.metadata.Metadata)1 JsonMetadata (org.apache.tika.metadata.serialization.JsonMetadata)1 DigestingParser (org.apache.tika.parser.DigestingParser)1 NetworkParser (org.apache.tika.parser.NetworkParser)1 CommonsDigester (org.apache.tika.parser.utils.CommonsDigester)1 XMPMetadata (org.apache.tika.xmp.XMPMetadata)1