Search in sources :

Example 56 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project nutch by apache.

The following is the main method of the ParseText class.

/**
 * Command-line entry point: fetches record {@code recno} from the parse-text
 * data of a segment and prints it to stdout.
 *
 * @param argv (-local | -dfs &lt;namenode:port&gt;) recno segment
 * @throws Exception on any filesystem or parse failure
 */
public static void main(String[] argv) throws Exception {
    String usage = "ParseText (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage: " + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    // GenericOptionsParser may consume -fs/-D/... options; only positional
    // arguments remain afterwards.
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    // Guard: generic options can shrink the argument list below the two
    // positional args we index into (was a latent ArrayIndexOutOfBounds).
    if (remainingArgs.length < 2) {
        System.out.println("usage: " + usage);
        return;
    }
    try (FileSystem fs = FileSystem.get(conf)) {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        String filename = new Path(segment, ParseText.DIR_NAME).toString();
        ParseText parseText = new ParseText();
        // try-with-resources: the reader was previously leaked if get() threw.
        try (ArrayFile.Reader parseTexts = new ArrayFile.Reader(fs, filename, conf)) {
            parseTexts.get(recno, parseText);
            System.out.println("Retrieved " + recno + " from file " + filename);
            System.out.println(parseText);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) NutchConfiguration(org.apache.nutch.util.NutchConfiguration) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayFile(org.apache.hadoop.io.ArrayFile) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 57 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

The following is the run method of the PcapCli class.

/**
 * Runs a pcap query job ("fixed" or "query" mode) and writes the matching
 * packets out in chunks of {@code numRecordsPerFile}.
 *
 * @param args first element is the job type ("fixed" or "query"); the rest are
 *             Hadoop generic options followed by job-specific options
 * @return 0 on success, -1 on any error
 */
public int run(String[] args) {
    if (args.length < 1) {
        printBasicHelp();
        return -1;
    }
    String jobType = args[0];
    SequenceFileIterable results = null;
    String[] commandArgs = Arrays.copyOfRange(args, 1, args.length);
    Configuration hadoopConf = new Configuration();
    String[] otherArgs = null;
    try {
        // Strip Hadoop generic options (-D, -conf, -fs, ...) into hadoopConf.
        otherArgs = new GenericOptionsParser(hadoopConf, commandArgs).getRemainingArgs();
    } catch (IOException e) {
        LOGGER.error("Failed to configure hadoop with provided options: {}", e.getMessage(), e);
        return -1;
    }
    CliConfig commonConfig = null;
    if ("fixed".equals(jobType)) {
        FixedCliParser fixedParser = new FixedCliParser(prefixStrategy);
        FixedCliConfig config = null;
        try {
            config = fixedParser.parse(otherArgs);
            commonConfig = config;
        } catch (ParseException | java.text.ParseException e) {
            System.err.println(e.getMessage());
            System.err.flush();
            fixedParser.printHelp();
            return -1;
        }
        if (config.showHelp()) {
            fixedParser.printHelp();
            return 0;
        }
        Pair<Long, Long> time = timeAsNanosecondsSinceEpoch(config.getStartTime(), config.getEndTime());
        long startTime = time.getLeft();
        long endTime = time.getRight();
        try {
            results = jobRunner.query(new Path(config.getBasePath()), new Path(config.getBaseOutputPath()), startTime, endTime, config.getNumReducers(), config.getFixedFields(), hadoopConf, FileSystem.get(hadoopConf), new FixedPcapFilter.Configurator());
        } catch (IOException | ClassNotFoundException e) {
            LOGGER.error("Failed to execute fixed filter job: {}", e.getMessage(), e);
            return -1;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            LOGGER.error("Failed to execute fixed filter job: {}", e.getMessage(), e);
            return -1;
        }
    } else if ("query".equals(jobType)) {
        QueryCliParser queryParser = new QueryCliParser(prefixStrategy);
        QueryCliConfig config = null;
        try {
            config = queryParser.parse(otherArgs);
            commonConfig = config;
        } catch (ParseException | java.text.ParseException e) {
            System.err.println(e.getMessage());
            // Flush before help output, consistent with the "fixed" branch.
            System.err.flush();
            queryParser.printHelp();
            return -1;
        }
        if (config.showHelp()) {
            queryParser.printHelp();
            return 0;
        }
        Pair<Long, Long> time = timeAsNanosecondsSinceEpoch(config.getStartTime(), config.getEndTime());
        long startTime = time.getLeft();
        long endTime = time.getRight();
        try {
            results = jobRunner.query(new Path(config.getBasePath()), new Path(config.getBaseOutputPath()), startTime, endTime, config.getNumReducers(), config.getQuery(), hadoopConf, FileSystem.get(hadoopConf), new QueryPcapFilter.Configurator());
        } catch (IOException | ClassNotFoundException e) {
            LOGGER.error("Failed to execute query filter job: {}", e.getMessage(), e);
            return -1;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            LOGGER.error("Failed to execute query filter job: {}", e.getMessage(), e);
            return -1;
        }
    } else {
        printBasicHelp();
        return -1;
    }
    try {
        // Write results in fixed-size chunks, one output file per chunk.
        Iterable<List<byte[]>> partitions = Iterables.partition(results, commonConfig.getNumRecordsPerFile());
        int part = 1;
        if (partitions.iterator().hasNext()) {
            for (List<byte[]> data : partitions) {
                String outFileName = String.format("pcap-data-%s+%04d.pcap", commonConfig.getPrefix(), part++);
                if (data.size() > 0) {
                    resultsWriter.write(data, outFileName);
                }
            }
        } else {
            System.out.println("No results returned.");
        }
    } catch (IOException e) {
        LOGGER.error("Unable to write file", e);
        return -1;
    } finally {
        // Null-guard: jobRunner.query may return null, and an NPE here would
        // mask whatever went wrong above.
        if (results != null) {
            try {
                results.cleanup();
            } catch (IOException e) {
                LOGGER.warn("Unable to cleanup files in HDFS", e);
            }
        }
    }
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) QueryPcapFilter(org.apache.metron.pcap.filter.query.QueryPcapFilter) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) SequenceFileIterable(org.apache.metron.common.hadoop.SequenceFileIterable) ParseException(org.apache.commons.cli.ParseException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 58 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

The following is the main method of the PcapInspector class.

/**
 * Dumps the packets stored in a pcap sequence file as human-readable field
 * lists, one line per packet, optionally limited to the first N records.
 *
 * @param argv Hadoop generic options followed by InspectorOptions
 * @throws IOException if the sequence file cannot be read
 */
public static void main(String... argv) throws IOException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, argv).getRemainingArgs();
    CommandLine cli = InspectorOptions.parse(new PosixParser(), otherArgs);
    Path inputPath = new Path(InspectorOptions.INPUT.get(cli));
    // n < 0 means "no limit".
    int n = -1;
    if (InspectorOptions.NUM.has(cli)) {
        n = Integer.parseInt(InspectorOptions.NUM.get(cli));
    }
    // Reuse 'conf' (which carries any -D/-conf generic options) instead of a
    // fresh Configuration, and close the reader deterministically — it was
    // previously leaked.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(inputPath))) {
        LongWritable key = new LongWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; (n < 0 || i < n) && reader.next(key, value); ++i) {
            // Keys are nanosecond timestamps; convert to millis for formatting.
            long millis = Long.divideUnsigned(key.get(), 1000000);
            String ts = DATE_FORMAT.format(new Date(millis));
            try {
                for (PacketInfo pi : PcapHelper.toPacketInfo(value.copyBytes())) {
                    Map<String, Object> result = PcapHelper.packetToFields(pi);
                    List<String> fieldResults = new ArrayList<String>() {

                        {
                            add("TS: " + ts);
                        }
                    };
                    for (Constants.Fields field : Constants.Fields.values()) {
                        if (result.containsKey(field.getName())) {
                            fieldResults.add(field.getName() + ": " + result.get(field.getName()));
                        }
                    }
                    System.out.println(Joiner.on(",").join(fieldResults));
                }
            } catch (Exception e) {
                // Malformed packets are reported and skipped, not fatal.
                System.out.println(String.format("Error: malformed packet #=%s, ts=%s, error msg=%s", i + 1, ts, e.getMessage()));
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Constants(org.apache.metron.common.Constants) BytesWritable(org.apache.hadoop.io.BytesWritable) Date(java.util.Date) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile) PacketInfo(org.apache.metron.pcap.PacketInfo) LongWritable(org.apache.hadoop.io.LongWritable) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 59 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

The following is the main method of the SimpleEnrichmentFlatFileLoader class.

/**
 * CLI entry point: applies Hadoop generic options (-D, -conf, -fs, ...) to an
 * HBase configuration, then delegates to the overload that does the real work.
 */
public static void main(String... argv) throws Exception {
    final Configuration config = HBaseConfiguration.create();
    final GenericOptionsParser parser = new GenericOptionsParser(config, argv);
    main(config, parser.getRemainingArgs());
}
Also used : HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Configuration(org.apache.hadoop.conf.Configuration) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 60 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

The following is the main method of the SimpleFlatFileSummarizer class.

/**
 * CLI entry point: strips Hadoop generic options into an HBase configuration
 * and hands the remaining arguments to the two-argument overload.
 */
public static void main(String... argv) throws Exception {
    final Configuration config = HBaseConfiguration.create();
    final GenericOptionsParser parser = new GenericOptionsParser(config, argv);
    main(config, parser.getRemainingArgs());
}
Also used : HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Configuration(org.apache.hadoop.conf.Configuration) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)63 Configuration (org.apache.hadoop.conf.Configuration)44 Job (org.apache.hadoop.mapreduce.Job)26 Path (org.apache.hadoop.fs.Path)22 CommandLine (org.apache.commons.cli.CommandLine)12 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)10 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)8 PosixParser (org.apache.commons.cli.PosixParser)7 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)7 IOException (java.io.IOException)6 Options (org.apache.commons.cli.Options)6 FileSystem (org.apache.hadoop.fs.FileSystem)6 HashMap (java.util.HashMap)5 YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler)5 ArrayList (java.util.ArrayList)4 ParseException (org.apache.commons.cli.ParseException)4 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)4 JobConf (org.apache.hadoop.mapred.JobConf)4 File (java.io.File)3 Random (java.util.Random)3