Search in sources :

Example 1 with KeyValueSinkFactory

use of com.mozilla.bagheera.sink.KeyValueSinkFactory in project bagheera by mozilla-metrics.

The example is taken from the main method of the class KafkaSequenceFileConsumer.

/**
 * Command line entry point: wires a {@code KafkaConsumer} to a sink factory
 * built for {@code SequenceFileSink} and then starts polling.
 *
 * <p>On top of the options returned by {@code KafkaConsumer.getOptions()},
 * this method registers {@code output}, {@code dateformat}, {@code filesize},
 * {@code usebytes} and {@code addtimestamp}. A command line that cannot be
 * parsed is logged and followed by the usage help; a non-numeric
 * {@code filesize} value is logged as well.
 *
 * @param args raw command line arguments
 */
public static void main(String[] args) {
    final OptionFactory factory = OptionFactory.getInstance();
    final Options cliOptions = KafkaConsumer.getOptions();
    cliOptions.addOption(factory.create("o", "output", true, "HDFS base path for output."));
    cliOptions.addOption(factory.create("df", "dateformat", true, "Date format for the date subdirectories."));
    cliOptions.addOption(factory.create("fs", "filesize", true, "Max file size for output files."));
    cliOptions.addOption(factory.create("b", "usebytes", false, "Use BytesWritable for value rather than Text."));
    cliOptions.addOption(factory.create("ts", "addtimestamp", false, "Adds bagheera timestamp to the json"));

    final CommandLineParser cliParser = new GnuParser();
    final ShutdownHook shutdownHook = ShutdownHook.getInstance();
    try {
        // Turn the raw arguments into a consumer and hand it to the shutdown hook.
        final CommandLine commandLine = cliParser.parse(cliOptions, args);
        final KafkaConsumer kafkaConsumer = KafkaConsumer.fromOptions(commandLine);
        shutdownHook.addFirst(kafkaConsumer);

        // Translate the command line into the HDFS sink settings, falling back
        // to the built-in defaults for anything the caller did not supply.
        final SinkConfiguration hdfsConfig = new SinkConfiguration();
        final String baseDir = commandLine.getOptionValue("output", "/bagheera");
        hdfsConfig.setString("hdfssink.hdfs.basedir.path", baseDir);
        final String datePattern = commandLine.getOptionValue("dateformat", "yyyy-MM-dd");
        hdfsConfig.setString("hdfssink.hdfs.date.format", datePattern);
        final long maxFileSize = Long.parseLong(commandLine.getOptionValue("filesize", "536870912"));
        hdfsConfig.setLong("hdfssink.hdfs.max.filesize", maxFileSize);
        final boolean useBytes = commandLine.hasOption("usebytes");
        hdfsConfig.setBoolean("hdfssink.hdfs.usebytes", useBytes);
        // The timestamp key is only written when the flag is present; it is
        // deliberately left unset (rather than set to false) otherwise.
        if (commandLine.hasOption("addtimestamp")) {
            hdfsConfig.setBoolean("hdfssink.hdfs.addtimestamp", true);
        }

        final KeyValueSinkFactory factoryForSinks =
                KeyValueSinkFactory.getInstance(SequenceFileSink.class, hdfsConfig);
        shutdownHook.addLast(factoryForSinks);
        kafkaConsumer.setSinkFactory(factoryForSinks);

        // Metrics first, then health checks; the manager reference itself is not needed here.
        MetricsManager.getDefaultMetricsManager();
        prepareHealthChecks();

        kafkaConsumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        new HelpFormatter().printHelp(KafkaSequenceFileConsumer.class.getName(), cliOptions);
    } catch (NumberFormatException e) {
        LOG.error("Failed to parse filesize option", e);
    }
}
Also used : Options(org.apache.commons.cli.Options) ShutdownHook(com.mozilla.bagheera.util.ShutdownHook) GnuParser(org.apache.commons.cli.GnuParser) OptionFactory(com.mozilla.bagheera.cli.OptionFactory) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) MetricsManager(com.mozilla.bagheera.metrics.MetricsManager) SinkConfiguration(com.mozilla.bagheera.sink.SinkConfiguration) CommandLineParser(org.apache.commons.cli.CommandLineParser) KeyValueSinkFactory(com.mozilla.bagheera.sink.KeyValueSinkFactory) ParseException(org.apache.commons.cli.ParseException)

Example 2 with KeyValueSinkFactory

use of com.mozilla.bagheera.sink.KeyValueSinkFactory in project bagheera by mozilla-metrics.

The example is taken from the main method of the class KafkaHBaseConsumer.

/**
 * Command line entry point: wires a {@code KafkaConsumer} to a sink factory
 * built for {@code HBaseSink} and then starts polling.
 *
 * <p>On top of the options returned by {@code KafkaConsumer.getOptions()},
 * this method registers {@code table} (required), {@code family},
 * {@code qualifier}, {@code batchsize} and {@code prefixdate}. A command line
 * that cannot be parsed is logged and followed by the usage help. A
 * non-numeric {@code numthreads} or {@code batchsize} value is logged instead
 * of escaping {@code main} as an uncaught exception.
 *
 * @param args raw command line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("tbl", "table", true, "HBase table name.").required());
    options.addOption(optFactory.create("f", "family", true, "Column family."));
    options.addOption(optFactory.create("q", "qualifier", true, "Column qualifier."));
    options.addOption(optFactory.create("b", "batchsize", true, "Batch size (number of messages per HBase flush)."));
    options.addOption(optFactory.create("pd", "prefixdate", false, "Prefix key with salted date."));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);
        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        // "numthreads" is not registered in this method; presumably it comes
        // from KafkaConsumer.getOptions() - TODO confirm.
        if (cmd.hasOption("numthreads")) {
            sinkConfig.setInt("hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
        }
        if (cmd.hasOption("batchsize")) {
            sinkConfig.setInt("hbasesink.hbase.batchsize", Integer.parseInt(cmd.getOptionValue("batchsize")));
        }
        sinkConfig.setString("hbasesink.hbase.tablename", cmd.getOptionValue("table"));
        sinkConfig.setString("hbasesink.hbase.column.family", cmd.getOptionValue("family", "data"));
        sinkConfig.setString("hbasesink.hbase.column.qualifier", cmd.getOptionValue("qualifier", "json"));
        sinkConfig.setBoolean("hbasesink.hbase.rowkey.prefixdate", cmd.hasOption("prefixdate"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(HBaseSink.class, sinkConfig);
        sh.addLast(sinkFactory);
        // Set the sink factory for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();
        // Initialize metrics collection, reporting, etc. The returned manager
        // is not used here, so it is not bound to a local.
        MetricsManager.getDefaultMetricsManager();
        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(KafkaHBaseConsumer.class.getName(), options);
    } catch (NumberFormatException e) {
        // Integer.parseInt rejects non-numeric numthreads/batchsize values.
        LOG.error("Failed to parse numeric command line option", e);
    }
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) MetricsManager(com.mozilla.bagheera.metrics.MetricsManager) ShutdownHook(com.mozilla.bagheera.util.ShutdownHook) SinkConfiguration(com.mozilla.bagheera.sink.SinkConfiguration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) KeyValueSinkFactory(com.mozilla.bagheera.sink.KeyValueSinkFactory) ParseException(org.apache.commons.cli.ParseException) OptionFactory(com.mozilla.bagheera.cli.OptionFactory)

Example 3 with KeyValueSinkFactory

use of com.mozilla.bagheera.sink.KeyValueSinkFactory in project bagheera by mozilla-metrics.

The example is taken from the main method of the class KafkaReplayConsumer.

/**
 * Command line entry point: wires a {@code KafkaConsumer} to a sink factory
 * built for {@code ReplaySink} and then starts polling.
 *
 * <p>On top of the options returned by {@code KafkaConsumer.getOptions()},
 * this method registers {@code copy-keys}, {@code dest} (required),
 * {@code sample} and {@code delete}. A command line that cannot be parsed is
 * logged and followed by the usage help. A non-numeric {@code numthreads}
 * value is logged instead of escaping {@code main} as an uncaught exception.
 *
 * @param args raw command line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("k", "copy-keys", true, "Whether or not to copy keys from the source data"));
    options.addOption(optFactory.create("d", "dest", true, "Destination host / url pattern (include '" + ReplaySink.KEY_PLACEHOLDER + "' for key placeholder)").required());
    options.addOption(optFactory.create("s", "sample", true, "Rate at which to sample the source data (defaults to using all data)"));
    options.addOption(optFactory.create("D", "delete", true, "Also replay deletes (using the source keys by necessity)"));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);
        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        // NOTE(review): this key is in the "hbasesink" namespace although the
        // sink built below is a ReplaySink - confirm whether ReplaySink reads it.
        if (cmd.hasOption("numthreads")) {
            sinkConfig.setInt("hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
        }
        sinkConfig.setString("replaysink.keys", cmd.getOptionValue("copy-keys", "true"));
        sinkConfig.setString("replaysink.dest", cmd.getOptionValue("dest", "http://bogus:8080/submit/endpoint/" + ReplaySink.KEY_PLACEHOLDER));
        sinkConfig.setString("replaysink.sample", cmd.getOptionValue("sample", "1"));
        sinkConfig.setString("replaysink.delete", cmd.getOptionValue("delete", "true"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(ReplaySink.class, sinkConfig);
        sh.addLast(sinkFactory);
        // Set the sink factory for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();
        // Initialize metrics collection, reporting, etc. The returned manager
        // is not used here, so it is not bound to a local.
        MetricsManager.getDefaultMetricsManager();
        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(KafkaReplayConsumer.class.getName(), options);
    } catch (NumberFormatException e) {
        // Integer.parseInt rejects a non-numeric numthreads value.
        LOG.error("Failed to parse numeric command line option", e);
    }
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) MetricsManager(com.mozilla.bagheera.metrics.MetricsManager) ShutdownHook(com.mozilla.bagheera.util.ShutdownHook) SinkConfiguration(com.mozilla.bagheera.sink.SinkConfiguration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) KeyValueSinkFactory(com.mozilla.bagheera.sink.KeyValueSinkFactory) ParseException(org.apache.commons.cli.ParseException) OptionFactory(com.mozilla.bagheera.cli.OptionFactory)

Example 4 with KeyValueSinkFactory

use of com.mozilla.bagheera.sink.KeyValueSinkFactory in project bagheera by mozilla-metrics.

The example is taken from the main method of the class KafkaLoggerConsumer.

/**
 * Command line entry point: wires a {@code KafkaConsumer} to a sink factory
 * built for {@code LoggerSink} and then starts polling.
 *
 * <p>On top of the options returned by {@code KafkaConsumer.getOptions()},
 * this method registers the {@code logvalues} flag. A command line that
 * cannot be parsed is logged and followed by the usage help for this class.
 *
 * @param args raw command line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("lv", "logvalues", false, "Log values."));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);
        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        sinkConfig.setBoolean("loggersink.logvalues", cmd.hasOption("logvalues"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(LoggerSink.class, sinkConfig);
        sh.addLast(sinkFactory);
        // Set the sink for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();
        // Initialize metrics collection, reporting, etc. The returned manager
        // is not used here, so it is not bound to a local.
        MetricsManager.getDefaultMetricsManager();
        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        // Use this class's own name in the usage banner; it previously
        // printed KafkaHBaseConsumer's name.
        formatter.printHelp(KafkaLoggerConsumer.class.getName(), options);
    }
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) MetricsManager(com.mozilla.bagheera.metrics.MetricsManager) ShutdownHook(com.mozilla.bagheera.util.ShutdownHook) SinkConfiguration(com.mozilla.bagheera.sink.SinkConfiguration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) KeyValueSinkFactory(com.mozilla.bagheera.sink.KeyValueSinkFactory) ParseException(org.apache.commons.cli.ParseException) OptionFactory(com.mozilla.bagheera.cli.OptionFactory)

Aggregations

OptionFactory (com.mozilla.bagheera.cli.OptionFactory): 4, MetricsManager (com.mozilla.bagheera.metrics.MetricsManager): 4, KeyValueSinkFactory (com.mozilla.bagheera.sink.KeyValueSinkFactory): 4, SinkConfiguration (com.mozilla.bagheera.sink.SinkConfiguration): 4, ShutdownHook (com.mozilla.bagheera.util.ShutdownHook): 4, CommandLine (org.apache.commons.cli.CommandLine): 4, CommandLineParser (org.apache.commons.cli.CommandLineParser): 4, GnuParser (org.apache.commons.cli.GnuParser): 4, HelpFormatter (org.apache.commons.cli.HelpFormatter): 4, Options (org.apache.commons.cli.Options): 4, ParseException (org.apache.commons.cli.ParseException): 4