use of com.mozilla.bagheera.sink.SinkConfiguration in project bagheera by mozilla-metrics.
the class KafkaSequenceFileConsumer method main.
/**
 * Command-line entry point that wires a {@link KafkaConsumer} to a
 * {@link SequenceFileSink}-backed sink factory and starts polling.
 *
 * <p>On top of the options supplied by {@code KafkaConsumer.getOptions()},
 * this registers {@code output}, {@code dateformat}, {@code filesize},
 * {@code usebytes} and {@code addtimestamp}.
 *
 * <p>A {@link ParseException} is logged and followed by the usage text; a
 * {@link NumberFormatException} is logged. In both cases the method returns
 * without polling.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) {
    OptionFactory factory = OptionFactory.getInstance();
    Options cliOptions = KafkaConsumer.getOptions();
    cliOptions.addOption(factory.create("o", "output", true, "HDFS base path for output."));
    cliOptions.addOption(factory.create("df", "dateformat", true, "Date format for the date subdirectories."));
    cliOptions.addOption(factory.create("fs", "filesize", true, "Max file size for output files."));
    cliOptions.addOption(factory.create("b", "usebytes", false, "Use BytesWritable for value rather than Text."));
    cliOptions.addOption(factory.create("ts", "addtimestamp", false, "Adds bagheera timestamp to the json"));

    CommandLineParser cliParser = new GnuParser();
    ShutdownHook shutdownHook = ShutdownHook.getInstance();
    try {
        // Turn the raw arguments into a consumer; it goes first in the shutdown order.
        CommandLine parsed = cliParser.parse(cliOptions, args);
        final KafkaConsumer kafkaConsumer = KafkaConsumer.fromOptions(parsed);
        shutdownHook.addFirst(kafkaConsumer);

        // Translate the command-line values into sink settings.
        SinkConfiguration hdfsConfig = new SinkConfiguration();
        String baseDir = parsed.getOptionValue("output", "/bagheera");
        hdfsConfig.setString("hdfssink.hdfs.basedir.path", baseDir);
        String dateFormat = parsed.getOptionValue("dateformat", "yyyy-MM-dd");
        hdfsConfig.setString("hdfssink.hdfs.date.format", dateFormat);
        // A non-numeric filesize throws NumberFormatException, handled below.
        long maxFileSize = Long.parseLong(parsed.getOptionValue("filesize", "536870912"));
        hdfsConfig.setLong("hdfssink.hdfs.max.filesize", maxFileSize);
        boolean useBytes = parsed.hasOption("usebytes");
        hdfsConfig.setBoolean("hdfssink.hdfs.usebytes", useBytes);
        // The timestamp key is only written when the flag is present.
        if (parsed.hasOption("addtimestamp")) {
            hdfsConfig.setBoolean("hdfssink.hdfs.addtimestamp", true);
        }

        // The sink factory goes last in the shutdown order.
        KeyValueSinkFactory factoryForSinks = KeyValueSinkFactory.getInstance(SequenceFileSink.class, hdfsConfig);
        shutdownHook.addLast(factoryForSinks);
        kafkaConsumer.setSinkFactory(factoryForSinks);

        // Initialize metrics collection, reporting, etc. (return value is not needed here)
        MetricsManager.getDefaultMetricsManager();
        prepareHealthChecks();

        // Begin polling
        kafkaConsumer.poll();
    } catch (ParseException parseFailure) {
        LOG.error("Error parsing command line options", parseFailure);
        HelpFormatter usagePrinter = new HelpFormatter();
        usagePrinter.printHelp(KafkaSequenceFileConsumer.class.getName(), cliOptions);
    } catch (NumberFormatException badNumber) {
        LOG.error("Failed to parse filesize option", badNumber);
    }
}
use of com.mozilla.bagheera.sink.SinkConfiguration in project bagheera by mozilla-metrics.
the class KafkaHBaseConsumer method main.
/**
 * Command-line entry point that wires a {@link KafkaConsumer} to an
 * {@link HBaseSink}-backed sink factory and starts polling.
 *
 * <p>On top of the options supplied by {@code KafkaConsumer.getOptions()},
 * this registers {@code table} (required), {@code family}, {@code qualifier},
 * {@code batchsize} and {@code prefixdate}.
 *
 * <p>A {@link ParseException} is logged and followed by the usage text. A
 * {@link NumberFormatException} from a malformed numeric option is logged
 * instead of escaping {@code main}. In both cases the method returns without
 * polling.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("tbl", "table", true, "HBase table name.").required());
    options.addOption(optFactory.create("f", "family", true, "Column family."));
    options.addOption(optFactory.create("q", "qualifier", true, "Column qualifier."));
    options.addOption(optFactory.create("b", "batchsize", true, "Batch size (number of messages per HBase flush)."));
    options.addOption(optFactory.create("pd", "prefixdate", false, "Prefix key with salted date."));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);

        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        // "numthreads" is not registered in this method; presumably it comes from
        // KafkaConsumer.getOptions() -- verify.
        if (cmd.hasOption("numthreads")) {
            sinkConfig.setInt("hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
        }
        if (cmd.hasOption("batchsize")) {
            sinkConfig.setInt("hbasesink.hbase.batchsize", Integer.parseInt(cmd.getOptionValue("batchsize")));
        }
        sinkConfig.setString("hbasesink.hbase.tablename", cmd.getOptionValue("table"));
        sinkConfig.setString("hbasesink.hbase.column.family", cmd.getOptionValue("family", "data"));
        sinkConfig.setString("hbasesink.hbase.column.qualifier", cmd.getOptionValue("qualifier", "json"));
        sinkConfig.setBoolean("hbasesink.hbase.rowkey.prefixdate", cmd.hasOption("prefixdate"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(HBaseSink.class, sinkConfig);
        sh.addLast(sinkFactory);

        // Set the sink factory for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();

        // Initialize metrics collection, reporting, etc. (return value is not needed here)
        MetricsManager.getDefaultMetricsManager();

        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(KafkaHBaseConsumer.class.getName(), options);
    } catch (NumberFormatException e) {
        // Integer.parseInt rejects non-numeric numthreads/batchsize values; log it
        // rather than letting it escape main as an uncaught exception.
        LOG.error("Failed to parse numeric command line option", e);
    }
}
use of com.mozilla.bagheera.sink.SinkConfiguration in project bagheera by mozilla-metrics.
the class KafkaReplayConsumer method main.
/**
 * Command-line entry point that wires a {@link KafkaConsumer} to a
 * {@link ReplaySink}-backed sink factory and starts polling.
 *
 * <p>On top of the options supplied by {@code KafkaConsumer.getOptions()},
 * this registers {@code copy-keys}, {@code dest} (required), {@code sample}
 * and {@code delete}.
 *
 * <p>A {@link ParseException} is logged and followed by the usage text. A
 * {@link NumberFormatException} from a malformed numeric option is logged
 * instead of escaping {@code main}. In both cases the method returns without
 * polling.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("k", "copy-keys", true, "Whether or not to copy keys from the source data"));
    options.addOption(optFactory.create("d", "dest", true, "Destination host / url pattern (include '" + ReplaySink.KEY_PLACEHOLDER + "' for key placeholder)").required());
    options.addOption(optFactory.create("s", "sample", true, "Rate at which to sample the source data (defaults to using all data)"));
    options.addOption(optFactory.create("D", "delete", true, "Also replay deletes (using the source keys by necessity)"));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);

        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        // NOTE(review): this key carries the "hbasesink" prefix although the sink
        // built below is a ReplaySink -- confirm that something actually reads it.
        if (cmd.hasOption("numthreads")) {
            sinkConfig.setInt("hbasesink.hbase.numthreads", Integer.parseInt(cmd.getOptionValue("numthreads")));
        }
        sinkConfig.setString("replaysink.keys", cmd.getOptionValue("copy-keys", "true"));
        // "dest" is registered as required above, so this default is only a fallback.
        sinkConfig.setString("replaysink.dest", cmd.getOptionValue("dest", "http://bogus:8080/submit/endpoint/" + ReplaySink.KEY_PLACEHOLDER));
        sinkConfig.setString("replaysink.sample", cmd.getOptionValue("sample", "1"));
        sinkConfig.setString("replaysink.delete", cmd.getOptionValue("delete", "true"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(ReplaySink.class, sinkConfig);
        sh.addLast(sinkFactory);

        // Set the sink factory for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();

        // Initialize metrics collection, reporting, etc. (return value is not needed here)
        MetricsManager.getDefaultMetricsManager();

        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(KafkaReplayConsumer.class.getName(), options);
    } catch (NumberFormatException e) {
        // Integer.parseInt rejects a non-numeric numthreads value; log it rather
        // than letting it escape main as an uncaught exception.
        LOG.error("Failed to parse numeric command line option", e);
    }
}
use of com.mozilla.bagheera.sink.SinkConfiguration in project bagheera by mozilla-metrics.
the class KafkaLoggerConsumer method main.
/**
 * Command-line entry point that wires a {@link KafkaConsumer} to a
 * {@link LoggerSink}-backed sink factory and starts polling.
 *
 * <p>On top of the options supplied by {@code KafkaConsumer.getOptions()},
 * this registers the {@code logvalues} flag.
 *
 * <p>A {@link ParseException} is logged and followed by the usage text for
 * this class; the method then returns without polling.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) {
    OptionFactory optFactory = OptionFactory.getInstance();
    Options options = KafkaConsumer.getOptions();
    options.addOption(optFactory.create("lv", "logvalues", false, "Log values."));
    CommandLineParser parser = new GnuParser();
    ShutdownHook sh = ShutdownHook.getInstance();
    try {
        // Parse command line options
        CommandLine cmd = parser.parse(options, args);
        final KafkaConsumer consumer = KafkaConsumer.fromOptions(cmd);
        sh.addFirst(consumer);

        // Create a sink for storing data
        SinkConfiguration sinkConfig = new SinkConfiguration();
        sinkConfig.setBoolean("loggersink.logvalues", cmd.hasOption("logvalues"));
        KeyValueSinkFactory sinkFactory = KeyValueSinkFactory.getInstance(LoggerSink.class, sinkConfig);
        sh.addLast(sinkFactory);

        // Set the sink for consumer storage
        consumer.setSinkFactory(sinkFactory);
        prepareHealthChecks();

        // Initialize metrics collection, reporting, etc. (return value is not needed here)
        MetricsManager.getDefaultMetricsManager();

        // Begin polling
        consumer.poll();
    } catch (ParseException e) {
        LOG.error("Error parsing command line options", e);
        HelpFormatter formatter = new HelpFormatter();
        // Print usage under this class's name (it previously named KafkaHBaseConsumer).
        formatter.printHelp(KafkaLoggerConsumer.class.getName(), options);
    }
}
Aggregations