Use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.
The SequenceFileTopology class, main method.
public static void main(String[] args) throws Exception {
    Config config = new Config();
    config.setNumWorkers(1);
    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files when they reach 5MB
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/tmp/source/")
            .withExtension(".seq");
    // create sequence format instance.
    DefaultSequenceFormat format = new DefaultSequenceFormat("timestamp", "sentence");
    Yaml yaml = new Yaml();
    InputStream in = new FileInputStream(args[1]);
    Map<String, Object> yamlConf = (Map<String, Object>) yaml.load(in);
    in.close();
    config.put("hdfs.config", yamlConf);
    SequenceFileBolt bolt = new SequenceFileBolt()
            .withFsUrl(args[0])
            .withConfigKey("hdfs.config")
            .withFileNameFormat(fileNameFormat)
            .withSequenceFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .withCompressionType(SequenceFile.CompressionType.RECORD)
            .withCompressionCodec("deflate")
            .addRotationAction(new MoveFileAction().toDestination("/tmp/dest/"));
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> MyBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    if (args.length == 3) {
        topoName = args[2];
    } else if (args.length > 3) {
        System.out.println("Usage: SequenceFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
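The withConfigKey("hdfs.config") call tells the bolt to look up that key in the topology config and copy the map's entries into its Hadoop Configuration, so the YAML file passed as args[1] is how HDFS client settings reach the bolt. A minimal sketch of building the same map in code instead of from YAML; the keytab path and principal are hypothetical, and the key names assume the storm-hdfs security keys hdfs.keytab.file and hdfs.kerberos.principal:

Map<String, Object> hdfsConf = new HashMap<>();
// hypothetical Kerberos settings; omit them entirely on an unsecured cluster
hdfsConf.put("hdfs.keytab.file", "/etc/security/storm.keytab");
hdfsConf.put("hdfs.kerberos.principal", "storm@EXAMPLE.COM");
config.put("hdfs.config", hdfsConf);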
Use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.
The HdfsFileTopology class, main method.
public static void main(String[] args) throws Exception {
    Config config = new Config();
    config.setNumWorkers(1);
    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files every minute
    FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/tmp/foo/")
            .withExtension(".txt");
    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");
    Yaml yaml = new Yaml();
    InputStream in = new FileInputStream(args[1]);
    Map<String, Object> yamlConf = (Map<String, Object>) yaml.load(in);
    in.close();
    config.put("hdfs.config", yamlConf);
    HdfsBolt bolt = new HdfsBolt()
            .withConfigKey("hdfs.config")
            .withFsUrl(args[0])
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> MyBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    if (args.length == 3) {
        topoName = args[2];
    } else if (args.length > 3) {
        System.out.println("Usage: HdfsFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
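DelimitedRecordFormat writes each tuple as its field values joined by the field delimiter, one record per line, so the bolt above produces "|"-separated lines under /tmp/foo/. A hedged sketch of narrowing the output to specific tuple fields; it assumes DelimitedRecordFormat also exposes withFields and withRecordDelimiter builders, and the field names are illustrative:

// assumption: withFields and withRecordDelimiter exist alongside withFieldDelimiter
RecordFormat format = new DelimitedRecordFormat()
        .withFieldDelimiter("|")
        .withRecordDelimiter("\n")
        .withFields(new Fields("sentence", "timestamp"));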
Use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.
The KafkaClientHdfsTopo class, getTopology method.
static StormTopology getTopology(Map<String, Object> config) {
    final int spoutNum = getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltNum = getInt(config, BOLT_NUM, DEFAULT_BOLT_NUM);
    final int hdfsBatch = getInt(config, HDFS_BATCH, DEFAULT_HDFS_BATCH);
    // 1 - Setup Kafka Spout --------
    String bootstrapHosts = getStr(config, KAFKA_BOOTSTRAP_HOSTS);
    String topicName = getStr(config, KAFKA_TOPIC);
    KafkaSpoutConfig<String, String> spoutConfig = KafkaSpoutConfig.builder(bootstrapHosts, topicName)
            .setFirstPollOffsetStrategy(FirstPollOffsetStrategy.EARLIEST)
            .build();
    KafkaSpout<String, String> spout = new KafkaSpout<>(spoutConfig);
    // 2 - Setup HDFS Bolt --------
    String hdfsUrls = getStr(config, HDFS_URI);
    RecordFormat format = new LineWriter("value");
    SyncPolicy syncPolicy = new CountSyncPolicy(hdfsBatch);
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(getStr(config, HDFS_PATH));
    // Instantiate the HdfsBolt
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUrls)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);
    // 3 - Setup Topology --------
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SPOUT_ID, spout, spoutNum);
    builder.setBolt(BOLT_ID, bolt, boltNum).localOrShuffleGrouping(SPOUT_ID);
    return builder.createTopology();
}
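The getInt/getStr helpers used above are not part of this snippet; a minimal sketch of what they could look like (the project's own versions may read the config through Storm's ObjectReader utilities instead):

static int getInt(Map<String, Object> conf, String key, int defaultValue) {
    // illustrative helper: fall back to the default when the key is absent
    Object value = conf.get(key);
    return value == null ? defaultValue : Integer.parseInt(value.toString());
}

static String getStr(Map<String, Object> conf, String key) {
    // illustrative helper: return the raw config value as a string
    Object value = conf.get(key);
    return value == null ? null : value.toString();
}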
Use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.
The StrGenSpoutHdfsBoltTopo class, getTopology method.
static StormTopology getTopology(Map<String, Object> topoConf) {
    final int hdfsBatch = Helper.getInt(topoConf, HDFS_BATCH, DEFAULT_HDFS_BATCH);
    // 1 - Setup StringGen Spout --------
    StringGenSpout spout = new StringGenSpout(100).withFieldName("str");
    // 2 - Setup HDFS Bolt --------
    String hdfsUrl = Helper.getStr(topoConf, HDFS_URI);
    RecordFormat format = new LineWriter("str");
    SyncPolicy syncPolicy = new CountSyncPolicy(hdfsBatch);
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    final int spoutNum = Helper.getInt(topoConf, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltNum = Helper.getInt(topoConf, BOLT_NUM, DEFAULT_BOLT_NUM);
    // Use default, Storm-generated file names
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(Helper.getStr(topoConf, HDFS_PATH));
    // Instantiate the HdfsBolt
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUrl)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);
    // 3 - Setup Topology --------
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SPOUT_ID, spout, spoutNum);
    builder.setBolt(BOLT_ID, bolt, boltNum).localOrShuffleGrouping(SPOUT_ID);
    return builder.createTopology();
}
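A hedged sketch of driving this builder from a main method; Utils.findAndReadConfigFile, the Config setup, and the hard-coded topology name are illustrative rather than the benchmark's actual wiring:

public static void main(String[] args) throws Exception {
    // illustrative wiring: read a YAML config file and submit the generated topology
    Map<String, Object> topoConf = Utils.findAndReadConfigFile(args[0]);
    Config config = new Config();
    config.putAll(topoConf);
    StormSubmitter.submitTopology("StrGenSpoutHdfsBoltTopo", config, getTopology(topoConf));
}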
Use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.
The AvroGenericRecordBoltTest class, makeAvroBolt method.
private AvroGenericRecordBolt makeAvroBolt(String nameNodeAddr, int countSync, float rotationSizeMB, String schemaAsString) {
    SyncPolicy fieldsSyncPolicy = new CountSyncPolicy(countSync);
    FileNameFormat fieldsFileNameFormat = new DefaultFileNameFormat().withPath(testRoot);
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(rotationSizeMB, FileSizeRotationPolicy.Units.MB);
    return new AvroGenericRecordBolt()
            .withFsUrl(nameNodeAddr)
            .withFileNameFormat(fieldsFileNameFormat)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(fieldsSyncPolicy);
}
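A hedged sketch of calling this factory from a test; the hdfsURI field, the schema literal, and the thresholds below are illustrative values, not necessarily what the actual test uses:

// illustrative Avro record schema with two fields
String schema = "{\"type\":\"record\",\"name\":\"stormtest\",\"fields\":["
        + "{\"name\":\"foo1\",\"type\":\"string\"},"
        + "{\"name\":\"int1\",\"type\":\"int\"}]}";
// sync after every tuple; rotate at 1000 MB so rotation never triggers mid-test
AvroGenericRecordBolt bolt = makeAvroBolt(hdfsURI, 1, 1000f, schema);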