Search in sources :

Example 11 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class SequenceFileTopology method main.

/**
 * Builds a topology that feeds a {@code SentenceSpout} into a {@code SequenceFileBolt}
 * and submits it through {@code StormSubmitter}.
 *
 * <p>Arguments: {@code args[0]} is passed to the bolt as the filesystem URL,
 * {@code args[1]} is the path of a YAML file whose parsed content is stored in the
 * topology config under {@code "hdfs.config"}, and the optional {@code args[2]}
 * replaces the default topology name.
 *
 * @param args command-line arguments as described above
 * @throws Exception propagated from reading the YAML file or from topology submission
 */
public static void main(String[] args) throws Exception {
    // Validate the argument count before args[0]/args[1] are dereferenced. Previously a
    // short argument list failed with ArrayIndexOutOfBoundsException and the usage check
    // only ran after the YAML file had already been opened and parsed.
    if (args.length < 2 || args.length > 3) {
        System.out.println("Usage: SequenceFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    final String topoName = (args.length == 3) ? args[2] : TOPOLOGY_NAME;

    Config config = new Config();
    config.setNumWorkers(1);

    // Load the HDFS settings; try-with-resources closes the stream even if parsing fails.
    // NOTE(review): Yaml.load builds objects from the document content; this assumes the
    // config file is operator-supplied and trusted -- confirm.
    final Map<String, Object> yamlConf;
    try (InputStream in = new FileInputStream(args[1])) {
        yamlConf = (Map<String, Object>) new Yaml().load(in);
    }
    config.put("hdfs.config", yamlConf);

    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files when they reach 5MB
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/tmp/source/").withExtension(".seq");
    // create sequence format instance.
    DefaultSequenceFormat format = new DefaultSequenceFormat("timestamp", "sentence");

    SequenceFileBolt bolt = new SequenceFileBolt()
            .withFsUrl(args[0])
            .withConfigKey("hdfs.config")
            .withFileNameFormat(fileNameFormat)
            .withSequenceFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .withCompressionType(SequenceFile.CompressionType.RECORD)
            .withCompressionCodec("deflate")
            .addRotationAction(new MoveFileAction().toDestination("/tmp/dest/"));

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> SequenceFileBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);

    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) Yaml(org.yaml.snakeyaml.Yaml) FileInputStream(java.io.FileInputStream) DefaultSequenceFormat(org.apache.storm.hdfs.bolt.format.DefaultSequenceFormat) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 12 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class HdfsFileTopology method main.

/**
 * Builds a topology that feeds a {@code SentenceSpout} into an {@code HdfsBolt}
 * and submits it through {@code StormSubmitter}.
 *
 * <p>Arguments: {@code args[0]} is passed to the bolt as the filesystem URL,
 * {@code args[1]} is the path of a YAML file whose parsed content is stored in the
 * topology config under {@code "hdfs.config"}, and the optional {@code args[2]}
 * replaces the default topology name.
 *
 * @param args command-line arguments as described above
 * @throws Exception propagated from reading the YAML file or from topology submission
 */
public static void main(String[] args) throws Exception {
    // Validate the argument count before args[0]/args[1] are dereferenced. Previously a
    // short argument list failed with ArrayIndexOutOfBoundsException and the usage check
    // only ran after the YAML file had already been opened and parsed.
    if (args.length < 2 || args.length > 3) {
        System.out.println("Usage: HdfsFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    final String topoName = (args.length == 3) ? args[2] : TOPOLOGY_NAME;

    Config config = new Config();
    config.setNumWorkers(1);

    // Load the HDFS settings; try-with-resources closes the stream even if parsing fails.
    // NOTE(review): Yaml.load builds objects from the document content; this assumes the
    // config file is operator-supplied and trusted -- confirm.
    final Map<String, Object> yamlConf;
    try (InputStream in = new FileInputStream(args[1])) {
        yamlConf = (Map<String, Object>) new Yaml().load(in);
    }
    config.put("hdfs.config", yamlConf);

    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files on a timer: the policy is built with 1.0 and TimeUnit.MINUTES
    FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/tmp/foo/").withExtension(".txt");
    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");

    HdfsBolt bolt = new HdfsBolt()
            .withConfigKey("hdfs.config")
            .withFsUrl(args[0])
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> HdfsBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);

    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
Also used : DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) TimedRotationPolicy(org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) Yaml(org.yaml.snakeyaml.Yaml) FileInputStream(java.io.FileInputStream) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 13 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class KafkaClientHdfsTopo method getTopology.

/**
 * Assembles a topology with a Kafka spout wired into an {@code HdfsBolt}.
 *
 * <p>Spout and bolt counts, the HDFS sync batch size, the Kafka bootstrap hosts and
 * topic, and the HDFS URI and path are all read from {@code config}.
 *
 * @param config settings map queried through {@code getInt} / {@code getStr}
 * @return the topology produced by {@code TopologyBuilder.createTopology()}
 */
static StormTopology getTopology(Map<String, Object> config) {
    final int spoutCount = getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltCount = getInt(config, BOLT_NUM, DEFAULT_BOLT_NUM);
    final int syncEvery = getInt(config, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // --- Step 1: Kafka spout ---
    final String brokers = getStr(config, KAFKA_BOOTSTRAP_HOSTS);
    final String topic = getStr(config, KAFKA_TOPIC);
    final KafkaSpoutConfig<String, String> kafkaConf = KafkaSpoutConfig
            .builder(brokers, topic)
            .setFirstPollOffsetStrategy(FirstPollOffsetStrategy.EARLIEST)
            .build();
    final KafkaSpout<String, String> kafkaSpout = new KafkaSpout<>(kafkaConf);

    // --- Step 2: HDFS bolt ---
    final String fsUrl = getStr(config, HDFS_URI);
    final RecordFormat lineFormat = new LineWriter("value");
    final SyncPolicy countSync = new CountSyncPolicy(syncEvery);
    final FileRotationPolicy sizeRotation =
            new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    final FileNameFormat nameFormat =
            new DefaultFileNameFormat().withPath(getStr(config, HDFS_PATH));
    final HdfsBolt hdfsBolt = new HdfsBolt()
            .withFsUrl(fsUrl)
            .withFileNameFormat(nameFormat)
            .withRecordFormat(lineFormat)
            .withRotationPolicy(sizeRotation)
            .withSyncPolicy(countSync);

    // --- Step 3: wire spout to bolt ---
    final TopologyBuilder topology = new TopologyBuilder();
    topology.setSpout(SPOUT_ID, kafkaSpout, spoutCount);
    topology.setBolt(BOLT_ID, hdfsBolt, boltCount).localOrShuffleGrouping(SPOUT_ID);
    return topology.createTopology();
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) HdfsBolt(org.apache.storm.hdfs.bolt.HdfsBolt) KafkaSpout(org.apache.storm.kafka.spout.KafkaSpout) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)

Example 14 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class StrGenSpoutHdfsBoltTopo method getTopology.

/**
 * Assembles a topology with a {@code StringGenSpout} wired into an {@code HdfsBolt}.
 *
 * <p>The HDFS sync batch size, HDFS URI and path, and the spout and bolt counts are
 * read from {@code topoConf} through {@code Helper}.
 *
 * @param topoConf settings map queried through {@code Helper.getInt} / {@code Helper.getStr}
 * @return the topology produced by {@code TopologyBuilder.createTopology()}
 */
static StormTopology getTopology(Map<String, Object> topoConf) {
    final int syncEvery = Helper.getInt(topoConf, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // --- Step 1: string generator spout, emitting under field name "str" ---
    final StringGenSpout generator = new StringGenSpout(100).withFieldName("str");

    // --- Step 2: HDFS bolt ---
    final String fsUrl = Helper.getStr(topoConf, HDFS_URI);
    final RecordFormat lineFormat = new LineWriter("str");
    final SyncPolicy countSync = new CountSyncPolicy(syncEvery);
    final FileRotationPolicy sizeRotation =
            new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    final int spoutCount = Helper.getInt(topoConf, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltCount = Helper.getInt(topoConf, BOLT_NUM, DEFAULT_BOLT_NUM);
    // Only the path is customised on the default file name format.
    final FileNameFormat nameFormat =
            new DefaultFileNameFormat().withPath(Helper.getStr(topoConf, HDFS_PATH));
    final HdfsBolt hdfsBolt = new HdfsBolt()
            .withFsUrl(fsUrl)
            .withFileNameFormat(nameFormat)
            .withRecordFormat(lineFormat)
            .withRotationPolicy(sizeRotation)
            .withSyncPolicy(countSync);

    // --- Step 3: wire spout to bolt ---
    final TopologyBuilder topology = new TopologyBuilder();
    topology.setSpout(SPOUT_ID, generator, spoutCount);
    topology.setBolt(BOLT_ID, hdfsBolt, boltCount).localOrShuffleGrouping(SPOUT_ID);
    return topology.createTopology();
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) HdfsBolt(org.apache.storm.hdfs.bolt.HdfsBolt) StringGenSpout(org.apache.storm.perf.spout.StringGenSpout) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)

Example 15 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class AvroGenericRecordBoltTest method makeAvroBolt.

/**
 * Creates an {@code AvroGenericRecordBolt} that writes under {@code testRoot}.
 *
 * @param nameNodeAddr filesystem URL handed to {@code withFsUrl}
 * @param countSync value passed to the {@code CountSyncPolicy} constructor
 * @param rotationSizeMB size, in {@code Units.MB}, passed to the {@code FileSizeRotationPolicy}
 * @param schemaAsString not referenced by this method
 * @return the configured bolt
 */
private AvroGenericRecordBolt makeAvroBolt(String nameNodeAddr, int countSync, float rotationSizeMB, String schemaAsString) {
    final SyncPolicy syncAfterCount = new CountSyncPolicy(countSync);
    final FileNameFormat namesUnderTestRoot = new DefaultFileNameFormat().withPath(testRoot);
    final FileRotationPolicy rotateBySize =
            new FileSizeRotationPolicy(rotationSizeMB, FileSizeRotationPolicy.Units.MB);

    final AvroGenericRecordBolt avroBolt = new AvroGenericRecordBolt()
            .withFsUrl(nameNodeAddr)
            .withFileNameFormat(namesUnderTestRoot)
            .withRotationPolicy(rotateBySize)
            .withSyncPolicy(syncAfterCount);
    return avroBolt;
}
Also used : CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)

Aggregations

CountSyncPolicy (org.apache.storm.hdfs.bolt.sync.CountSyncPolicy)15 SyncPolicy (org.apache.storm.hdfs.bolt.sync.SyncPolicy)10 DefaultFileNameFormat (org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat)9 FileNameFormat (org.apache.storm.hdfs.bolt.format.FileNameFormat)9 FileRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy)9 FileSizeRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)9 RecordFormat (org.apache.storm.hdfs.bolt.format.RecordFormat)6 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)6 HdfsBolt (org.apache.storm.hdfs.bolt.HdfsBolt)4 Test (org.junit.jupiter.api.Test)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 IndexingWriterConfiguration (org.apache.metron.common.configuration.writer.IndexingWriterConfiguration)2 WriterConfiguration (org.apache.metron.common.configuration.writer.WriterConfiguration)2 StellarProcessor (org.apache.metron.stellar.common.StellarProcessor)2 Config (org.apache.storm.Config)2