Search in sources :

Example 1 with DelimitedRecordFormat

Use of org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat in the Apache Storm project.

From the class TestHdfsBolt, method makeHdfsBolt.

/**
 * Assembles an {@link HdfsBolt} that writes "|"-delimited records beneath {@code testRoot}.
 *
 * @param nameNodeAddr   filesystem URL handed to {@code withFsUrl}
 * @param countSync      value passed to the {@link CountSyncPolicy} constructor
 * @param rotationSizeMB size passed to {@link FileSizeRotationPolicy}, expressed in {@code Units.MB}
 * @return the configured bolt
 */
private HdfsBolt makeHdfsBolt(String nameNodeAddr, int countSync, float rotationSizeMB) {
    // Record layout: fields separated by a pipe character.
    RecordFormat recordFormat = new DelimitedRecordFormat().withFieldDelimiter("|");
    // Sync and rotation policies are driven directly by the caller-supplied values.
    SyncPolicy syncPolicy = new CountSyncPolicy(countSync);
    FileRotationPolicy rotationPolicy =
        new FileSizeRotationPolicy(rotationSizeMB, FileSizeRotationPolicy.Units.MB);
    // Output files are placed under the test root path.
    FileNameFormat nameFormat = new DefaultFileNameFormat().withPath(testRoot);

    HdfsBolt bolt = new HdfsBolt();
    return bolt
        .withFsUrl(nameNodeAddr)
        .withFileNameFormat(nameFormat)
        .withRecordFormat(recordFormat)
        .withRotationPolicy(rotationPolicy)
        .withSyncPolicy(syncPolicy);
}
Also used : DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat)

Example 2 with DelimitedRecordFormat

Use of org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat in the Apache Storm project.

From the class HdfsFileTopology, method main.

/**
 * Builds a topology that feeds a {@code SentenceSpout} into an {@link HdfsBolt} and either runs it
 * on a {@link LocalCluster} (two arguments) or submits it via {@link StormSubmitter} (three arguments).
 *
 * @param args {@code args[0]} is the HDFS URL, {@code args[1]} the path of a YAML file whose contents
 *             are stored in the config under {@code "hdfs.config"}, and the optional {@code args[2]}
 *             the topology name used for submission
 * @throws Exception if the YAML file cannot be read or the topology cannot be built or submitted
 */
public static void main(String[] args) throws Exception {
    // Check the argument count first: args[0] and args[1] are dereferenced below, so a short
    // argument list must print the usage text instead of failing with an index error.
    if (args.length != 2 && args.length != 3) {
        System.out.println("Usage: HdfsFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    Config config = new Config();
    config.setNumWorkers(1);
    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files on a timer: every 1 minute
    FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/tmp/foo/").withExtension(".txt");
    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");
    Yaml yaml = new Yaml();
    Map<String, Object> yamlConf;
    // try-with-resources closes the stream even when parsing throws
    try (InputStream in = new FileInputStream(args[1])) {
        // NOTE(review): yaml.load parses an externally supplied file; confirm the file is trusted
        // or that the Yaml instance is restricted to safe types.
        yamlConf = (Map<String, Object>) yaml.load(in);
    }
    config.put("hdfs.config", yamlConf);
    HdfsBolt bolt = new HdfsBolt().withConfigKey("hdfs.config").withFsUrl(args[0]).withFileNameFormat(fileNameFormat).withRecordFormat(format).withRotationPolicy(rotationPolicy).withSyncPolicy(syncPolicy).addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> MyBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);
    if (args.length == 2) {
        // No topology name given: run on an in-process cluster for a fixed time, then exit.
        try (LocalCluster cluster = new LocalCluster();
            LocalTopology topo = cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology())) {
            waitForSeconds(120);
        }
        System.exit(0);
    } else {
        // args.length == 3: submit under the caller-supplied topology name.
        StormSubmitter.submitTopology(args[2], config, builder.createTopology());
    }
}
Also used : LocalCluster(org.apache.storm.LocalCluster) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) TimedRotationPolicy(org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) Yaml(org.yaml.snakeyaml.Yaml) FileInputStream(java.io.FileInputStream) LocalTopology(org.apache.storm.LocalCluster.LocalTopology) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

DefaultFileNameFormat (org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat)2 DelimitedRecordFormat (org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat)2 FileNameFormat (org.apache.storm.hdfs.bolt.format.FileNameFormat)2 RecordFormat (org.apache.storm.hdfs.bolt.format.RecordFormat)2 FileRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy)2 CountSyncPolicy (org.apache.storm.hdfs.bolt.sync.CountSyncPolicy)2 SyncPolicy (org.apache.storm.hdfs.bolt.sync.SyncPolicy)2 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 Config (org.apache.storm.Config)1 LocalCluster (org.apache.storm.LocalCluster)1 LocalTopology (org.apache.storm.LocalCluster.LocalTopology)1 FileSizeRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)1 TimedRotationPolicy (org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy)1 MoveFileAction (org.apache.storm.hdfs.common.rotation.MoveFileAction)1 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)1 Yaml (org.yaml.snakeyaml.Yaml)1