Search in sources :

Example 1 with MoveFileAction

use of org.apache.storm.hdfs.common.rotation.MoveFileAction in project storm by apache.

the class HdfsFileTopology method main.

/**
 * Builds the example topology that writes sentences from a {@code SentenceSpout} to HDFS through
 * an {@code HdfsBolt}, then either runs it in a {@code LocalCluster} (two arguments) or submits
 * it with {@code StormSubmitter} (three arguments).
 *
 * <p>With any other number of arguments the usage text is printed and nothing else happens.
 *
 * @param args {@code args[0]} is the HDFS URL, {@code args[1]} is the path of a YAML file whose
 *             contents are stored in the topology config under {@code "hdfs.config"}, and the
 *             optional {@code args[2]} is the topology name used for submission
 * @throws Exception if the YAML file cannot be read or the topology cannot be run or submitted
 */
public static void main(String[] args) throws Exception {
    // Check the argument count before touching args[0]/args[1]; otherwise a short command line
    // fails with ArrayIndexOutOfBoundsException instead of showing the usage text.
    if (args.length != 2 && args.length != 3) {
        System.out.println("Usage: HdfsFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    Config config = new Config();
    config.setNumWorkers(1);
    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files once per minute
    FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/tmp/foo/").withExtension(".txt");
    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");
    Yaml yaml = new Yaml();
    Map<String, Object> yamlConf;
    // try-with-resources closes the file even when parsing fails.
    try (InputStream in = new FileInputStream(args[1])) {
        // NOTE(review): Yaml.load on an operator-supplied file; only point this at trusted
        // configuration files.
        @SuppressWarnings("unchecked")
        Map<String, Object> loaded = (Map<String, Object>) yaml.load(in);
        yamlConf = loaded;
    }
    config.put("hdfs.config", yamlConf);
    HdfsBolt bolt = new HdfsBolt()
        .withConfigKey("hdfs.config")
        .withFsUrl(args[0])
        .withFileNameFormat(fileNameFormat)
        .withRecordFormat(format)
        .withRotationPolicy(rotationPolicy)
        .withSyncPolicy(syncPolicy)
        .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> HdfsBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);
    if (args.length == 2) {
        // Local mode: both resources are closed when the block exits, then the JVM is stopped.
        try (LocalCluster cluster = new LocalCluster();
            LocalTopology topo = cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology())) {
            waitForSeconds(120);
        }
        System.exit(0);
    } else {
        // args.length == 3: submit under the caller-supplied topology name.
        StormSubmitter.submitTopology(args[2], config, builder.createTopology());
    }
}
Also used : LocalCluster(org.apache.storm.LocalCluster) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) DelimitedRecordFormat(org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) TimedRotationPolicy(org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) Yaml(org.yaml.snakeyaml.Yaml) FileInputStream(java.io.FileInputStream) LocalTopology(org.apache.storm.LocalCluster.LocalTopology) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 2 with MoveFileAction

use of org.apache.storm.hdfs.common.rotation.MoveFileAction in project storm by apache.

the class SequenceFileTopology method main.

/**
 * Builds the example topology that writes sentences from a {@code SentenceSpout} to HDFS
 * sequence files through a {@code SequenceFileBolt}, then either runs it in a
 * {@code LocalCluster} (two arguments) or submits it with {@code StormSubmitter} (three
 * arguments).
 *
 * <p>With any other number of arguments the usage text is printed and nothing else happens.
 *
 * @param args {@code args[0]} is the HDFS URL, {@code args[1]} is the path of a YAML file whose
 *             contents are stored in the topology config under {@code "hdfs.config"}, and the
 *             optional {@code args[2]} is the topology name used for submission
 * @throws Exception if the YAML file cannot be read or the topology cannot be run or submitted
 */
public static void main(String[] args) throws Exception {
    // Check the argument count before touching args[0]/args[1]; otherwise a short command line
    // fails with ArrayIndexOutOfBoundsException instead of showing the usage text.
    if (args.length != 2 && args.length != 3) {
        System.out.println("Usage: SequenceFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        return;
    }
    Config config = new Config();
    config.setNumWorkers(1);
    SentenceSpout spout = new SentenceSpout();
    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    // rotate files when they reach 5MB
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/tmp/source/").withExtension(".seq");
    // create sequence format instance.
    DefaultSequenceFormat format = new DefaultSequenceFormat("timestamp", "sentence");
    Yaml yaml = new Yaml();
    Map<String, Object> yamlConf;
    // try-with-resources closes the file even when parsing fails.
    try (InputStream in = new FileInputStream(args[1])) {
        // NOTE(review): Yaml.load on an operator-supplied file; only point this at trusted
        // configuration files.
        @SuppressWarnings("unchecked")
        Map<String, Object> loaded = (Map<String, Object>) yaml.load(in);
        yamlConf = loaded;
    }
    config.put("hdfs.config", yamlConf);
    SequenceFileBolt bolt = new SequenceFileBolt()
        .withFsUrl(args[0])
        .withConfigKey("hdfs.config")
        .withFileNameFormat(fileNameFormat)
        .withSequenceFormat(format)
        .withRotationPolicy(rotationPolicy)
        .withSyncPolicy(syncPolicy)
        .withCompressionType(SequenceFile.CompressionType.RECORD)
        .withCompressionCodec("deflate")
        .addRotationAction(new MoveFileAction().toDestination("/tmp/dest/"));
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> SequenceFileBolt
    builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);
    if (args.length == 2) {
        // Local mode: both resources are closed when the block exits, then the JVM is stopped.
        try (LocalCluster cluster = new LocalCluster();
            LocalTopology topo = cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology())) {
            waitForSeconds(120);
        }
        System.exit(0);
    } else {
        // args.length == 3: submit under the caller-supplied topology name.
        StormSubmitter.submitTopology(args[2], config, builder.createTopology());
    }
}
Also used : LocalCluster(org.apache.storm.LocalCluster) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) Yaml(org.yaml.snakeyaml.Yaml) FileInputStream(java.io.FileInputStream) LocalTopology(org.apache.storm.LocalCluster.LocalTopology) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 3 with MoveFileAction

use of org.apache.storm.hdfs.common.rotation.MoveFileAction in project storm by apache.

the class TridentSequenceTopology method buildTopology.

/**
 * Assembles a Trident topology in which a cycling {@code FixedBatchSpout} of (sentence, key)
 * tuples is persisted through an {@code HdfsStateFactory} configured with sequence-file options.
 *
 * @param hdfsUrl value passed to {@code withFsUrl} on the sequence-file options
 * @return the topology produced by {@code TridentTopology.build()}
 */
public static StormTopology buildTopology(String hdfsUrl) {
    // Source: five fixed sentences, replayed in a cycle, with a maximum batch size argument of 1000.
    Fields spoutFields = new Fields("sentence", "key");
    FixedBatchSpout sentenceSource = new FixedBatchSpout(
        spoutFields,
        1000,
        new Values("the cow jumped over the moon", 1L),
        new Values("the man went to the store and bought some candy", 2L),
        new Values("four score and seven years ago", 3L),
        new Values("how many apples can you eat", 4L),
        new Values("to be or not to be the person", 5L));
    sentenceSource.setCycle(true);

    TridentTopology trident = new TridentTopology();
    Stream sentences = trident.newStream("spout1", sentenceSource);

    // Sink configuration: file naming, 5 MB size-based rotation, and a move-on-rotate action.
    Fields persistedFields = new Fields("sentence", "key");
    FileNameFormat nameFormat = new DefaultFileNameFormat()
        .withPath("/tmp/trident")
        .withPrefix("trident")
        .withExtension(".seq");
    FileRotationPolicy sizeRotation = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
    DefaultSequenceFormat sequenceFormat = new DefaultSequenceFormat("key", "sentence");
    HdfsState.Options sequenceOptions = new HdfsState.SequenceFileOptions()
        .withFileNameFormat(nameFormat)
        .withSequenceFormat(sequenceFormat)
        .withRotationPolicy(sizeRotation)
        .withFsUrl(hdfsUrl)
        .withConfigKey("hdfs.config")
        .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
    StateFactory stateFactory = new HdfsStateFactory().withOptions(sequenceOptions);

    // The returned TridentState is not needed; the call itself wires the sink into the topology.
    sentences.partitionPersist(stateFactory, persistedFields, new HdfsUpdater(), new Fields());
    return trident.build();
}
Also used : TridentState(org.apache.storm.trident.TridentState) Values(org.apache.storm.tuple.Values) FileRotationPolicy(org.apache.storm.hdfs.trident.rotation.FileRotationPolicy) MoveFileAction(org.apache.storm.hdfs.common.rotation.MoveFileAction) Fields(org.apache.storm.tuple.Fields) StateFactory(org.apache.storm.trident.state.StateFactory) TridentTopology(org.apache.storm.trident.TridentTopology) FileInputStream(java.io.FileInputStream) Stream(org.apache.storm.trident.Stream) InputStream(java.io.InputStream) FileSizeRotationPolicy(org.apache.storm.hdfs.trident.rotation.FileSizeRotationPolicy)

Aggregations

FileInputStream (java.io.FileInputStream)3 InputStream (java.io.InputStream)3 MoveFileAction (org.apache.storm.hdfs.common.rotation.MoveFileAction)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Config (org.apache.storm.Config)2 LocalCluster (org.apache.storm.LocalCluster)2 LocalTopology (org.apache.storm.LocalCluster.LocalTopology)2 FileRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy)2 CountSyncPolicy (org.apache.storm.hdfs.bolt.sync.CountSyncPolicy)2 SyncPolicy (org.apache.storm.hdfs.bolt.sync.SyncPolicy)2 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)2 Yaml (org.yaml.snakeyaml.Yaml)2 DefaultFileNameFormat (org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat)1 DelimitedRecordFormat (org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat)1 FileNameFormat (org.apache.storm.hdfs.bolt.format.FileNameFormat)1 RecordFormat (org.apache.storm.hdfs.bolt.format.RecordFormat)1 FileSizeRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)1 TimedRotationPolicy (org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy)1