Search in sources :

Example 6 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class KafkaHdfsTopo method getTopology.

/**
 * Assembles a topology with a Kafka spout feeding an HDFS bolt.
 *
 * <p>Parallelism hints and the HDFS sync count are read from {@code config} through
 * {@code getInt}; the ZooKeeper URI, Kafka topic, HDFS URI and HDFS path are read
 * through {@code getStr}.
 *
 * @param config topology configuration map
 * @return the topology produced by {@link TopologyBuilder#createTopology()}
 */
public static StormTopology getTopology(Map config) {
    final int spoutParallelism = getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltParallelism = getInt(config, BOLT_NUM, DEFAULT_BOLT_NUM);
    final int syncCount = getInt(config, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // 1 - Kafka spout
    String zkUri = getStr(config, ZOOKEEPER_URI);
    String topic = getStr(config, KAFKA_TOPIC);
    BrokerHosts hosts = new ZkHosts(zkUri);
    // A fresh random id is passed as the last SpoutConfig argument on every call.
    SpoutConfig kafkaConfig = new SpoutConfig(hosts, topic, "/" + topic, UUID.randomUUID().toString());
    kafkaConfig.scheme = new StringMultiSchemeWithTopic();
    kafkaConfig.ignoreZkOffsets = true;
    KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

    // 2 - HDFS bolt
    String hdfsUrl = getStr(config, HDFS_URI);
    RecordFormat recordFormat = new LineWriter("str");
    SyncPolicy sync = new CountSyncPolicy(syncCount);
    FileRotationPolicy rotation = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    FileNameFormat nameFormat = new DefaultFileNameFormat()
        .withPath(getStr(config, HDFS_PATH));
    HdfsBolt hdfsBolt = new HdfsBolt()
        .withFsUrl(hdfsUrl)
        .withFileNameFormat(nameFormat)
        .withRecordFormat(recordFormat)
        .withRotationPolicy(rotation)
        .withSyncPolicy(sync);

    // 3 - Wire spout -> bolt
    TopologyBuilder topology = new TopologyBuilder();
    topology.setSpout(SPOUT_ID, kafkaSpout, spoutParallelism);
    topology.setBolt(BOLT_ID, hdfsBolt, boltParallelism).localOrShuffleGrouping(SPOUT_ID);
    return topology.createTopology();
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) SpoutConfig(org.apache.storm.kafka.SpoutConfig) ZkHosts(org.apache.storm.kafka.ZkHosts) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) StringMultiSchemeWithTopic(org.apache.storm.kafka.StringMultiSchemeWithTopic) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) BrokerHosts(org.apache.storm.kafka.BrokerHosts) HdfsBolt(org.apache.storm.hdfs.bolt.HdfsBolt) KafkaSpout(org.apache.storm.kafka.KafkaSpout) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)

Example 7 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project storm by apache.

the class StrGenSpoutHdfsBoltTopo method getTopology.

/**
 * Assembles a topology with a {@code StringGenSpout} feeding an HDFS bolt.
 *
 * <p>The HDFS sync count and the parallelism hints are read from {@code topoConf}
 * through {@code Helper.getInt}; the HDFS URI and path through {@code Helper.getStr}.
 *
 * @param topoConf topology configuration map
 * @return the topology produced by {@link TopologyBuilder#createTopology()}
 */
public static StormTopology getTopology(Map topoConf) {
    final int syncCount = Helper.getInt(topoConf, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // 1 - String generator spout emitting into the "str" field
    StringGenSpout generator = new StringGenSpout(100).withFieldName("str");

    // 2 - HDFS bolt
    String hdfsUrl = Helper.getStr(topoConf, HDFS_URI);
    RecordFormat recordFormat = new LineWriter("str");
    SyncPolicy sync = new CountSyncPolicy(syncCount);
    FileRotationPolicy rotation = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    final int spoutParallelism = Helper.getInt(topoConf, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltParallelism = Helper.getInt(topoConf, BOLT_NUM, DEFAULT_BOLT_NUM);
    // Default file name format, rooted at the configured HDFS path
    FileNameFormat nameFormat = new DefaultFileNameFormat()
        .withPath(Helper.getStr(topoConf, HDFS_PATH));
    HdfsBolt hdfsBolt = new HdfsBolt()
        .withFsUrl(hdfsUrl)
        .withFileNameFormat(nameFormat)
        .withRecordFormat(recordFormat)
        .withRotationPolicy(rotation)
        .withSyncPolicy(sync);

    // 3 - Wire spout -> bolt
    TopologyBuilder topology = new TopologyBuilder();
    topology.setSpout(SPOUT_ID, generator, spoutParallelism);
    topology.setBolt(BOLT_ID, hdfsBolt, boltParallelism).localOrShuffleGrouping(SPOUT_ID);
    return topology.createTopology();
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) RecordFormat(org.apache.storm.hdfs.bolt.format.RecordFormat) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) SyncPolicy(org.apache.storm.hdfs.bolt.sync.SyncPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) FileNameFormat(org.apache.storm.hdfs.bolt.format.FileNameFormat) FileRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy) DefaultFileNameFormat(org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat) HdfsBolt(org.apache.storm.hdfs.bolt.HdfsBolt) StringGenSpout(org.apache.storm.perf.spout.StringGenSpout) FileSizeRotationPolicy(org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)

Example 8 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project metron by apache.

the class HdfsWriterTest method testHandleAttemptsRotateIfStreamClosed.

/**
 * Writes one message, closes the handler's output file, then hands the same message
 * to the handler again. Expects two output files, each holding that single message.
 */
@Test
@SuppressWarnings("unchecked")
public void testHandleAttemptsRotateIfStreamClosed() throws Exception {
    String pathFunction = "FORMAT('test-%s/%s', test.key, test.key)";
    WriterConfiguration writerConfig = buildWriterConfiguration(pathFunction);
    HdfsWriter hdfsWriter = new HdfsWriter().withFileNameFormat(testFormat);
    hdfsWriter.init(new HashMap<String, String>(), createTopologyContext(), writerConfig);

    JSONObject payload = new JSONObject();
    payload.put("test.key", "test.value");
    ArrayList<JSONObject> batch = new ArrayList<>();
    batch.add(payload);
    ArrayList<Tuple> noTuples = new ArrayList<>();

    CountSyncPolicy countPolicy = new CountSyncPolicy(5);
    ClonedSyncPolicyCreator policyCreator = new ClonedSyncPolicyCreator(countPolicy);

    // First write goes through the writer; the output file is then closed before
    // the same message is passed straight to the handler.
    String handlerPath = "test-test.value/test.value";
    hdfsWriter.write(SENSOR_NAME, writerConfig, noTuples, batch);
    hdfsWriter.getSourceHandler(SENSOR_NAME, handlerPath, writerConfig).closeOutputFile();
    hdfsWriter.getSourceHandler(SENSOR_NAME, handlerPath, writerConfig)
        .handle(payload, SENSOR_NAME, writerConfig, policyCreator);
    hdfsWriter.close();

    File outputDir = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");
    // Each of the two files is expected to contain the message exactly once.
    ArrayList<String> expectedLines = new ArrayList<>();
    expectedLines.add(payload.toJSONString());

    // Two files means the second write landed in a new file.
    Assert.assertEquals(2, outputDir.listFiles().length);
    for (File written : outputDir.listFiles()) {
        List<String> actualLines = Files.readAllLines(written.toPath());
        Assert.assertEquals(1, actualLines.size());
        Assert.assertEquals(expectedLines, actualLines);
    }
}
Also used : ArrayList(java.util.ArrayList) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) WriterConfiguration(org.apache.metron.common.configuration.writer.WriterConfiguration) IndexingWriterConfiguration(org.apache.metron.common.configuration.writer.IndexingWriterConfiguration) JSONObject(org.json.simple.JSONObject) File(java.io.File) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)

Example 9 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project metron by apache.

the class HdfsWriterTest method testSingleFileIfNoStreamClosed.

/**
 * Writes the same single-message batch twice without closing the output stream in
 * between, and expects exactly one output file containing the message on two lines.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleFileIfNoStreamClosed() throws Exception {
    String function = "FORMAT('test-%s/%s', test.key, test.key)";
    WriterConfiguration config = buildWriterConfiguration(function);
    HdfsWriter writer = new HdfsWriter().withFileNameFormat(testFormat);
    writer.init(new HashMap<String, String>(), config);
    writer.initFileNameFormat(createTopologyContext());

    JSONObject message = new JSONObject();
    message.put("test.key", "test.value");
    // Plain list instead of double-brace initialization: avoids an anonymous
    // ArrayList subclass and the raw BulkMessage type.
    List<BulkMessage<JSONObject>> messages = new ArrayList<>();
    messages.add(new BulkMessage<>("message1", message));

    writer.write(SENSOR_NAME, config, messages);
    writer.write(SENSOR_NAME, config, messages);
    writer.close();

    File outputFolder = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");
    // The message should show up twice, both times in the same file
    ArrayList<String> expected = new ArrayList<>();
    expected.add(message.toJSONString());
    expected.add(message.toJSONString());

    // Assert both messages are in the same file, because the stream stayed open
    File[] outputFiles = outputFolder.listFiles();
    assertEquals(1, outputFiles.length);
    for (File file : outputFiles) {
        List<String> lines = Files.readAllLines(file.toPath());
        // Two lines in the single file
        assertEquals(2, lines.size());
        assertEquals(expected, lines);
    }
}
Also used : CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy) WriterConfiguration(org.apache.metron.common.configuration.writer.WriterConfiguration) IndexingWriterConfiguration(org.apache.metron.common.configuration.writer.IndexingWriterConfiguration) BulkMessage(org.apache.metron.common.writer.BulkMessage) JSONObject(org.json.simple.JSONObject) File(java.io.File) Test(org.junit.jupiter.api.Test)

Example 10 with CountSyncPolicy

use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in project metron by apache.

the class HdfsWriter method init.

/**
 * Stores the Storm configuration, creates a new {@code StellarProcessor}, and picks
 * the sync policy creator.
 *
 * <p>When {@code syncPolicy} is set it is wrapped in a {@code ClonedSyncPolicyCreator};
 * otherwise the creator builds a {@code CountSyncPolicy} from the batch size.
 *
 * @param stormConfig    Storm configuration, kept in {@code this.stormConfig}
 * @param configurations writer configuration (not read by this method)
 */
@Override
public void init(Map stormConfig, WriterConfiguration configurations) {
    this.stormConfig = stormConfig;
    this.stellarProcessor = new StellarProcessor();

    if (syncPolicy == null) {
        // Nothing supplied by the user: tie the sync count to the batch size,
        // using 1 when no writer configuration is available.
        LOG.debug("No user specified sync policy, using CountSyncPolicy based on batch size");
        syncPolicyCreator = (sensor, writerConfig) -> {
            int syncCount = (writerConfig == null) ? 1 : writerConfig.getBatchSize(sensor);
            return new CountSyncPolicy(syncCount);
        };
        return;
    }

    // A user-specified policy takes precedence and is never overridden.
    LOG.debug("Using user specified sync policy {}", () -> syncPolicy.getClass().getSimpleName());
    syncPolicyCreator = new ClonedSyncPolicyCreator(syncPolicy);
}
Also used : StellarProcessor(org.apache.metron.stellar.common.StellarProcessor) CountSyncPolicy(org.apache.storm.hdfs.bolt.sync.CountSyncPolicy)

Aggregations

CountSyncPolicy (org.apache.storm.hdfs.bolt.sync.CountSyncPolicy)15 SyncPolicy (org.apache.storm.hdfs.bolt.sync.SyncPolicy)10 DefaultFileNameFormat (org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat)9 FileNameFormat (org.apache.storm.hdfs.bolt.format.FileNameFormat)9 FileRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy)9 FileSizeRotationPolicy (org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy)9 RecordFormat (org.apache.storm.hdfs.bolt.format.RecordFormat)6 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)6 HdfsBolt (org.apache.storm.hdfs.bolt.HdfsBolt)4 Test (org.junit.jupiter.api.Test)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 IndexingWriterConfiguration (org.apache.metron.common.configuration.writer.IndexingWriterConfiguration)2 WriterConfiguration (org.apache.metron.common.configuration.writer.WriterConfiguration)2 StellarProcessor (org.apache.metron.stellar.common.StellarProcessor)2 Config (org.apache.storm.Config)2