Example use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in the Apache Storm project, from class KafkaHdfsTopo, method getTopology.
/**
 * Builds a topology that reads strings from a Kafka topic and writes them to HDFS.
 *
 * @param config topology configuration; supplies spout/bolt parallelism, the HDFS
 *               batch size, the ZooKeeper URI, the Kafka topic name, and the HDFS
 *               URI and output path
 * @return the assembled StormTopology (Kafka spout -> HDFS bolt)
 */
public static StormTopology getTopology(Map config) {
    final int spoutNum = getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltNum = getInt(config, BOLT_NUM, DEFAULT_BOLT_NUM);
    final int hdfsBatch = getInt(config, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // 1 - Setup Kafka Spout --------
    String zkConnString = getStr(config, ZOOKEEPER_URI);
    String topicName = getStr(config, KAFKA_TOPIC);
    BrokerHosts brokerHosts = new ZkHosts(zkConnString);
    // Random consumer id so repeated runs do not share ZooKeeper offset state.
    SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topicName, "/" + topicName, UUID.randomUUID().toString());
    spoutConfig.scheme = new StringMultiSchemeWithTopic();
    // Always start fresh rather than resuming from offsets stored in ZooKeeper.
    spoutConfig.ignoreZkOffsets = true;
    KafkaSpout spout = new KafkaSpout(spoutConfig);

    // 2 - Setup HDFS Bolt --------
    String hdfsUrl = getStr(config, HDFS_URI); // renamed from "Hdfs_url" to follow lowerCamelCase
    RecordFormat format = new LineWriter("str");
    // Sync the filesystem after every hdfsBatch tuples.
    SyncPolicy syncPolicy = new CountSyncPolicy(hdfsBatch);
    // Rotate output files once they reach 1 GB.
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(getStr(config, HDFS_PATH));
    // Instantiate the HdfsBolt
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUrl)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);

    // 3 - Setup Topology --------
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SPOUT_ID, spout, spoutNum);
    builder.setBolt(BOLT_ID, bolt, boltNum).localOrShuffleGrouping(SPOUT_ID);
    return builder.createTopology();
}
Example use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in the Apache Storm project, from class StrGenSpoutHdfsBoltTopo, method getTopology.
/**
 * Builds a topology that generates random strings and writes them to HDFS.
 *
 * @param topoConf topology configuration; supplies spout/bolt parallelism, the
 *                 HDFS batch size, and the HDFS URI and output path
 * @return the assembled StormTopology (string-generator spout -> HDFS bolt)
 */
public static StormTopology getTopology(Map topoConf) {
    final int hdfsBatch = Helper.getInt(topoConf, HDFS_BATCH, DEFAULT_HDFS_BATCH);

    // 1 - Setup StringGen Spout --------
    StringGenSpout spout = new StringGenSpout(100).withFieldName("str");

    // 2 - Setup HDFS Bolt --------
    String hdfsUrl = Helper.getStr(topoConf, HDFS_URI); // renamed from "Hdfs_url" to follow lowerCamelCase
    RecordFormat format = new LineWriter("str");
    // Sync the filesystem after every hdfsBatch tuples.
    SyncPolicy syncPolicy = new CountSyncPolicy(hdfsBatch);
    // Rotate output files once they reach 1 GB.
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.GB);
    final int spoutNum = Helper.getInt(topoConf, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int boltNum = Helper.getInt(topoConf, BOLT_NUM, DEFAULT_BOLT_NUM);
    // Use default, Storm-generated file names
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(Helper.getStr(topoConf, HDFS_PATH));
    // Instantiate the HdfsBolt
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUrl)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);

    // 3 - Setup Topology --------
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(SPOUT_ID, spout, spoutNum);
    builder.setBolt(BOLT_ID, bolt, boltNum).localOrShuffleGrouping(SPOUT_ID);
    return builder.createTopology();
}
Example use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in the Apache Metron project, from class HdfsWriterTest, method testHandleAttemptsRotateIfStreamClosed.
// Verifies that handling a message after its output stream was closed forces a
// rotation, so the two writes end up in two distinct files.
@Test
@SuppressWarnings("unchecked")
public void testHandleAttemptsRotateIfStreamClosed() throws Exception {
    // Route output into a per-key directory derived from the message contents.
    String formatFunction = "FORMAT('test-%s/%s', test.key, test.key)";
    WriterConfiguration writerConfig = buildWriterConfiguration(formatFunction);
    HdfsWriter writer = new HdfsWriter().withFileNameFormat(testFormat);
    writer.init(new HashMap<String, String>(), createTopologyContext(), writerConfig);

    JSONObject json = new JSONObject();
    json.put("test.key", "test.value");
    ArrayList<JSONObject> messageBatch = new ArrayList<>();
    messageBatch.add(json);
    ArrayList<Tuple> tupleBatch = new ArrayList<>();

    CountSyncPolicy countPolicy = new CountSyncPolicy(5);
    ClonedSyncPolicyCreator policyCreator = new ClonedSyncPolicyCreator(countPolicy);

    // Write once, close the underlying stream, then handle the same message again:
    // the handler must detect the closed stream and rotate to a fresh file.
    writer.write(SENSOR_NAME, writerConfig, tupleBatch, messageBatch);
    writer.getSourceHandler(SENSOR_NAME, "test-test.value/test.value", writerConfig).closeOutputFile();
    writer.getSourceHandler(SENSOR_NAME, "test-test.value/test.value", writerConfig).handle(json, SENSOR_NAME, writerConfig, policyCreator);
    writer.close();

    File outputDir = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");
    // The message should show up twice, once in each file
    ArrayList<String> expectedLines = new ArrayList<>();
    expectedLines.add(json.toJSONString());

    // Two files prove a rotation actually happened after the stream was closed.
    Assert.assertEquals(2, outputDir.listFiles().length);
    for (File outFile : outputDir.listFiles()) {
        List<String> actualLines = Files.readAllLines(outFile.toPath());
        // Each rotated file holds exactly the single message written to it.
        Assert.assertEquals(1, actualLines.size());
        Assert.assertEquals(expectedLines, actualLines);
    }
}
Example use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in the Apache Metron project, from class HdfsWriterTest, method testSingleFileIfNoStreamClosed.
// Verifies that repeated writes with no intervening stream close land in a single
// output file (i.e. no rotation occurs while the stream stays open).
@Test
@SuppressWarnings("unchecked")
public void testSingleFileIfNoStreamClosed() throws Exception {
// Route output into a per-key directory derived from the message contents.
String function = "FORMAT('test-%s/%s', test.key, test.key)";
WriterConfiguration config = buildWriterConfiguration(function);
HdfsWriter writer = new HdfsWriter().withFileNameFormat(testFormat);
writer.init(new HashMap<String, String>(), config);
writer.initFileNameFormat(createTopologyContext());
JSONObject message = new JSONObject();
message.put("test.key", "test.value");
List<BulkMessage<JSONObject>> messages = new ArrayList<BulkMessage<JSONObject>>() {

    {
        add(new BulkMessage("message1", message));
    }
};
CountSyncPolicy basePolicy = new CountSyncPolicy(5);
// NOTE(review): `creator` is constructed but never passed to the writer in this
// test — presumably kept for parity with the rotation test; confirm it is needed.
ClonedSyncPolicyCreator creator = new ClonedSyncPolicyCreator(basePolicy);
// Write the same batch twice without closing the stream in between.
writer.write(SENSOR_NAME, config, messages);
writer.write(SENSOR_NAME, config, messages);
writer.close();
File outputFolder = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");
// The message should show up twice, both occurrences in the same file.
ArrayList<String> expected = new ArrayList<>();
expected.add(message.toJSONString());
expected.add(message.toJSONString());
// Assert both messages are in the same file, because the stream stayed open
assertEquals(1, outputFolder.listFiles().length);
for (File file : outputFolder.listFiles()) {
    List<String> lines = Files.readAllLines(file.toPath());
    // Two lines in the single file — one per write call.
    assertEquals(2, lines.size());
    assertEquals(expected, lines);
}
}
Example use of org.apache.storm.hdfs.bolt.sync.CountSyncPolicy in the Apache Metron project, from class HdfsWriter, method init.
/**
 * Initializes the writer: captures the Storm configuration, creates the Stellar
 * processor, and selects how sync policies are produced for each source.
 */
@Override
public void init(Map stormConfig, WriterConfiguration configurations) {
    this.stormConfig = stormConfig;
    this.stellarProcessor = new StellarProcessor();
    if (syncPolicy == null) {
        // No explicit policy configured: derive one from the per-source batch size.
        LOG.debug("No user specified sync policy, using CountSyncPolicy based on batch size");
        syncPolicyCreator = (source, config) -> new CountSyncPolicy(config == null ? 1 : config.getBatchSize(source));
    } else {
        // Honor the user-supplied policy by cloning it for each source.
        LOG.debug("Using user specified sync policy {}", () -> syncPolicy.getClass().getSimpleName());
        syncPolicyCreator = new ClonedSyncPolicyCreator(syncPolicy);
    }
}
Aggregations