Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.
From the class TestHiveBolt, the method testWithoutPartitions:
@Test
public void testWithoutPartitions() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(2)
        .withAutoCreatePartitions(false);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    String city = "sunnyvale";
    String state = "ca";
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 4; i++) {
        Tuple tuple = generateTestTuple(id, msg, city, state);
        bolt.execute(tuple);
        tupleSet.add(tuple);
    }
    List<String> partVals = Collections.emptyList();
    for (Tuple t : tupleSet) {
        verify(collector).ack(t);
    }
    List<byte[]> recordWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordWritten);
    Assert.assertEquals(4, recordWritten.size());
    bolt.cleanup();
}
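The helper generateTestTuple is not shown in these snippets. Below is a minimal sketch of what such a helper can look like, assuming Storm's testing utilities (Testing.testTuple and MkTupleParam); the project's actual helper may build the Tuple differently.

import org.apache.storm.Testing;
import org.apache.storm.testing.MkTupleParam;
import org.apache.storm.tuple.Tuple;
import com.google.common.collect.Lists;

// Hypothetical stand-in for the test's generateTestTuple helper: builds a Tuple whose
// fields match the column names ("id", "msg") and partition names ("city", "state")
// used by the mappers in these tests.
private static Tuple generateTestTuple(Object id, Object msg, Object city, Object state) {
    MkTupleParam param = new MkTupleParam();
    param.setFields("id", "msg", "city", "state");
    return Testing.testTuple(Lists.newArrayList(id, msg, city, state), param);
}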
Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.
From the class TridentHiveTopology, the method buildTopology:
public static StormTopology buildTopology(String metaStoreUri, String dbName, String tblName, Object keytab, Object principal) {
    int batchSize = 100;
    FixedBatchSpout spout = new FixedBatchSpout(batchSize);
    spout.setCycle(true);
    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("hiveTridentspout1", spout);
    String[] partNames = { "city", "state" };
    String[] colNames = { "id", "name", "phone", "street" };
    Fields hiveFields = new Fields("id", "name", "phone", "street", "city", "state");
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions;
    if (keytab != null && principal != null) {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withIdleTimeout(10)
            .withCallTimeout(30000)
            .withKerberosKeytab((String) keytab)
            .withKerberosPrincipal((String) principal);
    } else {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withCallTimeout(30000)
            .withIdleTimeout(10);
    }
    StateFactory factory = new HiveStateFactory().withOptions(hiveOptions);
    TridentState state = stream.partitionPersist(factory, hiveFields, new HiveUpdater(), new Fields());
    return topology.build();
}
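buildTopology only constructs the topology. The following is a minimal sketch of a caller, assuming the usual Storm submission entry point; the project's actual TridentHiveTopology main handles more arguments and local-mode runs.

// Illustrative only: submit the Trident topology built above without Kerberos credentials.
public static void main(String[] args) throws Exception {
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    Config conf = new Config();
    conf.setMaxSpoutPending(5);
    StormSubmitter.submitTopology("tridentHiveTopology", conf,
        buildTopology(metaStoreUri, dbName, tblName, null, null));
}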
Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.
From the class BucketTestHiveTopology, the method main:
public static void main(String[] args) throws Exception {
    if ((args == null) || (args.length < 7)) {
        System.out.println("Usage: BucketTestHiveTopology metastoreURI "
            + "dbName tableName dataFileLocation hiveBatchSize "
            + "hiveTickTupleIntervalSecs workers [topologyName] [keytab file]"
            + " [principal name]");
        System.exit(1);
    }
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    Integer hiveBatchSize = Integer.parseInt(args[4]);
    Integer hiveTickTupleIntervalSecs = Integer.parseInt(args[5]);
    Integer workers = Integer.parseInt(args[6]);
    String[] colNames = { "ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk", "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity", "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost", "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit" };
    Config config = new Config();
    config.setNumWorkers(workers);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withTimeAsPartitionField("yyyy/MM/dd");
    HiveOptions hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
        .withTxnsPerBatch(10)
        .withBatchSize(hiveBatchSize);
    // The tick tuple interval is a mandatory positional argument; only apply it when positive.
    if (hiveTickTupleIntervalSecs > 0) {
        hiveOptions.withTickTupleInterval(hiveTickTupleIntervalSecs);
    }
    if (args.length == 10) {
        hiveOptions.withKerberosKeytab(args[8]).withKerberosPrincipal(args[9]);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    String sourceFileLocation = args[3];
    UserDataSpout spout = new UserDataSpout().withDataFile(sourceFileLocation);
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 14).shuffleGrouping(USER_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    // The optional topology name is args[7], so guard against args.length of exactly 7.
    if (args.length > 7) {
        topoName = args[7];
    }
    // Submit under the resolved name so the default is used when no name was given.
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
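For quick experiments without a cluster, the same topology can be run in process. A minimal local-mode sketch, assuming a Storm version where LocalCluster is AutoCloseable; it reuses the config, builder, and topoName variables from the example above.

// Illustrative local-mode run instead of StormSubmitter; not part of BucketTestHiveTopology.
try (LocalCluster cluster = new LocalCluster()) {
    cluster.submitTopology(topoName, config, builder.createTopology());
    Thread.sleep(60_000);   // let the topology write a few Hive transaction batches
    cluster.killTopology(topoName);
}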
Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.
From the class TestHiveBolt, the method testWithTimeformat:
@Test
public void testWithTimeformat() throws Exception {
    String timeFormat = "yyyy/MM/dd";
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withTimeAsPartitionField(timeFormat);
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1)
        .withMaxOpenConnections(1);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    Date d = new Date();
    SimpleDateFormat parseDate = new SimpleDateFormat(timeFormat);
    String today = parseDate.format(d.getTime());
    List<Tuple> tuples = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        Tuple tuple = generateTestTuple(id, msg, null, null);
        tuples.add(tuple);
        bolt.execute(tuple);
    }
    for (Tuple t : tuples) {
        verify(collector).ack(t);
    }
    List<String> partVals = Lists.newArrayList(today);
    List<byte[]> recordsWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordsWritten);
    Assert.assertEquals(2, recordsWritten.size());
    byte[] mapped = generateDelimiteredRecord(Lists.newArrayList(id, msg), mapper.getFieldDelimiter());
    for (byte[] record : recordsWritten) {
        Assert.assertArrayEquals(mapped, record);
    }
    bolt.cleanup();
}
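The expected record in these assertions is simply the column values joined by the mapper's field delimiter. A small, self-contained sketch of a generateDelimiteredRecord-style helper, assuming a string delimiter and UTF-8 output; the project's actual helper may differ in detail.

import java.nio.charset.StandardCharsets;
import java.util.List;

// Builds the delimited record the bolt is expected to write: values joined by the delimiter.
private static byte[] generateDelimiteredRecord(List<Object> values, String fieldDelimiter) {
    StringBuilder builder = new StringBuilder();
    for (Object value : values) {
        if (builder.length() > 0) {
            builder.append(fieldDelimiter);
        }
        builder.append(value);
    }
    return builder.toString().getBytes(StandardCharsets.UTF_8);
}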
Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.
From the class TestHiveBolt, the method testData:
@Test
public void testData() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, new OutputCollector(collector));
    Integer id = 1;
    String msg = "SJC";
    String city = "Sunnyvale";
    String state = "CA";
    Tuple tuple1 = generateTestTuple(id, msg, city, state);
    bolt.execute(tuple1);
    verify(collector).ack(tuple1);
    List<String> partVals = Lists.newArrayList(city, state);
    List<byte[]> recordsWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordsWritten);
    Assert.assertEquals(1, recordsWritten.size());
    byte[] mapped = generateDelimiteredRecord(Lists.newArrayList(id, msg), mapper.getFieldDelimiter());
    Assert.assertArrayEquals(mapped, recordsWritten.get(0));
    bolt.cleanup();
}
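TestingHiveBolt itself is not shown in these snippets. Below is a minimal, self-contained sketch of the contract implied by its getRecordWritten method, namely that records are captured per partition instead of being written to Hive; this is an assumption about the test double, not its actual implementation.

import java.util.*;

// Hypothetical recorder illustrating the getRecordWritten(partVals) contract used above.
class WrittenRecords {
    private final Map<List<String>, List<byte[]>> byPartition = new HashMap<>();

    void add(List<String> partVals, byte[] record) {
        byPartition.computeIfAbsent(partVals, k -> new ArrayList<>()).add(record);
    }

    List<byte[]> getRecordWritten(List<String> partVals) {
        return byPartition.getOrDefault(partVals, Collections.emptyList());
    }
}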