Use of org.apache.storm.hive.common.HiveOptions in project storm by apache.
From the class TestHiveBolt, method testWithByteArrayIdandMessage:
@Test
public void testWithByteArrayIdandMessage() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields(colNames))
            .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(2)
            .withBatchSize(2);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    String city = "sunnyvale";
    String state = "ca";
    checkRecordCountInTable(tblName, dbName, 0);
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 4; i++) {
        Tuple tuple = generateTestTuple(id, msg, city, state);
        bolt.execute(tuple);
        tupleSet.add(tuple);
    }
    for (Tuple t : tupleSet) {
        verify(collector).ack(t);
    }
    checkRecordCountInTable(tblName, dbName, 4);
    bolt.cleanup();
}
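The test above relies on fixtures defined elsewhere in TestHiveBolt. A minimal sketch of what they are assumed to look like follows; the names mirror the calls in the snippets, but the concrete values (metastore URI, database, column names, mock setup) are illustrative assumptions rather than the project's actual setup.

// Illustrative assumptions only: fixtures the TestHiveBolt snippets appear to rely on.
private static final String metaStoreURI = "thrift://localhost:9083"; // assumed local Hive metastore
private static final String dbName = "testdb";                        // assumed test database
private static final String tblName = "test_table";                   // assumed test table
private final String[] colNames = { "id", "msg" };                    // columns matching generateTestTuple(id, msg, ...)
private final String[] partNames = { "city", "state" };               // partition columns
private final Config config = new Config();
private final OutputCollector collector = Mockito.mock(OutputCollector.class); // enables verify(collector).ack(...)
private HiveBolt bolt;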
Use of org.apache.storm.hive.common.HiveOptions in project storm by apache.
From the class TestHiveBolt, method testJsonWriter:
@Test
public void testJsonWriter() throws Exception {
    // A JSON record does not need its columns to be in the same order
    // as the table in Hive.
    JsonRecordHiveMapper mapper = new JsonRecordHiveMapper()
            .withColumnFields(new Fields(colNames1))
            .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(2)
            .withBatchSize(1);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Tuple tuple1 = generateTestTuple(1, "SJC", "Sunnyvale", "CA");
    //Tuple tuple2 = generateTestTuple(2, "SFO", "San Jose", "CA");
    bolt.execute(tuple1);
    verify(collector).ack(tuple1);
    //bolt.execute(tuple2);
    //verify(collector).ack(tuple2);
    checkDataWritten(tblName, dbName, "1,SJC,Sunnyvale,CA");
    bolt.cleanup();
}
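To make the comment about column order concrete, here is a hedged sketch: colNames1 is assumed to list the same columns as the table but in a different order, which JsonRecordHiveMapper tolerates because each tuple is serialized as a JSON object keyed by field name. The exact contents of colNames1 are an assumption, not taken from the test source.

// Assumed contents of colNames1: the table's columns, deliberately reordered.
String[] colNames1 = { "msg", "id" }; // table order is (id, msg); JSON records carry field names, so order is irrelevant
JsonRecordHiveMapper jsonMapper = new JsonRecordHiveMapper()
        .withColumnFields(new Fields(colNames1))
        .withPartitionFields(new Fields(partNames));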
Use of org.apache.storm.hive.common.HiveOptions in project storm by apache.
From the class BucketTestHiveTopology, method main:
public static void main(String[] args) throws Exception {
    if ((args == null) || (args.length < 7)) {
        System.out.println("Usage: BucketTestHiveTopology metastoreURI " + "dbName tableName dataFileLocation hiveBatchSize " + "hiveTickTupleIntervalSecs workers [topologyName] [keytab file]" + " [principal name]");
        System.exit(1);
    }
    String metaStoreURI = args[0];
    String dbName = args[1];
    String tblName = args[2];
    String sourceFileLocation = args[3];
    Integer hiveBatchSize = Integer.parseInt(args[4]);
    Integer hiveTickTupleIntervalSecs = Integer.parseInt(args[5]);
    Integer workers = Integer.parseInt(args[6]);
    String[] colNames = { "ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk", "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity", "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost", "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit" };
    Config config = new Config();
    config.setNumWorkers(workers);
    UserDataSpout spout = new UserDataSpout().withDataFile(sourceFileLocation);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields(colNames))
            .withTimeAsPartitionField("yyyy/MM/dd");
    HiveOptions hiveOptions;
    hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(hiveBatchSize);
    // the tick tuple interval had to be made a mandatory argument since it is positional
    if (hiveTickTupleIntervalSecs > 0) {
        hiveOptions.withTickTupleInterval(hiveTickTupleIntervalSecs);
    }
    if (args.length == 10) {
        hiveOptions.withKerberosKeytab(args[8]).withKerberosPrincipal(args[9]);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 14).shuffleGrouping(USER_SPOUT_ID);
    if (args.length == 7) {
        // no topology name given: run the topology on an in-process LocalCluster
        try (LocalCluster cluster = new LocalCluster();
             LocalTopology topo = cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology())) {
            waitForSeconds(20);
        }
        System.exit(0);
    } else {
        StormSubmitter.submitTopology(args[7], config, builder.createTopology());
    }
}
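A hedged usage sketch of running this topology in local mode by calling main directly; every argument value below is a placeholder, not a path or name taken from the project.

// Hypothetical local run of the topology above (all argument values are placeholders).
BucketTestHiveTopology.main(new String[] {
    "thrift://metastore-host:9083", // metastoreURI
    "testdb",                       // dbName
    "store_sales",                  // tableName
    "/tmp/store_sales.dat",         // dataFileLocation
    "100",                          // hiveBatchSize
    "10",                           // hiveTickTupleIntervalSecs
    "1"                             // workers; no topology name follows, so the LocalCluster branch runs
});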
Use of org.apache.storm.hive.common.HiveOptions in project storm by apache.
From the class TridentHiveTopology, method buildTopology:
public static StormTopology buildTopology(String metaStoreURI, String dbName, String tblName, Object keytab, Object principal) {
    int batchSize = 100;
    FixedBatchSpout spout = new FixedBatchSpout(batchSize);
    spout.setCycle(true);
    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("hiveTridentspout1", spout);
    String[] partNames = { "city", "state" };
    String[] colNames = { "id", "name", "phone", "street" };
    Fields hiveFields = new Fields("id", "name", "phone", "street", "city", "state");
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields(colNames))
            .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions;
    if (keytab != null && principal != null) {
        hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
                .withTxnsPerBatch(10)
                .withBatchSize(batchSize)
                .withIdleTimeout(10)
                .withCallTimeout(30000)
                .withKerberosKeytab((String) keytab)
                .withKerberosPrincipal((String) principal);
    } else {
        hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
                .withTxnsPerBatch(10)
                .withBatchSize(batchSize)
                .withCallTimeout(30000)
                .withIdleTimeout(10);
    }
    StateFactory factory = new HiveStateFactory().withOptions(hiveOptions);
    TridentState state = stream.partitionPersist(factory, hiveFields, new HiveUpdater(), new Fields());
    return topology.build();
}
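A hedged sketch of how the topology returned by buildTopology could be submitted; the topology name, Config settings, and the null keytab/principal (for an unsecured cluster) are assumptions, not the project's actual main method.

// Assumed submission code; names and settings are illustrative only.
public static void main(String[] args) throws Exception {
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    Config conf = new Config();
    conf.setMaxSpoutPending(5); // limit in-flight Trident batches
    StormSubmitter.submitTopology("tridentHiveTopology", conf,
            buildTopology(metaStoreUri, dbName, tblName, null, null)); // null keytab/principal: no Kerberos
}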
Use of org.apache.storm.hive.common.HiveOptions in project storm by apache.
From the class TestHiveBolt, method testWithTimeformat:
@Test
public void testWithTimeformat() throws Exception {
    String[] partNames1 = { "dt" };
    String timeFormat = "yyyy/MM/dd";
    HiveSetupUtil.dropDB(conf, dbName1);
    HiveSetupUtil.createDbAndTable(conf, dbName1, tblName1, null, colNames, colTypes, partNames1, dbLocation);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields(colNames))
            .withTimeAsPartitionField(timeFormat);
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
            .withTxnsPerBatch(2)
            .withBatchSize(1)
            .withMaxOpenConnections(1);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    Date d = new Date();
    SimpleDateFormat parseDate = new SimpleDateFormat(timeFormat);
    String today = parseDate.format(d.getTime());
    checkRecordCountInTable(tblName1, dbName1, 0);
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 2; i++) {
        Tuple tuple = generateTestTuple(id, msg, null, null);
        tupleSet.add(tuple);
        bolt.execute(tuple);
    }
    for (Tuple t : tupleSet) {
        verify(collector).ack(t);
    }
    checkDataWritten(tblName1, dbName1, "100,test-123," + today, "100,test-123," + today);
    bolt.cleanup();
}
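For clarity, the expected strings passed to checkDataWritten are built from the same pattern handed to withTimeAsPartitionField: the mapper partitions records on the write time formatted with that pattern, so the test recomputes the value independently. A minimal sketch of that relationship (the mapper's internal partition handling is assumed, not quoted):

// Sketch: how the "dt" partition value asserted above is derived (assumed behaviour).
SimpleDateFormat fmt = new SimpleDateFormat("yyyy/MM/dd");
String expectedPartition = fmt.format(System.currentTimeMillis());
// Records land in partition dt=<expectedPartition>, so the test expects
// rows of the form "100,test-123," + expectedPartition.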