Example 6 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

the class TestHiveBolt method testWithByteArrayIdandMessage.

@Test
public void testWithByteArrayIdandMessage() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(2);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    String city = "sunnyvale";
    String state = "ca";
    checkRecordCountInTable(tblName, dbName, 0);
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 4; i++) {
        Tuple tuple = generateTestTuple(id, msg, city, state);
        bolt.execute(tuple);
        tupleSet.add(tuple);
    }
    for (Tuple t : tupleSet) verify(collector).ack(t);
    checkRecordCountInTable(tblName, dbName, 4);
    bolt.cleanup();
}
Also used : Fields(org.apache.storm.tuple.Fields) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) HiveOptions(org.apache.storm.hive.common.HiveOptions) Tuple(org.apache.storm.tuple.Tuple) HashSet(java.util.HashSet) Test(org.junit.Test)
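
The two knobs above (withTxnsPerBatch, withBatchSize) are only part of the HiveOptions builder surface; the later examples on this page also use withIdleTimeout, withCallTimeout, withMaxOpenConnections and withTickTupleInterval. A minimal sketch combining them, assuming the same metaStoreURI, dbName, tblName and mapper fixtures as the test above; the inline comments paraphrase each option's intent rather than quoting its Javadoc:

HiveOptions tunedOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
    .withTxnsPerBatch(2)         // Hive transactions per transaction batch
    .withBatchSize(2)            // tuples written per transaction
    .withIdleTimeout(10)         // close idle writers after this many seconds
    .withCallTimeout(30000)      // milliseconds to wait on Hive/metastore calls
    .withMaxOpenConnections(1)   // cap on concurrent Hive connections
    .withTickTupleInterval(15);  // seconds between flush-triggering tick tuples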

Example 7 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

the class TestHiveBolt method testJsonWriter.

@Test
public void testJsonWriter() throws Exception {
    // A JSON record's columns don't need to be in the same order
    // as the Hive table's columns.
    JsonRecordHiveMapper mapper = new JsonRecordHiveMapper()
        .withColumnFields(new Fields(colNames1))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Tuple tuple1 = generateTestTuple(1, "SJC", "Sunnyvale", "CA");
    //Tuple tuple2 = generateTestTuple(2,"SFO","San Jose","CA");
    bolt.execute(tuple1);
    verify(collector).ack(tuple1);
    //bolt.execute(tuple2);
    //verify(collector).ack(tuple2);
    checkDataWritten(tblName, dbName, "1,SJC,Sunnyvale,CA");
    bolt.cleanup();
}
Also used : JsonRecordHiveMapper(org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper) Fields(org.apache.storm.tuple.Fields) HiveOptions(org.apache.storm.hive.common.HiveOptions) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)
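
The mapper choice is the point of this test: a JsonRecordHiveMapper writes each tuple as a record keyed by column name, so the permuted column list colNames1 still lands in the right table columns, while a DelimitedRecordHiveMapper emits positional text that must match the table's column order. A side-by-side sketch under that assumption, reusing the fixtures from the tests above:

// Order-sensitive: the column list must line up with the Hive table definition.
DelimitedRecordHiveMapper delimited = new DelimitedRecordHiveMapper()
    .withColumnFields(new Fields(colNames))
    .withPartitionFields(new Fields(partNames));
// Order-insensitive: fields are matched by name, not position.
JsonRecordHiveMapper json = new JsonRecordHiveMapper()
    .withColumnFields(new Fields(colNames1))
    .withPartitionFields(new Fields(partNames));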

Example 8 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

the class BucketTestHiveTopology method main.

public static void main(String[] args) throws Exception {
    if ((args == null) || (args.length < 7)) {
        System.out.println("Usage: BucketTestHiveTopology metastoreURI " + "dbName tableName dataFileLocation hiveBatchSize " + "hiveTickTupl]eIntervalSecs workers  [topologyNamey] [keytab file]" + " [principal name] ");
        System.exit(1);
    }
    String metaStoreURI = args[0];
    String dbName = args[1];
    String tblName = args[2];
    String sourceFileLocation = args[3];
    Integer hiveBatchSize = Integer.parseInt(args[4]);
    Integer hiveTickTupleIntervalSecs = Integer.parseInt(args[5]);
    Integer workers = Integer.parseInt(args[6]);
    String[] colNames = { "ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk", "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity", "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost", "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit" };
    Config config = new Config();
    config.setNumWorkers(workers);
    UserDataSpout spout = new UserDataSpout().withDataFile(sourceFileLocation);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper().withColumnFields(new Fields(colNames)).withTimeAsPartitionField("yyyy/MM/dd");
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(10)
        .withBatchSize(hiveBatchSize);
    // The tick tuple interval had to be a mandatory argument since it's positional.
    if (hiveTickTupleIntervalSecs > 0) {
        hiveOptions.withTickTupleInterval(hiveTickTupleIntervalSecs);
    }
    if (args.length == 10) {
        hiveOptions.withKerberosKeytab(args[8]).withKerberosPrincipal(args[9]);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 14).shuffleGrouping(USER_SPOUT_ID);
    // Exactly the seven required args means no topology name was given: run locally.
    if (args.length == 7) {
        try (LocalCluster cluster = new LocalCluster();
            LocalTopology topo = cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology())) {
            waitForSeconds(20);
        }
        System.exit(0);
    } else {
        StormSubmitter.submitTopology(args[7], config, builder.createTopology());
    }
}
Also used : LocalCluster(org.apache.storm.LocalCluster) Fields(org.apache.storm.tuple.Fields) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) HiveOptions(org.apache.storm.hive.common.HiveOptions) LocalTopology(org.apache.storm.LocalCluster.LocalTopology)
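
Given the argument parsing above, a local run takes exactly the seven required positional arguments; with no topology name, the main method falls into the LocalCluster branch. A hypothetical invocation, with placeholder values rather than anything from the source:

// Hypothetical arguments; adjust the URI, table and file path to your environment.
BucketTestHiveTopology.main(new String[] {
    "thrift://localhost:9083",  // metastoreURI (placeholder)
    "default",                  // dbName (placeholder)
    "store_sales",              // tableName (placeholder)
    "/tmp/store_sales.dat",     // dataFileLocation (placeholder)
    "100",                      // hiveBatchSize
    "10",                       // hiveTickTupleIntervalSecs
    "1"                         // workers
});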

Example 9 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

the class TridentHiveTopology method buildTopology.

public static StormTopology buildTopology(String metaStoreURI, String dbName, String tblName, Object keytab, Object principal) {
    int batchSize = 100;
    FixedBatchSpout spout = new FixedBatchSpout(batchSize);
    spout.setCycle(true);
    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("hiveTridentspout1", spout);
    String[] partNames = { "city", "state" };
    String[] colNames = { "id", "name", "phone", "street" };
    Fields hiveFields = new Fields("id", "name", "phone", "street", "city", "state");
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper().withColumnFields(new Fields(colNames)).withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions;
    if (keytab != null && principal != null) {
        hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withIdleTimeout(10)
            .withCallTimeout(30000)
            .withKerberosKeytab((String) keytab)
            .withKerberosPrincipal((String) principal);
    } else {
        hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withCallTimeout(30000)
            .withIdleTimeout(10);
    }
    StateFactory factory = new HiveStateFactory().withOptions(hiveOptions);
    TridentState state = stream.partitionPersist(factory, hiveFields, new HiveUpdater(), new Fields());
    return topology.build();
}
Also used : TridentState(org.apache.storm.trident.TridentState) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) Fields(org.apache.storm.tuple.Fields) StateFactory(org.apache.storm.trident.state.StateFactory) TridentTopology(org.apache.storm.trident.TridentTopology) Stream(org.apache.storm.trident.Stream) HiveOptions(org.apache.storm.hive.common.HiveOptions)
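
buildTopology only returns the StormTopology; submission happens elsewhere in TridentHiveTopology. A hedged sketch of a local run from a main method that declares throws Exception, reusing the try-with-resources LocalCluster pattern from Example 8 (argument values are placeholders, and passing null for keytab and principal takes the unsecured branch above):

Config conf = new Config();
conf.setMaxSpoutPending(5);
try (LocalCluster cluster = new LocalCluster();
    LocalTopology topo = cluster.submitTopology("tridentHiveTopology", conf,
        TridentHiveTopology.buildTopology("thrift://localhost:9083", "default",
            "trident_table", null, null))) {
    // Let the cycling FixedBatchSpout feed a few batches before shutdown,
    // much as Example 8 does with its waitForSeconds helper.
    Thread.sleep(20000);
}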

Example 10 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

the class TestHiveBolt method testWithTimeformat.

@Test
public void testWithTimeformat() throws Exception {
    String[] partNames1 = { "dt" };
    String timeFormat = "yyyy/MM/dd";
    HiveSetupUtil.dropDB(conf, dbName1);
    HiveSetupUtil.createDbAndTable(conf, dbName1, tblName1, null, colNames, colTypes, partNames1, dbLocation);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withTimeAsPartitionField(timeFormat);
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1)
        .withMaxOpenConnections(1);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    Date d = new Date();
    SimpleDateFormat parseDate = new SimpleDateFormat(timeFormat);
    String today = parseDate.format(d.getTime());
    checkRecordCountInTable(tblName1, dbName1, 0);
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 2; i++) {
        Tuple tuple = generateTestTuple(id, msg, null, null);
        tupleSet.add(tuple);
        bolt.execute(tuple);
    }
    for (Tuple t : tupleSet) verify(collector).ack(t);
    checkDataWritten(tblName1, dbName1, "100,test-123," + today, "100,test-123," + today);
    bolt.cleanup();
}
Also used : DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) Date(java.util.Date) Fields(org.apache.storm.tuple.Fields) HiveOptions(org.apache.storm.hive.common.HiveOptions) SimpleDateFormat(java.text.SimpleDateFormat) Tuple(org.apache.storm.tuple.Tuple) HashSet(java.util.HashSet) Test(org.junit.Test)
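
The same format string drives both sides of the assertion: withTimeAsPartitionField(timeFormat) makes the mapper derive the partition value from the processing time, and the test predicts that value with an identical SimpleDateFormat. The prediction step in isolation, assuming java.text.SimpleDateFormat and java.util.Date as in the test:

// "yyyy/MM/dd" is the timeFormat above; formatting "now" with it yields
// the partition value the bolt is expected to write, e.g. "2017/03/21".
SimpleDateFormat fmt = new SimpleDateFormat("yyyy/MM/dd");
String expectedPartition = fmt.format(new Date());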

Aggregations

HiveOptions (org.apache.storm.hive.common.HiveOptions): 14 uses
Fields (org.apache.storm.tuple.Fields): 14 uses
Tuple (org.apache.storm.tuple.Tuple): 10 uses
Test (org.junit.Test): 10 uses
DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper): 9 uses
OutputCollector (org.apache.storm.task.OutputCollector): 6 uses
JsonRecordHiveMapper (org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper): 5 uses
HashSet (java.util.HashSet): 4 uses
Config (org.apache.storm.Config): 3 uses
LocalCluster (org.apache.storm.LocalCluster): 3 uses
LocalTopology (org.apache.storm.LocalCluster.LocalTopology): 3 uses
TopologyBuilder (org.apache.storm.topology.TopologyBuilder): 3 uses
SimpleDateFormat (java.text.SimpleDateFormat): 1 use
Date (java.util.Date): 1 use
Stream (org.apache.storm.trident.Stream): 1 use
TridentState (org.apache.storm.trident.TridentState): 1 use
TridentTopology (org.apache.storm.trident.TridentTopology): 1 use
StateFactory (org.apache.storm.trident.state.StateFactory): 1 use