
Example 6 with DelimitedRecordHiveMapper

Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.

Class TestHiveBolt, method testWithoutPartitions.

@Test
public void testWithoutPartitions() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(2)
        .withAutoCreatePartitions(false);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    String city = "sunnyvale";
    String state = "ca";
    Set<Tuple> tupleSet = new HashSet<>();
    for (int i = 0; i < 4; i++) {
        Tuple tuple = generateTestTuple(id, msg, city, state);
        bolt.execute(tuple);
        tupleSet.add(tuple);
    }
    // No partitions were configured, so records are looked up under an empty partition-values list.
    List<String> partVals = Collections.emptyList();
    for (Tuple t : tupleSet) {
        verify(collector).ack(t);
    }
    List<byte[]> recordWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordWritten);
    Assert.assertEquals(4, recordWritten.size());
    bolt.cleanup();
}
Also used: Fields (org.apache.storm.tuple.Fields), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), HiveOptions (org.apache.storm.hive.common.HiveOptions), Tuple (org.apache.storm.tuple.Tuple), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), HashSet (java.util.HashSet), Test (org.junit.Test)
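
The collector verified above is a mock created in the test fixture, which this page does not show. A minimal sketch of how such a mock could be wired, assuming Mockito; the class and variable names here are illustrative:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import org.apache.storm.task.IOutputCollector;
import org.apache.storm.tuple.Tuple;

public class CollectorMockSketch {
    public static void main(String[] args) {
        // A mocked collector records interactions instead of routing tuples.
        IOutputCollector collector = mock(IOutputCollector.class);
        Tuple tuple = mock(Tuple.class);
        // What the bolt does once a batch is committed to Hive.
        collector.ack(tuple);
        // The assertion pattern used in the tests above.
        verify(collector).ack(tuple);
    }
}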

Example 7 with DelimitedRecordHiveMapper

Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.

Class TridentHiveTopology, method buildTopology.

public static StormTopology buildTopology(String metaStoreUri, String dbName, String tblName, Object keytab, Object principal) {
    int batchSize = 100;
    FixedBatchSpout spout = new FixedBatchSpout(batchSize);
    spout.setCycle(true);
    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("hiveTridentspout1", spout);
    String[] partNames = { "city", "state" };
    String[] colNames = { "id", "name", "phone", "street" };
    Fields hiveFields = new Fields("id", "name", "phone", "street", "city", "state");
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions;
    if (keytab != null && principal != null) {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withIdleTimeout(10)
            .withCallTimeout(30000)
            .withKerberosKeytab((String) keytab)
            .withKerberosPrincipal((String) principal);
    } else {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(batchSize)
            .withCallTimeout(30000)
            .withIdleTimeout(10);
    }
    StateFactory factory = new HiveStateFactory().withOptions(hiveOptions);
    // partitionPersist writes each batch to Hive through HiveState; the returned
    // TridentState is unused here but kept to match the original source.
    TridentState state = stream.partitionPersist(factory, hiveFields, new HiveUpdater(), new Fields());
    return topology.build();
}
Also used: TridentState (org.apache.storm.trident.TridentState), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), Fields (org.apache.storm.tuple.Fields), StateFactory (org.apache.storm.trident.state.StateFactory), TridentTopology (org.apache.storm.trident.TridentTopology), Stream (org.apache.storm.trident.Stream), HiveOptions (org.apache.storm.hive.common.HiveOptions)
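
buildTopology only constructs the topology; a hedged sketch of submitting it, where the topology name and config values are illustrative rather than taken from the source:

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;

public class TridentHiveSubmitSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative settings; a real driver would read these from args or config.
        Config conf = new Config();
        conf.setMaxSpoutPending(5);
        StormSubmitter.submitTopology("tridentHiveTopology", conf,
            TridentHiveTopology.buildTopology(args[0], args[1], args[2], null, null));
    }
}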

Example 8 with DelimitedRecordHiveMapper

Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.

Class BucketTestHiveTopology, method main.

public static void main(String[] args) throws Exception {
    if ((args == null) || (args.length < 7)) {
        System.out.println("Usage: BucketTestHiveTopology metastoreURI " + "dbName tableName dataFileLocation hiveBatchSize " + "hiveTickTupleIntervalSecs workers [topologyName] [keytab file]" + " [principal name]");
        System.exit(1);
    }
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    Integer hiveBatchSize = Integer.parseInt(args[4]);
    Integer hiveTickTupleIntervalSecs = Integer.parseInt(args[5]);
    Integer workers = Integer.parseInt(args[6]);
    String[] colNames = { "ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk", "ss_customer_sk", "ss_cdemo_sk", "ss_hdemo_sk", "ss_addr_sk", "ss_store_sk", "ss_promo_sk", "ss_ticket_number", "ss_quantity", "ss_wholesale_cost", "ss_list_price", "ss_sales_price", "ss_ext_discount_amt", "ss_ext_sales_price", "ss_ext_wholesale_cost", "ss_ext_list_price", "ss_ext_tax", "ss_coupon_amt", "ss_net_paid", "ss_net_paid_inc_tax", "ss_net_profit" };
    Config config = new Config();
    config.setNumWorkers(workers);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withTimeAsPartitionField("yyyy/MM/dd");
    HiveOptions hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
        .withTxnsPerBatch(10)
        .withBatchSize(hiveBatchSize);
    // The tick-tuple interval had to be a mandatory argument since it's positional.
    if (hiveTickTupleIntervalSecs > 0) {
        hiveOptions.withTickTupleInterval(hiveTickTupleIntervalSecs);
    }
    if (args.length == 10) {
        hiveOptions.withKerberosKeytab(args[8]).withKerberosPrincipal(args[9]);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    String sourceFileLocation = args[3];
    UserDataSpout spout = new UserDataSpout().withDataFile(sourceFileLocation);
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 14).shuffleGrouping(USER_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    if (args.length > 7) {
        topoName = args[7];
    }
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
Also used: Fields (org.apache.storm.tuple.Fields), TopologyBuilder (org.apache.storm.topology.TopologyBuilder), Config (org.apache.storm.Config), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), HiveOptions (org.apache.storm.hive.common.HiveOptions)
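
UserDataSpout's implementation is not shown on this page. Below is a hedged reconstruction of what a file-backed spout like it might look like; the class name, the abbreviated column list, and the pipe delimiter are assumptions, not the actual storm-hive class:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class FileLineSpout extends BaseRichSpout {
    // Abbreviated; the real topology declares all 23 ss_* columns.
    private static final String[] COLS = { "ss_sold_date_sk", "ss_sold_time_sk", "ss_item_sk" };
    private final String dataFile;
    private transient BufferedReader reader;
    private transient SpoutOutputCollector collector;

    public FileLineSpout(String dataFile) {
        this.dataFile = dataFile;
    }

    @Override
    public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        try {
            reader = new BufferedReader(new FileReader(dataFile));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void nextTuple() {
        try {
            String line = reader.readLine();
            if (line != null) {
                // One delimited record per line; emit one value per declared column.
                collector.emit(new Values((Object[]) line.split("\\|", COLS.length)));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields(COLS));
    }
}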

Example 9 with DelimitedRecordHiveMapper

Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.

Class TestHiveBolt, method testWithTimeformat.

@Test
public void testWithTimeformat() throws Exception {
    String timeFormat = "yyyy/MM/dd";
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withTimeAsPartitionField(timeFormat);
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1)
        .withMaxOpenConnections(1);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    Date d = new Date();
    SimpleDateFormat parseDate = new SimpleDateFormat(timeFormat);
    String today = parseDate.format(d);
    List<Tuple> tuples = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        Tuple tuple = generateTestTuple(id, msg, null, null);
        tuples.add(tuple);
        bolt.execute(tuple);
    }
    for (Tuple t : tuples) {
        verify(collector).ack(t);
    }
    List<String> partVals = Lists.newArrayList(today);
    List<byte[]> recordsWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordsWritten);
    Assert.assertEquals(2, recordsWritten.size());
    byte[] mapped = generateDelimiteredRecord(Lists.newArrayList(id, msg), mapper.getFieldDelimiter());
    for (byte[] record : recordsWritten) {
        Assert.assertArrayEquals(mapped, record);
    }
    bolt.cleanup();
}
Also used: ArrayList (java.util.ArrayList), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), Date (java.util.Date), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), Fields (org.apache.storm.tuple.Fields), HiveOptions (org.apache.storm.hive.common.HiveOptions), SimpleDateFormat (java.text.SimpleDateFormat), Tuple (org.apache.storm.tuple.Tuple), Test (org.junit.Test)
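
generateDelimiteredRecord is a test helper whose body is not shown here. An illustrative stand-in, assuming it simply joins the column values with the mapper's field delimiter; the method and class names are hypothetical:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

public class DelimitedRecordSketch {
    // Join the values with the delimiter, mirroring what DelimitedRecordHiveMapper
    // is expected to write for each tuple.
    static byte[] delimitedRecord(List<Object> values, String delimiter) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < values.size(); i++) {
            if (i > 0) {
                sb.append(delimiter);
            }
            sb.append(values.get(i));
        }
        return sb.toString().getBytes(StandardCharsets.UTF_8);
    }

    public static void main(String[] args) {
        byte[] record = delimitedRecord(Arrays.asList(100, "test-123"), ",");
        System.out.println(new String(record, StandardCharsets.UTF_8)); // prints 100,test-123
    }
}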

Example 10 with DelimitedRecordHiveMapper

Use of org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper in project storm by apache.

Class TestHiveBolt, method testData.

@Test
public void testData() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, new OutputCollector(collector));
    Integer id = 1;
    String msg = "SJC";
    String city = "Sunnyvale";
    String state = "CA";
    Tuple tuple1 = generateTestTuple(id, msg, city, state);
    bolt.execute(tuple1);
    verify(collector).ack(tuple1);
    List<String> partVals = Lists.newArrayList(city, state);
    List<byte[]> recordsWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordsWritten);
    Assert.assertEquals(1, recordsWritten.size());
    byte[] mapped = generateDelimiteredRecord(Lists.newArrayList(id, msg), mapper.getFieldDelimiter());
    Assert.assertArrayEquals(mapped, recordsWritten.get(0));
    bolt.cleanup();
}
Also used: OutputCollector (org.apache.storm.task.OutputCollector), Fields (org.apache.storm.tuple.Fields), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), HiveOptions (org.apache.storm.hive.common.HiveOptions), Tuple (org.apache.storm.tuple.Tuple), Test (org.junit.Test)
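
To make the assertions above concrete, here is a hedged sketch of the mapper's two-way split of a tuple (partition values via mapPartitions, the delimited record via mapRecord). The stubbed Tuple getters are assumptions about which lookups the mapper performs:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;

public class MapperSplitSketch {
    public static void main(String[] args) {
        DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields("id", "msg"))
            .withPartitionFields(new Fields("city", "state"));

        // Stub only the lookups the mapper is assumed to make.
        Tuple tuple = mock(Tuple.class);
        when(tuple.getValueByField("id")).thenReturn(1);
        when(tuple.getValueByField("msg")).thenReturn("SJC");
        when(tuple.getStringByField("city")).thenReturn("Sunnyvale");
        when(tuple.getStringByField("state")).thenReturn("CA");

        List<String> partVals = mapper.mapPartitions(tuple); // expected: [Sunnyvale, CA]
        byte[] record = mapper.mapRecord(tuple);             // expected: "1,SJC" as bytes
        System.out.println(partVals + " / " + new String(record, StandardCharsets.UTF_8));
    }
}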

Aggregations

DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper): 12 usages
Fields (org.apache.storm.tuple.Fields): 12 usages
HiveOptions (org.apache.storm.hive.common.HiveOptions): 9 usages
Test (org.junit.Test): 8 usages
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint): 7 usages
Tuple (org.apache.storm.tuple.Tuple): 6 usages
Config (org.apache.storm.Config): 3 usages
TopologyBuilder (org.apache.storm.topology.TopologyBuilder): 3 usages
ArrayList (java.util.ArrayList): 2 usages
HashSet (java.util.HashSet): 2 usages
OutputCollector (org.apache.storm.task.OutputCollector): 2 usages
SimpleDateFormat (java.text.SimpleDateFormat): 1 usage
Date (java.util.Date): 1 usage
Stream (org.apache.storm.trident.Stream): 1 usage
TridentState (org.apache.storm.trident.TridentState): 1 usage
TridentTopology (org.apache.storm.trident.TridentTopology): 1 usage
StateFactory (org.apache.storm.trident.state.StateFactory): 1 usage