Search in sources:

Example 1 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

From the class HiveTopology, method main.

/**
 * Builds and submits a topology that streams user records from a
 * {@code UserDataSpout} into a Hive table via a {@code HiveBolt}.
 *
 * @param args metastore URI, database name, table name, then optionally a
 *             topology name, and (for secure clusters) a Kerberos keytab
 *             path and principal as the 5th and 6th arguments
 * @throws Exception if topology submission fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: HiveTopology <metaStoreUri> <dbName> <tblName>"
                + " [topologyName] [kerberosKeytab kerberosPrincipal]");
        System.exit(1);
    }
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    String[] colNames = { "id", "name", "phone", "street", "city", "state" };
    Config config = new Config();
    config.setNumWorkers(1);
    UserDataSpout spout = new UserDataSpout();
    // Map tuple fields positionally onto the Hive table columns.
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames));
    HiveOptions hiveOptions;
    if (args.length == 6) {
        // Secure cluster: authenticate with the supplied keytab/principal.
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(100)
            .withIdleTimeout(10)
            .withKerberosKeytab(args[4])
            .withKerberosPrincipal(args[5]);
    } else {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(100)
            .withIdleTimeout(10)
            .withMaxOpenConnections(1);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 1).shuffleGrouping(USER_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    if (args.length >= 4) {
        topoName = args[3];
    }
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
Also used : Fields(org.apache.storm.tuple.Fields) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) HiveOptions(org.apache.storm.hive.common.HiveOptions)

Example 2 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

From the class HiveTopologyPartitioned, method main.

/**
 * Builds and submits a topology that streams user records from a
 * {@code UserDataSpout} into a partitioned Hive table (partitioned by
 * city/state) via a {@code HiveBolt}.
 *
 * @param args metastore URI, database name, table name, then optionally a
 *             topology name, and (for secure clusters) a Kerberos keytab
 *             path and principal as the 5th and 6th arguments
 * @throws Exception if topology submission fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: HiveTopologyPartitioned <metaStoreUri> <dbName> <tblName>"
                + " [topologyName] [kerberosKeytab kerberosPrincipal]");
        System.exit(1);
    }
    String metaStoreUri = args[0];
    String dbName = args[1];
    String tblName = args[2];
    String[] partNames = { "city", "state" };
    String[] colNames = { "id", "name", "phone", "street" };
    Config config = new Config();
    config.setNumWorkers(1);
    UserDataSpout spout = new UserDataSpout();
    // Column fields become table columns; partition fields route each
    // record to the matching Hive partition.
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions;
    if (args.length == 6) {
        // Secure cluster: authenticate with the supplied keytab/principal.
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(1000)
            .withIdleTimeout(10)
            .withKerberosKeytab(args[4])
            .withKerberosPrincipal(args[5]);
    } else {
        hiveOptions = new HiveOptions(metaStoreUri, dbName, tblName, mapper)
            .withTxnsPerBatch(10)
            .withBatchSize(1000)
            .withIdleTimeout(10);
    }
    HiveBolt hiveBolt = new HiveBolt(hiveOptions);
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout(USER_SPOUT_ID, spout, 1);
    // UserDataSpout --> HiveBolt
    builder.setBolt(BOLT_ID, hiveBolt, 1).shuffleGrouping(USER_SPOUT_ID);
    String topoName = TOPOLOGY_NAME;
    if (args.length > 3) {
        topoName = args[3];
    }
    StormSubmitter.submitTopology(topoName, config, builder.createTopology());
}
Also used : Fields(org.apache.storm.tuple.Fields) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) HiveOptions(org.apache.storm.hive.common.HiveOptions)

Example 3 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

From the class TestHiveBolt, method testNoAcksUntilFlushed.

@Test
public void testNoAcksUntilFlushed() {
    // With a batch size of 2, the first tuple must stay unacked until the
    // second tuple arrives and fills the batch, triggering a flush.
    JsonRecordHiveMapper mapper = new JsonRecordHiveMapper()
        .withColumnFields(new Fields(colNames1))
        .withPartitionFields(new Fields(partNames));
    HiveOptions options = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(2);
    bolt = new TestingHiveBolt(options);
    bolt.prepare(config, null, new OutputCollector(collector));
    Tuple first = generateTestTuple(1, "SJC", "Sunnyvale", "CA");
    Tuple second = generateTestTuple(2, "SFO", "San Jose", "CA");
    bolt.execute(first);
    // Batch not full yet: nothing may be acked (or touched at all).
    verifyZeroInteractions(collector);
    bolt.execute(second);
    // The flush triggered by the second tuple acks both.
    verify(collector).ack(first);
    verify(collector).ack(second);
    bolt.cleanup();
}
Also used : JsonRecordHiveMapper(org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper) OutputCollector(org.apache.storm.task.OutputCollector) Fields(org.apache.storm.tuple.Fields) HiveOptions(org.apache.storm.hive.common.HiveOptions) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)

Example 4 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

From the class TestHiveBolt, method testWithByteArrayIdandMessage.

/**
 * Executes four identical delimited records through the bolt and checks
 * that every tuple is acked and all four records are written to the
 * expected (city, state) partition.
 */
@Test
public void testWithByteArrayIdandMessage() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
        .withColumnFields(new Fields(colNames))
        .withPartitionFields(new Fields(partNames));
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(2);
    bolt = new TestingHiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    String city = "sunnyvale";
    String state = "ca";
    // Diamond operator instead of the redundant explicit type argument.
    Set<Tuple> tupleSet = new HashSet<>();
    for (int i = 0; i < 4; i++) {
        Tuple tuple = generateTestTuple(id, msg, city, state);
        bolt.execute(tuple);
        tupleSet.add(tuple);
    }
    List<String> partVals = Lists.newArrayList(city, state);
    // Batch size 2 and 4 tuples means two complete flushes, so every
    // tuple must have been acked by now.
    for (Tuple t : tupleSet) {
        verify(collector).ack(t);
    }
    Assert.assertEquals(4, bolt.getRecordWritten(partVals).size());
    bolt.cleanup();
}
Also used : Fields(org.apache.storm.tuple.Fields) DelimitedRecordHiveMapper(org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper) HiveOptions(org.apache.storm.hive.common.HiveOptions) Tuple(org.apache.storm.tuple.Tuple) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with HiveOptions

use of org.apache.storm.hive.common.HiveOptions in project storm by apache.

From the class TestHiveBolt, method testJsonWriter.

// A JSON record is matched to Hive columns by field name, so the tuple's
// field order does not need to match the table's column order.
@Test
public void testJsonWriter() throws Exception {
    JsonRecordHiveMapper mapper = new JsonRecordHiveMapper()
        .withColumnFields(new Fields(colNames1))
        .withPartitionFields(new Fields(partNames));
    HiveOptions options = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
        .withTxnsPerBatch(2)
        .withBatchSize(1);
    bolt = new TestingHiveBolt(options);
    bolt.prepare(config, null, collector);
    Integer id = 1;
    String msg = "SJC";
    String city = "Sunnyvale";
    String state = "CA";
    Tuple input = generateTestTuple(id, msg, city, state);
    bolt.execute(input);
    // Batch size 1: the tuple is flushed and acked immediately.
    verify(collector).ack(input);
    List<String> partVals = Lists.newArrayList(city, state);
    List<byte[]> recordsWritten = bolt.getRecordWritten(partVals);
    Assert.assertNotNull(recordsWritten);
    Assert.assertEquals(1, recordsWritten.size());
    byte[] written = recordsWritten.get(0);
    // Parse the written bytes back into a map and compare field by field.
    Map<String, ?> writtenMap = objectMapper.readValue(
        new String(written), new TypeReference<Map<String, ?>>() {
    });
    Map<String, Object> expected = new HashMap<>();
    expected.put(COL1, id);
    expected.put(COL2, msg);
    Assert.assertEquals(expected, writtenMap);
    bolt.cleanup();
}
Also used : JsonRecordHiveMapper(org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper) HashMap(java.util.HashMap) Fields(org.apache.storm.tuple.Fields) HiveOptions(org.apache.storm.hive.common.HiveOptions) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)

Aggregations

HiveOptions (org.apache.storm.hive.common.HiveOptions)14 Fields (org.apache.storm.tuple.Fields)14 Tuple (org.apache.storm.tuple.Tuple)10 Test (org.junit.Test)10 DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper)9 OutputCollector (org.apache.storm.task.OutputCollector)6 JsonRecordHiveMapper (org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper)5 HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint)4 Config (org.apache.storm.Config)3 TopologyBuilder (org.apache.storm.topology.TopologyBuilder)3 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 SimpleDateFormat (java.text.SimpleDateFormat)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Stream (org.apache.storm.trident.Stream)1 TridentState (org.apache.storm.trident.TridentState)1 TridentTopology (org.apache.storm.trident.TridentTopology)1 StateFactory (org.apache.storm.trident.state.StateFactory)1