use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.
the class HiveExploreServiceStreamTest method testAvroFormattedStream.
/**
 * Verifies that a stream configured with the Avro format can be queried through Explore.
 *
 * <p>Five Avro-encoded "purchase" events (user, num, price) are sent to the stream, then an
 * aggregate query groups them by user and orders the groups by total price, descending. The test
 * checks the result schema and each of the three expected rows.
 *
 * @throws Exception if stream setup, event submission, or query execution fails
 */
@Test
public void testAvroFormattedStream() throws Exception {
  StreamId streamId = NAMESPACE_ID.stream("avroStream");
  createStream(streamId);
  try {
    Schema schema = Schema.recordOf(
      "purchase",
      Schema.Field.of("user", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("num", Schema.of(Schema.Type.INT)),
      Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
    FormatSpecification formatSpecification =
      new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());
    StreamProperties properties = new StreamProperties(Long.MAX_VALUE, formatSpecification, 1000);
    setStreamProperties(NAMESPACE_ID.getNamespace(), "avroStream", properties);

    // our schemas are compatible
    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 5, 3.14));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 10, 2.34));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userY", 1, 1.23));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 50, 45.67));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 100, 98.76));

    // expected sum(price * num) per user; primitives avoid needless boxing
    double xPrice = 5 * 3.14 + 10 * 2.34;
    double yPrice = 1.23;
    double zPrice = 50 * 45.67 + 100 * 98.76;
    // tolerance for comparing the floating point aggregates
    double delta = 0.0000001;

    String query = "SELECT `user`, sum(num) as total_num, sum(price * num) as total_price "
      + "FROM " + getTableName(streamId) + " GROUP BY `user` ORDER BY total_price DESC";

    // Close the result even when an assertion fails, so it is released before the stream is dropped.
    try (ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, query).get()) {
      Assert.assertTrue(result.hasNext());
      Assert.assertEquals(
        Lists.newArrayList(
          new ColumnDesc("user", "STRING", 1, null),
          new ColumnDesc("total_num", "BIGINT", 2, null),
          new ColumnDesc("total_price", "DOUBLE", 3, null)),
        result.getResultSchema());

      // should get 3 rows
      // first row should be for userZ
      List<Object> rowColumns = result.next().getColumns();
      // toString b/c avro returns a utf8 object for strings
      Assert.assertEquals("userZ", rowColumns.get(0).toString());
      Assert.assertEquals(150L, rowColumns.get(1));
      Assert.assertEquals(zPrice, (Double) rowColumns.get(2), delta);

      // 2nd row, should be userX
      rowColumns = result.next().getColumns();
      Assert.assertEquals("userX", rowColumns.get(0).toString());
      Assert.assertEquals(15L, rowColumns.get(1));
      Assert.assertEquals(xPrice, (Double) rowColumns.get(2), delta);

      // 3rd row, should be userY
      rowColumns = result.next().getColumns();
      Assert.assertEquals("userY", rowColumns.get(0).toString());
      Assert.assertEquals(1L, rowColumns.get(1));
      Assert.assertEquals(yPrice, (Double) rowColumns.get(2), delta);

      // shouldn't be any more results
      Assert.assertFalse(result.hasNext());
    }
  } finally {
    dropStream(streamId);
  }
}
use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.
the class HiveExploreServiceStreamTest method testStreamNameWithHyphen.
/**
 * Checks that a stream whose name contains a hyphen can be queried through the table name in
 * which the hyphen is replaced by an underscore.
 *
 * @throws Exception if stream setup, event submission, or the query fails
 */
@Test
public void testStreamNameWithHyphen() throws Exception {
  final StreamId hyphenatedStream = NAMESPACE_ID.stream("stream-test");
  createStream(hyphenatedStream);
  try {
    sendStreamEvent(hyphenatedStream, Collections.<String, String>emptyMap(), Bytes.toBytes("Dummy"));

    // Streams with '-' are replaced with '_'
    final String sanitizedName = "stream_test";
    final String query = "select body from " + getTableName(sanitizedName);

    // a single STRING column named "body" carrying the one event that was sent
    final List<ColumnDesc> expectedSchema =
      Lists.newArrayList(new ColumnDesc("body", "STRING", 1, null));
    final List<QueryResult> expectedRows =
      Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("Dummy")));

    runCommand(NAMESPACE_ID, query, true, expectedSchema, expectedRows);
  } finally {
    dropStream(hyphenatedStream);
  }
}
use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.
the class HiveExploreServiceStreamTest method start.
/**
 * One-time class setup. Initializes the test environment, obtains the authorizer from the
 * injector, sets the request user, grants that user every {@link Action} on the test namespace,
 * and then creates the stream named by {@code streamName} and sends three events to it
 * ({@code body1}, {@code body2}, {@code body3}, all with the same {@code headers}).
 *
 * @throws Exception if initialization, the grant, stream creation, or sending an event fails
 */
@BeforeClass
public static void start() throws Exception {
// use leveldb implementations, since stream input format examines the filesystem
// to determine input splits. also enable authorization.
// NOTE(review): the two boolean arguments presumably switch on those two options; confirm
// against the signature of initialize().
initialize(CConfiguration.create(), tmpFolder, true, true);
authorizer = injector.getInstance(AuthorizerInstantiator.class).get();
// set the request user before granting privileges and touching streams
SecurityRequestContext.setUserId(USER.getName());
grantAndAssertSuccess(NAMESPACE_ID, USER, EnumSet.allOf(Action.class));
StreamId streamId = NAMESPACE_ID.stream(streamName);
createStream(streamId);
// populate the stream with three events
sendStreamEvent(streamId, headers, Bytes.toBytes(body1));
sendStreamEvent(streamId, headers, Bytes.toBytes(body2));
sendStreamEvent(streamId, headers, Bytes.toBytes(body3));
}
use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.
the class HiveExploreServiceStreamTest method testJoinOnStreams.
/**
 * Joins two streams on their event body and expects exactly the one body value ("ABC") that was
 * sent to both of them.
 *
 * @throws Exception if stream setup, event submission, or the query fails
 */
@Test
public void testJoinOnStreams() throws Exception {
  final StreamId leftStream = NAMESPACE_ID.stream("jointest1");
  final StreamId rightStream = NAMESPACE_ID.stream("jointest2");

  createStream(leftStream);
  try {
    createStream(rightStream);
    try {
      // "ABC" is the only body present in both streams
      sendStreamEvent(leftStream, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
      sendStreamEvent(leftStream, Collections.<String, String>emptyMap(), Bytes.toBytes("XYZ"));
      sendStreamEvent(rightStream, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
      sendStreamEvent(rightStream, Collections.<String, String>emptyMap(), Bytes.toBytes("DEF"));

      final String leftTable = getTableName(leftStream);
      final String rightTable = getTableName(rightStream);
      // table-qualified column names, used both in the query and in the expected schema
      final String leftBody = leftTable + ".body";
      final String rightBody = rightTable + ".body";

      final String joinQuery = "select " + leftBody + ", " + rightBody
        + " from " + leftTable + " join " + rightTable
        + " on (" + leftBody + " = " + rightBody + ")";

      final List<ColumnDesc> expectedSchema = Lists.newArrayList(
        new ColumnDesc(leftBody, "STRING", 1, null),
        new ColumnDesc(rightBody, "STRING", 2, null));
      final List<QueryResult> expectedRows =
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("ABC", "ABC")));

      runCommand(NAMESPACE_ID, joinQuery, true, expectedSchema, expectedRows);
    } finally {
      dropStream(rightStream);
    }
  } finally {
    dropStream(leftStream);
  }
}
use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.
the class StreamSerDe method initialize.
/**
 * Prepares this SerDe for the stream identified by the given table properties.
 *
 * <p>Hive calls this method multiple times. Putting additional settings into {@code conf} here
 * may look tempting, but be very careful: when a query involves several Hive tables, initialize
 * is called for each table before input splits are fetched for any of them. It is therefore not
 * safe to put anything the input format may need into {@code conf} in this method. Instead, use
 * the StorageHandler's method to place the needed config into the properties map, which gets
 * passed here and is also copied into the job conf for the input format to consume.
 *
 * @param conf the configuration from which the context is obtained
 * @param properties table properties, including the stream name and stream namespace
 * @throws SerDeException if the stream config cannot be read, or the format cannot be created
 *   or does not support the schema
 */
@Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
  // The columns property comes from the Hive metastore, which got it from the create table
  // statement. The schema must therefore be accurate and in the right order - the same order
  // in which the object inspectors will reflect the columns.
  final String name = properties.getProperty(Constants.Explore.STREAM_NAME);
  final String namespace = properties.getProperty(Constants.Explore.STREAM_NAMESPACE);

  // A null namespace would otherwise cause a null pointer exception that prevents dropping a
  // table, so it is handled up front.
  if (namespace == null) {
    // Hive still needs an ObjectInspector to check which columns the table has.
    this.inspector = new ObjectDeserializer(properties, null).getInspector();
    return;
  }

  final StreamId id = new StreamId(namespace, name);
  try (ContextManager.Context ctx = ContextManager.getContext(conf)) {
    final Schema bodySchema;
    if (ctx == null) {
      // No context: initialize was called just to get the object inspector.
      bodySchema = null;
    } else {
      // Look up the stream format from the stream config.
      final FormatSpecification spec = getFormatSpec(properties, id, ctx);
      this.streamFormat = (AbstractStreamEventRecordFormat) RecordFormats.createInitializedFormat(spec);
      bodySchema = spec.getSchema();
    }
    this.deserializer = new ObjectDeserializer(properties, bodySchema, BODY_OFFSET);
    this.inspector = deserializer.getInspector();
  } catch (UnsupportedTypeException e) {
    // This should have been validated up front when the schema was set on the stream;
    // hitting it here means something went wrong much earlier.
    LOG.error("Schema unsupported by format.", e);
    throw new SerDeException("Schema unsupported by format.", e);
  } catch (IOException e) {
    LOG.error("Could not get the config for stream {}.", name, e);
    throw new SerDeException("Could not get the config for stream " + name, e);
  } catch (Exception e) {
    LOG.error("Could not create the format for stream {}.", name, e);
    throw new SerDeException("Could not create the format for stream " + name, e);
  }
}
Aggregations