Search in sources :

Example 36 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testAvroFormattedStream.

/**
 * Verifies that Avro-formatted stream events can be aggregated through an explore query.
 *
 * <p>Creates a stream, sets stream properties carrying an Avro format with a (user, num, price)
 * record schema, sends five events for three users, then runs a GROUP BY query and checks the
 * result schema and the three aggregated rows in descending order of total price. The stream is
 * dropped in the finally block whether or not the assertions pass.
 *
 * @throws Exception if any of the stream or explore operations fails
 */
@Test
public void testAvroFormattedStream() throws Exception {
    // tolerance used when comparing the summed prices returned by the query
    final double delta = 0.0000001;
    StreamId streamId = NAMESPACE_ID.stream("avroStream");
    createStream(streamId);
    try {
        Schema schema = Schema.recordOf("purchase",
                                        Schema.Field.of("user", Schema.of(Schema.Type.STRING)),
                                        Schema.Field.of("num", Schema.of(Schema.Type.INT)),
                                        Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
        FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());
        StreamProperties properties = new StreamProperties(Long.MAX_VALUE, formatSpecification, 1000);
        setStreamProperties(NAMESPACE_ID.getNamespace(), "avroStream", properties);
        // our schemas are compatible, so the avro schema can be parsed from the string form of ours
        org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 5, 3.14));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 10, 2.34));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userY", 1, 1.23));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 50, 45.67));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 100, 98.76));
        // expected sum(price * num) per user; primitives avoid needless boxing
        double xPrice = 5 * 3.14 + 10 * 2.34;
        double yPrice = 1.23;
        double zPrice = 50 * 45.67 + 100 * 98.76;
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, "SELECT `user`, sum(num) as total_num, sum(price * num) as total_price " + "FROM " + getTableName(streamId) + " GROUP BY `user` ORDER BY total_price DESC").get();
        Assert.assertTrue(result.hasNext());
        Assert.assertEquals(Lists.newArrayList(new ColumnDesc("user", "STRING", 1, null),
                                               new ColumnDesc("total_num", "BIGINT", 2, null),
                                               new ColumnDesc("total_price", "DOUBLE", 3, null)),
                            result.getResultSchema());
        // should get 3 rows
        // first row should be for userZ
        List<Object> rowColumns = result.next().getColumns();
        // toString b/c avro returns a utf8 object for strings
        Assert.assertEquals("userZ", rowColumns.get(0).toString());
        Assert.assertEquals(150L, rowColumns.get(1));
        // assertEquals with a delta reports both values on failure, unlike assertTrue(abs(..) < eps)
        Assert.assertEquals(zPrice, (Double) rowColumns.get(2), delta);
        // 2nd row, should be userX
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userX", rowColumns.get(0).toString());
        Assert.assertEquals(15L, rowColumns.get(1));
        Assert.assertEquals(xPrice, (Double) rowColumns.get(2), delta);
        // 3rd row, should be userY
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userY", rowColumns.get(0).toString());
        Assert.assertEquals(1L, rowColumns.get(1));
        Assert.assertEquals(yPrice, (Double) rowColumns.get(2), delta);
        // shouldn't be any more results
        Assert.assertFalse(result.hasNext());
    } finally {
        dropStream(streamId);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StreamProperties(co.cask.cdap.proto.StreamProperties) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 37 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testStreamNameWithHyphen.

/**
 * Checks that a stream whose name contains a hyphen can be queried through its explore table.
 *
 * <p>Sends a single event with body "Dummy" to the stream "stream-test" and expects a select on the
 * table for "stream_test" to return exactly that body. The stream is dropped afterwards.
 *
 * @throws Exception if any of the stream or explore operations fails
 */
@Test
public void testStreamNameWithHyphen() throws Exception {
    StreamId hyphenatedStream = NAMESPACE_ID.stream("stream-test");
    createStream(hyphenatedStream);
    try {
        sendStreamEvent(hyphenatedStream, Collections.<String, String>emptyMap(), Bytes.toBytes("Dummy"));

        // Streams with '-' are replaced with '_', so the table is looked up by the cleaned name
        String query = "select body from " + getTableName("stream_test");
        List<ColumnDesc> expectedSchema = Lists.newArrayList(new ColumnDesc("body", "STRING", 1, null));
        List<QueryResult> expectedRows =
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("Dummy")));

        runCommand(NAMESPACE_ID, query, true, expectedSchema, expectedRows);
    } finally {
        dropStream(hyphenatedStream);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) Test(org.junit.Test)

Example 38 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class HiveExploreServiceStreamTest method start.

/**
 * One-time class setup: initializes the test environment, obtains the authorizer, sets the
 * request user, grants that user every {@link Action} on the test namespace, and creates the
 * shared stream with three events in it.
 *
 * @throws Exception if initialization, the grant, or any stream operation fails
 */
@BeforeClass
public static void start() throws Exception {
    // leveldb implementations are used because the stream input format examines the filesystem
    // to determine input splits. authorization is enabled as well.
    CConfiguration cConf = CConfiguration.create();
    initialize(cConf, tmpFolder, true, true);

    authorizer = injector.getInstance(AuthorizerInstantiator.class).get();
    SecurityRequestContext.setUserId(USER.getName());
    grantAndAssertSuccess(NAMESPACE_ID, USER, EnumSet.allOf(Action.class));

    // populate the stream that is set up once for the whole class
    StreamId sharedStream = NAMESPACE_ID.stream(streamName);
    createStream(sharedStream);
    sendStreamEvent(sharedStream, headers, Bytes.toBytes(body1));
    sendStreamEvent(sharedStream, headers, Bytes.toBytes(body2));
    sendStreamEvent(sharedStream, headers, Bytes.toBytes(body3));
}
Also used : Action(co.cask.cdap.proto.security.Action) StreamId(co.cask.cdap.proto.id.StreamId) BeforeClass(org.junit.BeforeClass)

Example 39 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testJoinOnStreams.

/**
 * Checks that two stream tables can be joined on their body column.
 *
 * <p>Stream one receives "ABC" and "XYZ", stream two receives "ABC" and "DEF"; the join on body is
 * expected to return the single row ("ABC", "ABC"). Each stream is dropped in its own finally
 * block, the second before the first.
 *
 * @throws Exception if any of the stream or explore operations fails
 */
@Test
public void testJoinOnStreams() throws Exception {
    StreamId streamId1 = NAMESPACE_ID.stream("jointest1");
    StreamId streamId2 = NAMESPACE_ID.stream("jointest2");
    createStream(streamId1);
    try {
        createStream(streamId2);
        try {
            sendStreamEvent(streamId1, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
            sendStreamEvent(streamId1, Collections.<String, String>emptyMap(), Bytes.toBytes("XYZ"));
            sendStreamEvent(streamId2, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
            sendStreamEvent(streamId2, Collections.<String, String>emptyMap(), Bytes.toBytes("DEF"));

            // table and qualified column names are reused in both the query and the expected schema
            String leftTable = getTableName(streamId1);
            String rightTable = getTableName(streamId2);
            String leftBody = leftTable + ".body";
            String rightBody = rightTable + ".body";

            String joinQuery = "select " + leftBody + ", " + rightBody
                + " from " + leftTable + " join " + rightTable
                + " on (" + leftBody + " = " + rightBody + ")";

            runCommand(NAMESPACE_ID, joinQuery, true,
                       Lists.newArrayList(new ColumnDesc(leftBody, "STRING", 1, null),
                                          new ColumnDesc(rightBody, "STRING", 2, null)),
                       Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("ABC", "ABC"))));
        } finally {
            dropStream(streamId2);
        }
    } finally {
        dropStream(streamId1);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) Test(org.junit.Test)

Example 40 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamSerDe method initialize.

// Hive calls initialize multiple times. Adding extra settings to the conf here may look tempting,
// but be very careful: when a query involves several hive tables, initialize is called for every
// table before input splits are fetched for any of them. Nothing the input format may need can
// therefore safely be put into conf in this method. Use the StorageHandler's method to place the
// needed config into the properties map instead; that map is passed here and is also copied into
// the job conf for the input format to consume.
@Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
    // The columns property comes from the Hive metastore, which took it from the create table
    // statement. The schema must therefore be accurate and in the right order - the same order in
    // which the object inspectors will reflect the columns.
    String streamName = properties.getProperty(Constants.Explore.STREAM_NAME);
    String streamNamespace = properties.getProperty(Constants.Explore.STREAM_NAMESPACE);

    // A null namespace is handled here to avoid a null pointer exception that would prevent
    // dropping a table.
    if (streamNamespace == null) {
        // Hive still needs an ObjectInspector to check which columns the table has.
        ObjectDeserializer inspectorOnly = new ObjectDeserializer(properties, null);
        this.inspector = inspectorOnly.getInspector();
        return;
    }

    StreamId streamId = new StreamId(streamNamespace, streamName);
    try (ContextManager.Context context = ContextManager.getContext(conf)) {
        Schema bodySchema;
        if (context == null) {
            // No context available - presumably initialize was called just to get the object
            // inspector, so no format is created and the schema stays null.
            bodySchema = null;
        } else {
            // Get the stream format from the stream config.
            FormatSpecification spec = getFormatSpec(properties, streamId, context);
            this.streamFormat = (AbstractStreamEventRecordFormat) RecordFormats.createInitializedFormat(spec);
            bodySchema = spec.getSchema();
        }
        this.deserializer = new ObjectDeserializer(properties, bodySchema, BODY_OFFSET);
        this.inspector = deserializer.getInspector();
    } catch (UnsupportedTypeException unsupported) {
        // This should have been validated up front when the schema was set on the stream;
        // reaching this point means something went wrong much earlier.
        LOG.error("Schema unsupported by format.", unsupported);
        throw new SerDeException("Schema unsupported by format.", unsupported);
    } catch (IOException ioFailure) {
        LOG.error("Could not get the config for stream {}.", streamName, ioFailure);
        throw new SerDeException("Could not get the config for stream " + streamName, ioFailure);
    } catch (Exception failure) {
        LOG.error("Could not create the format for stream {}.", streamName, failure);
        throw new SerDeException("Could not create the format for stream " + streamName, failure);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) ContextManager(co.cask.cdap.hive.context.ContextManager) Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) IOException(java.io.IOException) ObjectDeserializer(co.cask.cdap.hive.serde.ObjectDeserializer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Aggregations

StreamId (co.cask.cdap.proto.id.StreamId): 166; Test (org.junit.Test): 88; DatasetId (co.cask.cdap.proto.id.DatasetId): 33; ProgramId (co.cask.cdap.proto.id.ProgramId): 30; NamespaceId (co.cask.cdap.proto.id.NamespaceId): 27; Path (javax.ws.rs.Path): 27; StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 24; ApplicationId (co.cask.cdap.proto.id.ApplicationId): 22; IOException (java.io.IOException): 20; StreamProperties (co.cask.cdap.proto.StreamProperties): 17; FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 16; StreamViewId (co.cask.cdap.proto.id.StreamViewId): 16; Location (org.apache.twill.filesystem.Location): 15; StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 12; NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 12; StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 11; ViewSpecification (co.cask.cdap.proto.ViewSpecification): 10; MetadataSearchResultRecord (co.cask.cdap.proto.metadata.MetadataSearchResultRecord): 10; Action (co.cask.cdap.proto.security.Action): 10; GET (javax.ws.rs.GET): 10