Search in sources :

Example 11 with FormatSpecification

use of FormatSpecification in project cdap by caskdata.

the class StreamSerDe method initialize.

// initialize gets called multiple times by Hive. It may seem like a good idea to put additional settings into
// the conf, but be very careful when doing so. If there are multiple hive tables involved in a query, initialize
// for each table is called before input splits are fetched for any table. It is therefore not safe to put anything
// the input format may need into conf in this method. Rather, use StorageHandler's method to place needed config
// into the properties map there, which will get passed here and also copied into the job conf for the input
// format to consume.
public void initialize(Configuration conf, Properties properties) throws SerDeException {
    // The columns property comes from the Hive metastore, which has it from the create table statement
    // It is then important that this schema be accurate and in the right order - the same order as
    // object inspectors will reflect them.
    String streamName = properties.getProperty(Constants.Explore.STREAM_NAME);
    String streamNamespace = properties.getProperty(Constants.Explore.STREAM_NAMESPACE);
    // to avoid a null pointer exception that prevents dropping a table, we handle the null namespace case here.
    if (streamNamespace == null) {
        // we also still need an ObjectInspector as Hive uses it to check what columns the table has.
        this.inspector = new ObjectDeserializer(properties, null).getInspector();
    StreamId streamId = new StreamId(streamNamespace, streamName);
    try (ContextManager.Context context = ContextManager.getContext(conf)) {
        Schema schema = null;
        // Because it calls initialize just to get the object inspector
        if (context != null) {
            // Get the stream format from the stream config.
            FormatSpecification formatSpec = getFormatSpec(properties, streamId, context);
            this.streamFormat = (AbstractStreamEventRecordFormat) RecordFormats.createInitializedFormat(formatSpec);
            schema = formatSpec.getSchema();
        this.deserializer = new ObjectDeserializer(properties, schema, BODY_OFFSET);
        this.inspector = deserializer.getInspector();
    } catch (UnsupportedTypeException e) {
        // this should have been validated up front when schema was set on the stream.
        // if we hit this something went wrong much earlier.
        LOG.error("Schema unsupported by format.", e);
        throw new SerDeException("Schema unsupported by format.", e);
    } catch (IOException e) {
        LOG.error("Could not get the config for stream {}.", streamName, e);
        throw new SerDeException("Could not get the config for stream " + streamName, e);
    } catch (Exception e) {
        LOG.error("Could not create the format for stream {}.", streamName, e);
        throw new SerDeException("Could not create the format for stream " + streamName, e);
Also used : StreamId( ContextManager(co.cask.cdap.hive.context.ContextManager) Schema( FormatSpecification( UnsupportedTypeException( IOException( ObjectDeserializer(co.cask.cdap.hive.serde.ObjectDeserializer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) UnsupportedTypeException( IOException( SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 12 with FormatSpecification

use of FormatSpecification in project cdap by caskdata.

the class AbstractStreamInputFormat method getInitializedFormat.

private RecordFormat<StreamEvent, V> getInitializedFormat(Configuration conf) throws UnsupportedTypeException, IllegalAccessException, ClassNotFoundException, InstantiationException {
    String formatSpecStr = conf.get(BODY_FORMAT);
    if (formatSpecStr == null || formatSpecStr.isEmpty()) {
        throw new IllegalArgumentException(BODY_FORMAT + " must be set in the configuration in order to use a format for the stream body.");
    FormatSpecification formatSpec = GSON.fromJson(formatSpecStr, FormatSpecification.class);
    return RecordFormats.createInitializedFormat(formatSpec);
Also used : FormatSpecification(

Example 13 with FormatSpecification

use of FormatSpecification in project cdap by caskdata.

the class ViewSystemMetadataWriter method getSchemaToAdd.

protected String getSchemaToAdd() {
    Schema schema = viewSpec.getFormat().getSchema();
    if (schema == null) {
        FormatSpecification format = viewSpec.getFormat();
        RecordFormat<Object, Object> initializedFormat;
        try {
            initializedFormat = RecordFormats.createInitializedFormat(format);
            schema = initializedFormat.getSchema();
        } catch (IllegalAccessException | InstantiationException | UnsupportedTypeException | ClassNotFoundException e) {
            LOG.debug("Exception: ", e);
            LOG.warn("Exception while determining schema for view {}. View {} will not contain schema as metadata.", viewId, viewId);
    return schema == null ? null : schema.toString();
Also used : Schema( FormatSpecification( UnsupportedTypeException( Nullable(javax.annotation.Nullable)

Example 14 with FormatSpecification

use of FormatSpecification in project cdap by caskdata.

the class ViewStoreTestBase method testExploreViewStore.

public void testExploreViewStore() throws Exception {
    ViewStore store = getExploreViewStore();
    StreamId stream = new StreamId("foo", "s");
    StreamViewId view1 = stream.view("bar1");
    StreamViewId view2 = stream.view("bar2");
    StreamViewId view3 = stream.view("bar3");
    ViewSpecification properties = new ViewSpecification(new FormatSpecification("a", createSchema("name string, props map<string, string>")));
    Assert.assertTrue("view1 should be created", store.createOrUpdate(view1, properties));
    Assert.assertTrue("view1 should exist", store.exists(view1));
    Assert.assertEquals("view1 should have the initial properties", properties, new ViewSpecification(store.get(view1)));
    ViewSpecification properties2 = new ViewSpecification(new FormatSpecification("b", createSchema("name string, age int")));
    Assert.assertFalse("view1 should be updated", store.createOrUpdate(view1, properties2));
    Assert.assertTrue("view1 should exist", store.exists(view1));
    Assert.assertEquals("view1 should have the updated properties", properties2, new ViewSpecification(store.get(view1)));
    Assert.assertTrue("view2 should be created", store.createOrUpdate(view2, properties));
    Assert.assertTrue("view3 should be created", store.createOrUpdate(view3, properties));
    Assert.assertEquals("view1, view2, and view3 should be in the stream", ImmutableSet.of(view1, view2, view3), ImmutableSet.copyOf(store.list(stream)));
    Assert.assertEquals(ImmutableList.of(), store.list(stream));
Also used : StreamId( FormatSpecification( ViewSpecification(co.cask.cdap.proto.ViewSpecification) StreamViewId( Test(org.junit.Test)

Example 15 with FormatSpecification

use of FormatSpecification in project cdap by caskdata.

the class ExploreExecutorHttpHandler method enableStream.

public void enableStream(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespace, @PathParam("stream") String streamName, @PathParam("table") final String tableName) throws Exception {
    final StreamId streamId = new StreamId(namespace, streamName);
    try (Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()))) {
        final FormatSpecification format = GSON.fromJson(reader, FormatSpecification.class);
        if (format == null) {
            throw new BadRequestException("Expected format in the body");
        QueryHandle handle = impersonator.doAs(streamId, new Callable<QueryHandle>() {

            public QueryHandle call() throws Exception {
                return exploreTableManager.enableStream(tableName, streamId, format);
        JsonObject json = new JsonObject();
        json.addProperty("handle", handle.getHandle());
        responder.sendJson(HttpResponseStatus.OK, json);
    } catch (UnsupportedTypeException e) {
        LOG.error("Exception while generating create statement for stream {}", streamName, e);
        responder.sendString(HttpResponseStatus.BAD_REQUEST, e.getMessage());
Also used : StreamId( InputStreamReader( FormatSpecification( Reader( InputStreamReader( BadRequestException(co.cask.cdap.common.BadRequestException) JsonObject( UnsupportedTypeException( ChannelBufferInputStream(org.jboss.netty.buffer.ChannelBufferInputStream) QueryHandle(co.cask.cdap.proto.QueryHandle) BadRequestException(co.cask.cdap.common.BadRequestException) ExploreException(co.cask.cdap.explore.service.ExploreException) SQLException(java.sql.SQLException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) JsonSyntaxException( UnsupportedTypeException( IOException( Path( AuditPolicy( POST(


FormatSpecification ( Test (org.junit.Test)43 Schema ( StructuredRecord ( StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)17 StreamId ( ViewSpecification (co.cask.cdap.proto.ViewSpecification)14 StreamProperties (co.cask.cdap.proto.StreamProperties)11 StreamViewId ( DatasetId ( NamespaceMeta (co.cask.cdap.proto.NamespaceMeta)5 NamespaceId ( MetadataSearchResultRecord (co.cask.cdap.proto.metadata.MetadataSearchResultRecord)5 IOException ( UnsupportedTypeException ( NotFoundException (co.cask.cdap.common.NotFoundException)3 ApplicationId ( ArtifactId ( ProgramId ( HttpURLConnection (