Search in sources :

Example 21 with FormatSpecification

use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testSimpleArraySchemaValidation.

/**
 * Initializes a {@link DelimitedStringsRecordFormat} with a record schema whose first six
 * fields are simple types and whose last field is an array of strings.
 *
 * <p>There are no explicit assertions: the test succeeds when {@code initialize} returns
 * without throwing.
 */
@Test
public void testSimpleArraySchemaValidation() throws UnsupportedTypeException {
    Schema eventSchema = Schema.recordOf(
        "event",
        Schema.Field.of("f1", Schema.of(Schema.Type.BOOLEAN)),
        Schema.Field.of("f2", Schema.of(Schema.Type.INT)),
        Schema.Field.of("f3", Schema.of(Schema.Type.FLOAT)),
        Schema.Field.of("f4", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("f5", Schema.of(Schema.Type.BYTES)),
        Schema.Field.of("f6", Schema.of(Schema.Type.STRING)),
        // The array-typed field is the final field of the record.
        Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING))));
    String formatName = DelimitedStringsRecordFormat.class.getCanonicalName();
    FormatSpecification specification =
        new FormatSpecification(formatName, eventSchema, Collections.<String, String>emptyMap());
    DelimitedStringsRecordFormat delimitedFormat = new DelimitedStringsRecordFormat();
    delimitedFormat.initialize(specification);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) Test(org.junit.Test)

Example 22 with FormatSpecification

use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class GrokRecordFormatTest method testSimple.

/**
 * Parses a single {@code user:body} message with the grok record format and checks that
 * the {@code user} and {@code body} fields of the resulting record hold the two parts.
 */
@Test
public void testSimple() throws Exception {
    Schema eventSchema = Schema.recordOf(
        "streamEvent",
        Schema.Field.of("user", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("body", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    FormatSpecification grokSpec = new FormatSpecification(
        Formats.GROK,
        eventSchema,
        GrokRecordFormat.settings("%{USER:user}:%{GREEDYDATA:body}"));
    RecordFormat<StreamEvent, StructuredRecord> grokFormat = RecordFormats.createInitializedFormat(grokSpec);

    // Wrap the raw message bytes in a stream event and run it through the format.
    ByteBuffer payload = ByteBuffer.wrap(
        Bytes.toBytes("nitin:falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf"));
    StructuredRecord parsed = grokFormat.read(new StreamEvent(payload));

    Assert.assertEquals("nitin", parsed.get("user"));
    Assert.assertEquals("falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf", parsed.get("body"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 23 with FormatSpecification

use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testDelimiter.

/**
 * Configures a {@link DelimitedStringsRecordFormat} with a single-space delimiter and no
 * schema, then checks that the {@code body} field of the parsed record equals the input
 * line split on spaces.
 */
@Test
public void testDelimiter() throws UnsupportedTypeException, UnexpectedFormatException {
    String line = "userX actionY itemZ";

    DelimitedStringsRecordFormat spaceDelimited = new DelimitedStringsRecordFormat();
    // A null schema is passed on purpose; only the delimiter setting is supplied.
    spaceDelimited.initialize(new FormatSpecification(
        DelimitedStringsRecordFormat.class.getCanonicalName(),
        null,
        ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, " ")));

    StreamEvent event = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(line)));
    StructuredRecord parsed = spaceDelimited.read(event);
    String[] tokens = parsed.get("body");

    Assert.assertArrayEquals(line.split(" "), tokens);
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 24 with FormatSpecification

use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class StreamViewClientTest method testAll.

/**
 * End-to-end exercise of the stream view client against the stream "foo" in the default
 * namespace.
 *
 * <p>Phase one creates the stream, sends three CSV events, and then walks a view through
 * its lifecycle: verify it is absent, create it, re-submit the same specification, query it
 * through the explore client, delete it, and verify it is absent again. Phase two recreates
 * the stream, attaches a view, and deletes the stream while the view still exists.
 *
 * <p>The stream is deleted in a {@code finally} block in both phases so a failed assertion
 * does not leave it behind.
 */
@Test
public void testAll() throws Exception {
    NamespaceId namespace = NamespaceId.DEFAULT;
    StreamId stream = namespace.stream("foo");
    StreamViewId view1 = stream.view("view1");
    LOG.info("Creating stream {}", stream);
    streamClient.create(stream);
    try {
        LOG.info("Sending events to stream {}", stream);
        streamClient.sendEvent(stream, "a,b,c");
        streamClient.sendEvent(stream, "d,e,f");
        streamClient.sendEvent(stream, "g,h,i");
        LOG.info("Verifying that no views exist yet");
        Assert.assertEquals(ImmutableList.of(), streamViewClient.list(stream));
        // Fetching a view that was never created must raise NotFoundException for that view.
        try {
            streamViewClient.get(view1);
            Assert.fail();
        } catch (NotFoundException e) {
            Assert.assertEquals(view1, e.getObject());
        }
        FormatSpecification format = new FormatSpecification("csv", Schema.recordOf("foo", Schema.Field.of("one", Schema.of(Schema.Type.STRING)), Schema.Field.of("two", Schema.of(Schema.Type.STRING)), Schema.Field.of("three", Schema.of(Schema.Type.STRING))));
        ViewSpecification viewSpecification = new ViewSpecification(format, "firsttable");
        LOG.info("Creating view {} with config {}", view1, GSON.toJson(viewSpecification));
        // The first createOrUpdate call on this view is expected to return true.
        Assert.assertEquals(true, streamViewClient.createOrUpdate(view1, viewSpecification));
        LOG.info("Verifying that view {} has been created", view1);
        Assert.assertEquals(new ViewDetail(view1.getView(), viewSpecification), streamViewClient.get(view1));
        Assert.assertEquals(ImmutableList.of(view1.getView()), streamViewClient.list(stream));
        FormatSpecification newFormat = new FormatSpecification("csv", Schema.recordOf("foo", Schema.Field.of("one", Schema.of(Schema.Type.STRING)), Schema.Field.of("two", Schema.of(Schema.Type.STRING)), Schema.Field.of("three", Schema.of(Schema.Type.STRING))));
        ViewSpecification newViewSpecification = new ViewSpecification(newFormat, "firsttable");
        LOG.info("Updating view {} with config {}", view1, GSON.toJson(newViewSpecification));
        // The second createOrUpdate call on the same view is expected to return false.
        Assert.assertEquals(false, streamViewClient.createOrUpdate(view1, newViewSpecification));
        LOG.info("Verifying that view {} has been updated", view1);
        Assert.assertEquals(new ViewDetail(view1.getView(), newViewSpecification), streamViewClient.get(view1));
        Assert.assertEquals(ImmutableList.of(view1.getView()), streamViewClient.list(stream));
        // Query the view's table in the namespace that owns the stream (view -> stream -> namespace).
        ExploreExecutionResult executionResult = queryClient.execute(view1.getParent().getParent(), "select one,two,three from firsttable").get();
        Assert.assertNotNull(executionResult.getResultSchema());
        Assert.assertEquals(3, executionResult.getResultSchema().size());
        Assert.assertEquals("one", executionResult.getResultSchema().get(0).getName());
        Assert.assertEquals("two", executionResult.getResultSchema().get(1).getName());
        Assert.assertEquals("three", executionResult.getResultSchema().get(2).getName());
        // Each of the three events sent above should come back as one row, in send order.
        List<QueryResult> results = Lists.newArrayList(executionResult);
        Assert.assertNotNull(results);
        Assert.assertEquals(3, results.size());
        Assert.assertEquals("a", results.get(0).getColumns().get(0));
        Assert.assertEquals("b", results.get(0).getColumns().get(1));
        Assert.assertEquals("c", results.get(0).getColumns().get(2));
        Assert.assertEquals("d", results.get(1).getColumns().get(0));
        Assert.assertEquals("e", results.get(1).getColumns().get(1));
        Assert.assertEquals("f", results.get(1).getColumns().get(2));
        Assert.assertEquals("g", results.get(2).getColumns().get(0));
        Assert.assertEquals("h", results.get(2).getColumns().get(1));
        Assert.assertEquals("i", results.get(2).getColumns().get(2));
        LOG.info("Deleting view {}", view1);
        streamViewClient.delete(view1);
        // Placeholder must be "{}"; the previous "{]" was logged literally and dropped the view id.
        LOG.info("Verifying that view {} has been deleted", view1);
        try {
            streamViewClient.get(view1);
            Assert.fail();
        } catch (NotFoundException e) {
            Assert.assertEquals(view1, e.getObject());
        }
        Assert.assertEquals(ImmutableList.of(), streamViewClient.list(stream));
    } finally {
        streamClient.delete(stream);
    }
    // test deleting stream with a view
    LOG.info("Creating stream {}", stream);
    streamClient.create(stream);
    try {
        FormatSpecification format = new FormatSpecification("csv", Schema.recordOf("foo", Schema.Field.of("one", Schema.of(Schema.Type.STRING)), Schema.Field.of("two", Schema.of(Schema.Type.STRING)), Schema.Field.of("three", Schema.of(Schema.Type.STRING))));
        ViewSpecification viewSpecification = new ViewSpecification(format, "firsttable");
        LOG.info("Creating view {} with config {}", view1, GSON.toJson(viewSpecification));
        Assert.assertEquals(true, streamViewClient.createOrUpdate(view1, viewSpecification));
    } finally {
        // The view is intentionally left in place so the stream is deleted while it still has a view.
        streamClient.delete(stream);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) QueryResult(co.cask.cdap.proto.QueryResult) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) NotFoundException(co.cask.cdap.common.NotFoundException) ViewSpecification(co.cask.cdap.proto.ViewSpecification) NamespaceId(co.cask.cdap.proto.id.NamespaceId) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) ViewDetail(co.cask.cdap.proto.ViewDetail) StreamViewId(co.cask.cdap.proto.id.StreamViewId) Test(org.junit.Test)

Example 25 with FormatSpecification

use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class AvroRecordFormatTest method testFlatRecord.

/**
 * Round-trips a flat Avro record through the Avro record format.
 *
 * <p>A record with one field per supported simple type, plus an array, a map, and two
 * string-or-null unions (one left null, one set), is built with Avro's
 * {@link GenericRecordBuilder}, turned into a stream event, read back through the format,
 * and compared field by field with the values that were written.
 */
@Test
public void testFlatRecord() throws Exception {
    Schema recordSchema = Schema.recordOf(
        "record",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
        Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
        Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
        Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
        Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))));
    FormatSpecification avroSpec =
        new FormatSpecification(Formats.AVRO, recordSchema, Collections.<String, String>emptyMap());

    // Build the input record against the Avro rendition of the same schema.
    org.apache.avro.Schema avroSchema = convertSchema(recordSchema);
    GenericRecord written = new GenericRecordBuilder(avroSchema)
        .set("int", Integer.MAX_VALUE)
        .set("long", Long.MAX_VALUE)
        .set("boolean", false)
        .set("bytes", Charsets.UTF_8.encode("hello world"))
        .set("double", Double.MAX_VALUE)
        .set("float", Float.MAX_VALUE)
        .set("string", "foo bar")
        .set("array", Lists.newArrayList(1, 2, 3))
        .set("map", ImmutableMap.of("k1", 1, "k2", 2))
        .set("nullable", null)
        .set("nullable2", "Hello")
        .build();

    RecordFormat<StreamEvent, StructuredRecord> avroFormat = RecordFormats.createInitializedFormat(avroSpec);
    StructuredRecord decoded = avroFormat.read(toStreamEvent(written));

    Assert.assertEquals(Integer.MAX_VALUE, decoded.get("int"));
    Assert.assertEquals(Long.MAX_VALUE, decoded.get("long"));
    Assert.assertFalse((Boolean) decoded.get("boolean"));
    // The bytes field is read back as a ByteBuffer, so compare its contents as a byte array.
    Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) decoded.get("bytes")));
    Assert.assertEquals(Double.MAX_VALUE, decoded.get("double"));
    Assert.assertEquals(Float.MAX_VALUE, decoded.get("float"));
    Assert.assertEquals("foo bar", decoded.get("string"));
    Assert.assertEquals(Lists.newArrayList(1, 2, 3), decoded.get("array"));
    assertMapEquals(ImmutableMap.<String, Object>of("k1", 1, "k2", 2), (Map<Object, Object>) decoded.get("map"));
    Assert.assertNull(decoded.get("nullable"));
    Assert.assertEquals("Hello", decoded.get("nullable2"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Aggregations

FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 61, Test (org.junit.Test): 43, Schema (co.cask.cdap.api.data.schema.Schema): 32, StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19, StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 17, StreamId (co.cask.cdap.proto.id.StreamId): 16, ViewSpecification (co.cask.cdap.proto.ViewSpecification): 14, StreamProperties (co.cask.cdap.proto.StreamProperties): 11, StreamViewId (co.cask.cdap.proto.id.StreamViewId): 11, DatasetId (co.cask.cdap.proto.id.DatasetId): 6, NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 5, NamespaceId (co.cask.cdap.proto.id.NamespaceId): 5, MetadataSearchResultRecord (co.cask.cdap.proto.metadata.MetadataSearchResultRecord): 5, IOException (java.io.IOException): 5, UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException): 4, NotFoundException (co.cask.cdap.common.NotFoundException): 3, ApplicationId (co.cask.cdap.proto.id.ApplicationId): 3, ArtifactId (co.cask.cdap.proto.id.ArtifactId): 3, ProgramId (co.cask.cdap.proto.id.ProgramId): 3, HttpURLConnection (java.net.HttpURLConnection): 3