
Example 51 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

From the class ObjectDeserializer, method getRecordField.

// get a field from the object using the get method if the object is a StructuredRecord,
// or using reflection if it is not.
private Object getRecordField(Object record, String fieldName) throws NoSuchFieldException, IllegalAccessException {
    if (record instanceof StructuredRecord) {
        return ((StructuredRecord) record).get(fieldName);
    }
    Class<?> recordClass = record.getClass();
    Field field = recordClass.getDeclaredField(fieldName);
    field.setAccessible(true);
    return field.get(record);
}
Also used : Field(java.lang.reflect.Field) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)
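
To make the two paths concrete, here is a minimal hedged sketch (Point is a hypothetical POJO invented for the example, getRecordField is treated as accessible, and the schema mirrors the ones used in the tests below):

// hypothetical POJO for the reflection path
class Point {
    private final int x = 5;
}

Schema schema = Schema.recordOf("r", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
StructuredRecord record = StructuredRecord.builder(schema).set("x", 1).build();

// StructuredRecord path: resolved via StructuredRecord.get
Object fromRecord = getRecordField(record, "x"); // -> 1
// reflection path: resolved via Field.get on the declared field
Object fromPojo = getRecordField(new Point(), "x"); // -> 5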

Example 52 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

From the class ETLWorkerTest, method testDAG.

@Test
public void testDAG() throws Exception {
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("x", 1).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("x", 2).build();
    StructuredRecord record3 = StructuredRecord.builder(schema).set("x", 3).build();
    List<StructuredRecord> input = ImmutableList.of(record1, record2, record3);
    /*
     *            ----- value filter ------- sink1
     *           |
     * source --------- double --------
     *           |                     |---- sink2
     *            ----- identity ------
     */
    File sink1Out = TMP_FOLDER.newFolder();
    File sink2Out = TMP_FOLDER.newFolder();
    ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(input)))
        .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Out)))
        .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Out)))
        .addStage(new ETLStage("valueFilter", IntValueFilterTransform.getPlugin("x", 2)))
        .addStage(new ETLStage("double", DoubleTransform.getPlugin()))
        .addStage(new ETLStage("identity", IdentityTransform.getPlugin()))
        .addConnection("source", "valueFilter")
        .addConnection("source", "double")
        .addConnection("source", "identity")
        .addConnection("valueFilter", "sink1")
        .addConnection("double", "sink2")
        .addConnection("identity", "sink2")
        .build();
    ApplicationId appId = NamespaceId.DEFAULT.app("dagTest");
    AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Assert.assertNotNull(appManager);
    WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
    workerManager.start();
    workerManager.waitForStatus(true, 10, 1);
    try {
        // valueFilter drops records whose 'x' field equals 2, so sink1 should see record1 and record3
        List<StructuredRecord> sink1output = MockSink.getRecords(sink1Out, 0, 10, TimeUnit.SECONDS);
        List<StructuredRecord> sink1expected = ImmutableList.of(record1, record3);
        Assert.assertEquals(sink1expected, sink1output);
        // sink2 receives 6 records from 'double' (two per input record) plus 3 from 'identity'
        List<StructuredRecord> sink2output = MockSink.getRecords(sink2Out, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(9, sink2output.size());
    } finally {
        stopWorker(workerManager);
    }
    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "valueFilter.records.in");
    validateMetric(2, appId, "valueFilter.records.out");
    validateMetric(3, appId, "double.records.in");
    validateMetric(6, appId, "double.records.out");
    validateMetric(3, appId, "identity.records.in");
    validateMetric(3, appId, "identity.records.out");
    validateMetric(2, appId, "sink1.records.in");
    validateMetric(9, appId, "sink2.records.in");
}
Also used : WorkerManager(co.cask.cdap.test.WorkerManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)
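
The expected counts follow from the stage semantics: valueFilter drops records whose x equals 2, double emits each record twice, and identity passes records through unchanged. As a hedged illustration of the filter stage, a transform with that behavior might look like the sketch below, assuming the co.cask.cdap.etl.api.Transform contract; the real IntValueFilterTransform is a test plugin, and its registration via getPlugin is omitted here.

import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.Transform;

// hypothetical sketch: emits only records whose integer field does not equal
// the configured value, consistent with sink1 receiving record1 and record3 above
public class ValueFilterSketch extends Transform<StructuredRecord, StructuredRecord> {
    private final String fieldName;
    private final int value;

    public ValueFilterSketch(String fieldName, int value) {
        this.fieldName = fieldName;
        this.value = value;
    }

    @Override
    public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
        Integer fieldValue = input.get(fieldName);
        if (fieldValue == null || fieldValue != value) {
            emitter.emit(input);
        }
    }
}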

Example 53 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

From the class StreamInputFormatTest, method testFormatStreamRecordReader.

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"), Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();
    FormatSpecification formatSpec = new FormatSpecification(
        TextRecordFormat.class.getName(),
        Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
        Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format.createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }
    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
Also used : TextRecordFormat(co.cask.cdap.format.TextRecordFormat) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) AuthenticationTestContext(co.cask.cdap.security.auth.context.AuthenticationTestContext) NoOpAuthorizer(co.cask.cdap.security.spi.authorization.NoOpAuthorizer) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) GenericStreamEventData(co.cask.cdap.api.stream.GenericStreamEventData) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
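
The FormatSpecification above tells the input format to decode each event body with TextRecordFormat into a record with a single string field named "body". As a hedged sketch of that decoding step in isolation, assuming TextRecordFormat follows the usual RecordFormat initialize/read contract (checked exceptions elided):

TextRecordFormat format = new TextRecordFormat();
// formatSpec and streamEvent as built in the test above
format.initialize(formatSpec);
StructuredRecord parsed = format.read(streamEvent);
// parsed.get("body") should be "hello world", matching the assertion above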

Example 54 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

From the class StructuredRecordStringConverterTest, method checkConversion.

@Test
public void checkConversion() throws Exception {
    for (boolean nullable : Arrays.asList(true, false)) {
        StructuredRecord initial = getStructuredRecord(nullable);
        String jsonOfRecord = StructuredRecordStringConverter.toJsonString(initial);
        StructuredRecord recordOfJson = StructuredRecordStringConverter.fromJsonString(jsonOfRecord, schema);
        assertRecordsEqual(initial, recordOfJson);
    }
}
Also used : StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)
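
The test relies on a shared schema field and a getStructuredRecord helper defined elsewhere in the class. A self-contained sketch of the same round trip, using a hypothetical two-field schema with one nullable field:

Schema schema = Schema.recordOf("user",
    Schema.Field.of("id", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
StructuredRecord initial = StructuredRecord.builder(schema)
    .set("id", 42L)
    // nullable field left null, exercising the nullable = true case
    .set("name", null)
    .build();
String json = StructuredRecordStringConverter.toJsonString(initial);
StructuredRecord roundTripped = StructuredRecordStringConverter.fromJsonString(json, schema);
// roundTripped should again have id = 42L and name = null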

Example 55 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

From the class StructuredRecordStringConverterTest, method testPrimitiveArrays.

@SuppressWarnings("AssertEqualsBetweenInconvertibleTypes")
@Test
public void testPrimitiveArrays() throws Exception {
    Schema arraysSchema = Schema.recordOf("arrays",
        Schema.Field.of("int", Schema.arrayOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("long", Schema.arrayOf(Schema.of(Schema.Type.LONG))),
        Schema.Field.of("float", Schema.arrayOf(Schema.of(Schema.Type.FLOAT))),
        Schema.Field.of("double", Schema.arrayOf(Schema.of(Schema.Type.DOUBLE))),
        Schema.Field.of("bool", Schema.arrayOf(Schema.of(Schema.Type.BOOLEAN))));
    StructuredRecord expected = StructuredRecord.builder(arraysSchema)
        .set("int", new int[] { Integer.MIN_VALUE, 0, Integer.MAX_VALUE })
        .set("long", new long[] { Long.MIN_VALUE, 0L, Long.MAX_VALUE })
        .set("float", new float[] { Float.MIN_VALUE, 0f, Float.MAX_VALUE })
        .set("double", new double[] { Double.MIN_VALUE, 0d, Double.MAX_VALUE })
        .set("bool", new boolean[] { false, true })
        .build();
    String recordOfJson = StructuredRecordStringConverter.toJsonString(expected);
    StructuredRecord actual = StructuredRecordStringConverter.fromJsonString(recordOfJson, arraysSchema);
    List<Integer> expectedInts = ImmutableList.of(Integer.MIN_VALUE, 0, Integer.MAX_VALUE);
    List<Long> expectedLongs = ImmutableList.of(Long.MIN_VALUE, 0L, Long.MAX_VALUE);
    List<Float> expectedFloats = ImmutableList.of(Float.MIN_VALUE, 0f, Float.MAX_VALUE);
    List<Double> expectedDoubles = ImmutableList.of(Double.MIN_VALUE, 0d, Double.MAX_VALUE);
    List<Boolean> expectedBools = ImmutableList.of(false, true);
    Assert.assertEquals(expectedInts, actual.get("int"));
    Assert.assertEquals(expectedLongs, actual.get("long"));
    Assert.assertEquals(expectedFloats, actual.get("float"));
    Assert.assertEquals(expectedDoubles, actual.get("double"));
    Assert.assertEquals(expectedBools, actual.get("bool"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)
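
Note the wrinkle this test pins down: the record is built with primitive arrays, but fromJsonString materializes JSON arrays as java.util.List values, so the assertions compare Lists against fields written as arrays. That is also why the AssertEqualsBetweenInconvertibleTypes suppression is present.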

Aggregations

StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 97
Schema (co.cask.cdap.api.data.schema.Schema): 71
Test (org.junit.Test): 51
Table (co.cask.cdap.api.dataset.table.Table): 36
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 36
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 36
ApplicationManager (co.cask.cdap.test.ApplicationManager): 33
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 31
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 25
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 25
WorkflowManager (co.cask.cdap.test.WorkflowManager): 23
ArrayList (java.util.ArrayList): 20
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 19
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 18
HashSet (java.util.HashSet): 10
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig): 8
File (java.io.File): 8
TimeoutException (java.util.concurrent.TimeoutException): 8
Put (co.cask.cdap.api.dataset.table.Put): 7
ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 7