Search in sources :

Example 6 with StructuredRecord

use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class StructuredRecordComparatorTest method testRecordUnions.

@Test
public void testRecordUnions() {
    Schema intFieldSchema = Schema.recordOf("i", Schema.Field.of("x", INT_SCHEMA));
    Schema longFieldSchema = Schema.recordOf("l", Schema.Field.of("x", LONG_SCHEMA));
    Schema unionSchema = Schema.recordOf("u", Schema.Field.of("u", Schema.unionOf(intFieldSchema, longFieldSchema)));
    StructuredRecord irec = StructuredRecord.builder(intFieldSchema).set("x", 0).build();
    StructuredRecord lrec = StructuredRecord.builder(longFieldSchema).set("x", 0L).build();
    StructuredRecord r1 = StructuredRecord.builder(unionSchema).set("u", irec).build();
    StructuredRecord r2 = StructuredRecord.builder(unionSchema).set("u", lrec).build();
    Assert.assertEquals(0, COMPARATOR.compare(r1, r1));
    Assert.assertEquals(0, COMPARATOR.compare(r2, r2));
    testRecordsNotEqual(r1, r2);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 7 with StructuredRecord

use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class StructuredRecordComparatorTest method testMapOfUnions.

@Test
public void testMapOfUnions() {
    Schema intKeySchema = Schema.recordOf("i", Schema.Field.of("i", INT_SCHEMA));
    Schema stringKeySchema = Schema.recordOf("s", Schema.Field.of("s", STRING_SCHEMA));
    Schema keySchema = Schema.unionOf(intKeySchema, stringKeySchema);
    Schema mapSchema = Schema.mapOf(keySchema, Schema.unionOf(BOOL_SCHEMA, INT_SCHEMA));
    Schema schema = Schema.recordOf("m", Schema.Field.of("m", mapSchema));
    StructuredRecord intKey = StructuredRecord.builder(intKeySchema).set("i", 0).build();
    StructuredRecord stringKey = StructuredRecord.builder(stringKeySchema).set("s", "0").build();
    Map<StructuredRecord, Object> map1 = new HashMap<>();
    map1.put(intKey, false);
    map1.put(stringKey, 0);
    // test equality
    StructuredRecord r1 = StructuredRecord.builder(schema).set("m", map1).build();
    StructuredRecord r2 = StructuredRecord.builder(schema).set("m", map1).build();
    Assert.assertEquals(0, COMPARATOR.compare(r1, r2));
    // test same keys, different value type
    Map<StructuredRecord, Object> map2 = new HashMap<>();
    map2.put(intKey, 0);
    map2.put(stringKey, 0);
    r2 = StructuredRecord.builder(schema).set("m", map2).build();
    testRecordsNotEqual(r1, r2);
    // test same vals, different key type
    map2 = new HashMap<>();
    map2.put(stringKey, false);
    map2.put(stringKey, 0);
    r2 = StructuredRecord.builder(schema).set("m", map2).build();
    testRecordsNotEqual(r1, r2);
}
Also used : HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 8 with StructuredRecord

use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class StructuredRecordComparatorTest method testSimpleArrays.

@Test
public void testSimpleArrays() {
    Schema schema = Schema.recordOf("x", Schema.Field.of("x", Schema.arrayOf(INT_SCHEMA)));
    // test with lists
    StructuredRecord r1 = StructuredRecord.builder(schema).set("x", Arrays.asList(0, 1, 2, 3, 4, 5)).build();
    StructuredRecord r2 = StructuredRecord.builder(schema).set("x", Arrays.asList(0, 1, 2, 3, 4)).build();
    Assert.assertEquals(0, COMPARATOR.compare(r1, r1));
    testRecordsNotEqual(r1, r2);
    // test comparing list with array
    r2 = StructuredRecord.builder(schema).set("x", new int[] { 0, 1, 2, 3, 4, 5 }).build();
    Assert.assertEquals(0, COMPARATOR.compare(r1, r2));
    r2 = StructuredRecord.builder(schema).set("x", new int[] { 0, 1, 2, 3, 4 }).build();
    testRecordsNotEqual(r1, r2);
    r2 = StructuredRecord.builder(schema).set("x", Arrays.asList(5, 4, 3, 2, 1, 0)).build();
    testRecordsNotEqual(r1, r2);
    r2 = StructuredRecord.builder(schema).set("x", new int[] { 5, 4, 3, 2, 1, 0 }).build();
    testRecordsNotEqual(r1, r2);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 9 with StructuredRecord

use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class DataPipelineTest method testOuterJoin.

private void testOuterJoin(Engine engine) throws Exception {
    Schema inputSchema1 = Schema.recordOf("customerRecord", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("itemRecord", Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)), Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema3 = Schema.recordOf("transactionRecord", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
    String input1Name = "source1OuterJoinInput-" + engine;
    String input2Name = "source2OuterJoinInput-" + engine;
    String input3Name = "source3OuterJoinInput-" + engine;
    String outputName = "outerJoinOutput-" + engine;
    String joinerName = "outerJoiner-" + engine;
    String sinkName = "outerJoinSink-" + engine;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1))).addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2))).addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3))).addStage(new ETLStage("t1", IdentityTransform.getPlugin())).addStage(new ETLStage("t2", IdentityTransform.getPlugin())).addStage(new ETLStage("t3", IdentityTransform.getPlugin())).addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&" + "t1.customer_name=t2.cust_name=t3.c_name", "t1", ""))).addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName))).addConnection("source1", "t1").addConnection("source2", "t2").addConnection("source3", "t3").addConnection("t1", joinerName).addConnection("t2", joinerName).addConnection("t3", joinerName).addConnection(joinerName, sinkName).setEngine(engine).build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema outSchema = Schema.recordOf("join.output", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))), Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1).set("customer_id", "4").set("customer_name", "martha").build();
    StructuredRecord recordCar = StructuredRecord.builder(inputSchema2).set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
    StructuredRecord recordBike = StructuredRecord.builder(inputSchema2).set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
    StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3).set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3).set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3).set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    // write one record to each source
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema).set("customer_id", "1").set("customer_name", "samuel").set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema).set("customer_id", "2").set("customer_name", "bob").set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema).set("customer_id", "3").set("customer_name", "jane").set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema).set("customer_id", "4").set("customer_name", "martha").build();
    DataSetManager<Table> sinkManager = getDataset(outputName);
    Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    validateMetric(4, appId, joinerName + ".records.out");
    validateMetric(4, appId, sinkName + ".records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)

Example 10 with StructuredRecord

use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class DataPipelineTest method testAlertPublisher.

private void testAlertPublisher(Engine engine) throws Exception {
    String sourceName = "alertSource" + engine.name();
    String sinkName = "alertSink" + engine.name();
    String topic = "alertTopic" + engine.name();
    /*
     * source --> nullAlert --> sink
     *               |
     *               |--> TMS publisher
     */
    ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addStage(new ETLStage("tms alert", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace()))).addConnection("source", "nullAlert").addConnection("nullAlert", "sink").addConnection("nullAlert", "tms alert").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
    StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
    StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
    StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
    DataSetManager<Table> sourceTable = getDataset(sourceName);
    MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.start();
    manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    DataSetManager<Table> sinkTable = getDataset(sinkName);
    Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
    Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
    Assert.assertEquals(expected, actual);
    MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
    Set<Alert> actualMessages = new HashSet<>();
    try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
        while (iter.hasNext()) {
            Message message = iter.next();
            Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
            actualMessages.add(alert);
        }
    }
    Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<>()));
    Assert.assertEquals(expectedMessages, actualMessages);
    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "nullAlert.records.in");
    validateMetric(2, appId, "nullAlert.records.out");
    validateMetric(1, appId, "nullAlert.records.alert");
    validateMetric(2, appId, "sink.records.in");
    validateMetric(1, appId, "tms alert.records.in");
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MessageFetcher(io.cdap.cdap.api.messaging.MessageFetcher) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Message(io.cdap.cdap.api.messaging.Message) SpamMessage(io.cdap.cdap.datapipeline.mock.SpamMessage) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Alert(io.cdap.cdap.etl.api.Alert) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Aggregations

StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)210 Schema (io.cdap.cdap.api.data.schema.Schema)169 Test (org.junit.Test)119 Table (io.cdap.cdap.api.dataset.table.Table)76 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)73 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)73 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)68 ApplicationManager (io.cdap.cdap.test.ApplicationManager)68 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)59 WorkflowManager (io.cdap.cdap.test.WorkflowManager)54 HashSet (java.util.HashSet)50 ArrayList (java.util.ArrayList)44 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)40 HashMap (java.util.HashMap)25 File (java.io.File)17 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)16 FormatSpecification (io.cdap.cdap.api.data.format.FormatSpecification)15 DataStreamsConfig (io.cdap.cdap.etl.proto.v2.DataStreamsConfig)14 SparkManager (io.cdap.cdap.test.SparkManager)12 Map (java.util.Map)12