use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
the class StructuredRecordComparatorTest method testRecordUnions.
@Test
public void testRecordUnions() {
Schema intFieldSchema = Schema.recordOf("i", Schema.Field.of("x", INT_SCHEMA));
Schema longFieldSchema = Schema.recordOf("l", Schema.Field.of("x", LONG_SCHEMA));
Schema unionSchema = Schema.recordOf("u", Schema.Field.of("u", Schema.unionOf(intFieldSchema, longFieldSchema)));
StructuredRecord irec = StructuredRecord.builder(intFieldSchema).set("x", 0).build();
StructuredRecord lrec = StructuredRecord.builder(longFieldSchema).set("x", 0L).build();
StructuredRecord r1 = StructuredRecord.builder(unionSchema).set("u", irec).build();
StructuredRecord r2 = StructuredRecord.builder(unionSchema).set("u", lrec).build();
Assert.assertEquals(0, COMPARATOR.compare(r1, r1));
Assert.assertEquals(0, COMPARATOR.compare(r2, r2));
testRecordsNotEqual(r1, r2);
}
use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
the class StructuredRecordComparatorTest method testMapOfUnions.
@Test
public void testMapOfUnions() {
Schema intKeySchema = Schema.recordOf("i", Schema.Field.of("i", INT_SCHEMA));
Schema stringKeySchema = Schema.recordOf("s", Schema.Field.of("s", STRING_SCHEMA));
Schema keySchema = Schema.unionOf(intKeySchema, stringKeySchema);
Schema mapSchema = Schema.mapOf(keySchema, Schema.unionOf(BOOL_SCHEMA, INT_SCHEMA));
Schema schema = Schema.recordOf("m", Schema.Field.of("m", mapSchema));
StructuredRecord intKey = StructuredRecord.builder(intKeySchema).set("i", 0).build();
StructuredRecord stringKey = StructuredRecord.builder(stringKeySchema).set("s", "0").build();
Map<StructuredRecord, Object> map1 = new HashMap<>();
map1.put(intKey, false);
map1.put(stringKey, 0);
// test equality
StructuredRecord r1 = StructuredRecord.builder(schema).set("m", map1).build();
StructuredRecord r2 = StructuredRecord.builder(schema).set("m", map1).build();
Assert.assertEquals(0, COMPARATOR.compare(r1, r2));
// test same keys, different value type
Map<StructuredRecord, Object> map2 = new HashMap<>();
map2.put(intKey, 0);
map2.put(stringKey, 0);
r2 = StructuredRecord.builder(schema).set("m", map2).build();
testRecordsNotEqual(r1, r2);
// test same vals, different key type
map2 = new HashMap<>();
map2.put(stringKey, false);
map2.put(stringKey, 0);
r2 = StructuredRecord.builder(schema).set("m", map2).build();
testRecordsNotEqual(r1, r2);
}
use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
the class StructuredRecordComparatorTest method testSimpleArrays.
@Test
public void testSimpleArrays() {
Schema schema = Schema.recordOf("x", Schema.Field.of("x", Schema.arrayOf(INT_SCHEMA)));
// test with lists
StructuredRecord r1 = StructuredRecord.builder(schema).set("x", Arrays.asList(0, 1, 2, 3, 4, 5)).build();
StructuredRecord r2 = StructuredRecord.builder(schema).set("x", Arrays.asList(0, 1, 2, 3, 4)).build();
Assert.assertEquals(0, COMPARATOR.compare(r1, r1));
testRecordsNotEqual(r1, r2);
// test comparing list with array
r2 = StructuredRecord.builder(schema).set("x", new int[] { 0, 1, 2, 3, 4, 5 }).build();
Assert.assertEquals(0, COMPARATOR.compare(r1, r2));
r2 = StructuredRecord.builder(schema).set("x", new int[] { 0, 1, 2, 3, 4 }).build();
testRecordsNotEqual(r1, r2);
r2 = StructuredRecord.builder(schema).set("x", Arrays.asList(5, 4, 3, 2, 1, 0)).build();
testRecordsNotEqual(r1, r2);
r2 = StructuredRecord.builder(schema).set("x", new int[] { 5, 4, 3, 2, 1, 0 }).build();
testRecordsNotEqual(r1, r2);
}
use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
the class DataPipelineTest method testOuterJoin.
private void testOuterJoin(Engine engine) throws Exception {
Schema inputSchema1 = Schema.recordOf("customerRecord", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
Schema inputSchema2 = Schema.recordOf("itemRecord", Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)), Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
Schema inputSchema3 = Schema.recordOf("transactionRecord", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
String input1Name = "source1OuterJoinInput-" + engine;
String input2Name = "source2OuterJoinInput-" + engine;
String input3Name = "source3OuterJoinInput-" + engine;
String outputName = "outerJoinOutput-" + engine;
String joinerName = "outerJoiner-" + engine;
String sinkName = "outerJoinSink-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1))).addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2))).addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3))).addStage(new ETLStage("t1", IdentityTransform.getPlugin())).addStage(new ETLStage("t2", IdentityTransform.getPlugin())).addStage(new ETLStage("t3", IdentityTransform.getPlugin())).addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&" + "t1.customer_name=t2.cust_name=t3.c_name", "t1", ""))).addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName))).addConnection("source1", "t1").addConnection("source2", "t2").addConnection("source3", "t3").addConnection("t1", joinerName).addConnection("t2", joinerName).addConnection("t3", joinerName).addConnection(joinerName, sinkName).setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema outSchema = Schema.recordOf("join.output", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))), Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1).set("customer_id", "4").set("customer_name", "martha").build();
StructuredRecord recordCar = StructuredRecord.builder(inputSchema2).set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
StructuredRecord recordBike = StructuredRecord.builder(inputSchema2).set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3).set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3).set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3).set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
// write one record to each source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema).set("customer_id", "1").set("customer_name", "samuel").set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema).set("customer_id", "2").set("customer_name", "bob").set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema).set("customer_id", "3").set("customer_name", "jane").set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema).set("customer_id", "4").set("customer_name", "martha").build();
DataSetManager<Table> sinkManager = getDataset(outputName);
Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(4, appId, joinerName + ".records.out");
validateMetric(4, appId, sinkName + ".records.in");
}
use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
the class DataPipelineTest method testAlertPublisher.
private void testAlertPublisher(Engine engine) throws Exception {
String sourceName = "alertSource" + engine.name();
String sinkName = "alertSink" + engine.name();
String topic = "alertTopic" + engine.name();
/*
* source --> nullAlert --> sink
* |
* |--> TMS publisher
*/
ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addStage(new ETLStage("tms alert", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace()))).addConnection("source", "nullAlert").addConnection("nullAlert", "sink").addConnection("nullAlert", "tms alert").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
DataSetManager<Table> sourceTable = getDataset(sourceName);
MockSource.writeInput(sourceTable, ImmutableList.of(record1, record2, alertRecord));
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.start();
manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> sinkTable = getDataset(sinkName);
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkTable));
Set<StructuredRecord> expected = ImmutableSet.of(record1, record2);
Assert.assertEquals(expected, actual);
MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
Set<Alert> actualMessages = new HashSet<>();
try (CloseableIterator<Message> iter = messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
while (iter.hasNext()) {
Message message = iter.next();
Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
actualMessages.add(alert);
}
}
Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<>()));
Assert.assertEquals(expectedMessages, actualMessages);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "nullAlert.records.in");
validateMetric(2, appId, "nullAlert.records.out");
validateMetric(1, appId, "nullAlert.records.alert");
validateMetric(2, appId, "sink.records.in");
validateMetric(1, appId, "tms alert.records.in");
}
Aggregations