Use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
In class DataStreamsTest, the method testAlertPublisher:
@Test
public void testAlertPublisher() throws Exception {
  String sinkName = "alertSink";
  String topic = "alertTopic";
  Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
  StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
  StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
  StructuredRecord alertRecord = StructuredRecord.builder(schema).build();

  /*
   * source --> nullAlert --> sink
   *                |
   *                |--> TMS publisher
   */
  DataStreamsConfig config = DataStreamsConfig.builder()
    .setBatchInterval("5s")
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, ImmutableList.of(record1, record2, alertRecord))))
    .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
    .addStage(new ETLStage("tms", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
    .addConnection("source", "nullAlert")
    .addConnection("nullAlert", "sink")
    .addConnection("nullAlert", "tms")
    .setCheckpointDir(checkpointDir)
    .build();

  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest");
  ApplicationManager appManager = deployApplication(appId, appRequest);

  SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
  sparkManager.start();
  sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);

  final Set<StructuredRecord> expectedRecords = ImmutableSet.of(record1, record2);
  final Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<String, String>()));
  final DataSetManager<Table> sinkTable = getDataset(sinkName);

  Tasks.waitFor(true, () -> {
    // get alerts from TMS
    try {
      getMessagingAdmin(NamespaceId.DEFAULT.getNamespace()).getTopicProperties(topic);
    } catch (TopicNotFoundException e) {
      return false;
    }
    MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
    Set<Alert> actualMessages = new HashSet<>();
    try (CloseableIterator<Message> iter =
           messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
      while (iter.hasNext()) {
        Message message = iter.next();
        Alert alert = message.decodePayload(r -> GSON.fromJson(r, Alert.class));
        actualMessages.add(alert);
      }
    }

    // get records from sink
    sinkTable.flush();
    Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(sinkTable));
    return expectedRecords.equals(outputRecords) && expectedMessages.equals(actualMessages);
  }, 4, TimeUnit.MINUTES);

  sparkManager.stop();
  sparkManager.waitForStopped(10, TimeUnit.SECONDS);

  validateMetric(appId, "source.records.out", 3);
  validateMetric(appId, "nullAlert.records.in", 3);
  validateMetric(appId, "nullAlert.records.out", 2);
  validateMetric(appId, "nullAlert.records.alert", 1);
  validateMetric(appId, "sink.records.in", 2);
  validateMetric(appId, "tms.records.in", 1);
}
Use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
In class MockRuntimeDatasetSink, the method readOutput:
/**
* Used to read the records written by this sink.
*
* @param tableManager dataset manager used to get the sink dataset to read from
*/
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
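
A minimal usage sketch in a test context like the one in testAlertPublisher above; the dataset name "runtimeSink" and the expected count are illustrative, not taken from the original tests:

// Read back whatever the mock sink wrote during the run. The dataset name must
// match the one the sink was configured with; "runtimeSink" is a placeholder.
DataSetManager<Table> sinkManager = getDataset("runtimeSink");
List<StructuredRecord> written = MockRuntimeDatasetSink.readOutput(sinkManager);
Assert.assertEquals(2, written.size());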
Use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
In class MockSQLEngine, the method writeInput:
/**
* Used to write the input records for the pipeline run. Should be called after the pipeline has been created.
*
* @param fileName file to write the records into
* @param records records that should be the input for the pipeline
*/
public static void writeInput(String fileName, Iterable<StructuredRecord> records) throws Exception {
  Function<StructuredRecord, String> mapper = input -> {
    try {
      return StructuredRecordStringConverter.toJsonString(input);
    } catch (IOException e) {
      throw new RuntimeException("Unable to set up file for test.", e);
    }
  };
  String output = Joiner.on("\n").join(Iterables.transform(records, mapper));
  Files.write(output, new File(fileName), Charsets.UTF_8);
}
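
A minimal usage sketch, assuming a StructuredRecord schema built the same way as in the test above; the file path and record values are illustrative:

// Serialize two illustrative records to newline-delimited JSON so the mock SQL
// engine can use them as pipeline input. The path is a placeholder.
Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.of(Schema.Type.LONG)));
StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
MockSQLEngine.writeInput("/tmp/mock-sql-input.json", ImmutableList.of(record1, record2));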
Use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
In class MockSQLEngineWithStageSettings, the method writeInput:
/**
* Used to write the input records for the pipeline run. Should be called after the pipeline has been created.
*
* @param fileName file to write the records into
* @param records records that should be the input for the pipeline
*/
public static void writeInput(String fileName, Iterable<StructuredRecord> records) throws Exception {
  Function<StructuredRecord, String> mapper = input -> {
    try {
      return StructuredRecordStringConverter.toJsonString(input);
    } catch (IOException e) {
      throw new RuntimeException("Unable to set up file for test.", e);
    }
  };
  String output = Joiner.on("\n").join(Iterables.transform(records, mapper));
  Files.write(output, new File(fileName), Charsets.UTF_8);
}
Use of io.cdap.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
In class MockPullProducer, the method produce:
@Override
public RecordCollection produce(SQLDataset dataset) {
  // Deserialize the expected records from JSON and adjust their data types
  TypeToken<HashSet<StructuredRecord>> typeToken = new TypeToken<HashSet<StructuredRecord>>() { };
  Type setOfStructuredRecordType = typeToken.getType();
  Set<StructuredRecord> jsonRecords = GSON.fromJson(expected, setOfStructuredRecordType);
  Set<StructuredRecord> records = new HashSet<>();
  for (StructuredRecord jsonRecord : jsonRecords) {
    records.add(transform(jsonRecord, jsonRecord.getSchema()));
  }

  // Build an RDD from the records and wrap the resulting DataFrame in a new RecordCollection
  SparkContext sc = SparkContext.getOrCreate();
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
  SQLContext sqlContext = new SQLContext(sc);
  StructType sparkSchema = DataFrames.toDataType(this.datasetDescription.getSchema());
  JavaRDD<Row> rdd = jsc.parallelize(new ArrayList<>(records)).map(sr -> DataFrames.toRow(sr, sparkSchema));
  Dataset<Row> ds = sqlContext.createDataFrame(rdd.rdd(), sparkSchema);
  return new SparkRecordCollectionImpl(ds);
}
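
For reference, a minimal sketch of the record-to-Row conversion used in produce() above, relying on the same DataFrames helper; the schema and value are illustrative:

// Convert a CDAP schema to a Spark StructType, then map a single record onto a
// Spark SQL Row using that schema. Values here are placeholders.
Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.of(Schema.Type.LONG)));
StructuredRecord record = StructuredRecord.builder(schema).set("id", 7L).build();
StructType sparkSchema = DataFrames.toDataType(schema);
Row sparkRow = DataFrames.toRow(record, sparkSchema);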