Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class ReportGenerationAppTest, method testGenerateReport.
@Test
public void testGenerateReport() throws Exception {
Map<String, String> runTimeArguments = new HashMap<>();
// Disable the TMS subscriber thread because the RunMetaFileSet avro files are written directly by this test case.
// If the subscriber thread were enabled, it would read the latest RunMetaFileSet avro file's content to find the
// latest message id to start fetching from, but that derived message id would be invalid in TMS because these
// runs were never published there. Disabling the thread avoids the resulting exception.
runTimeArguments.put(Constants.DISABLE_TMS_SUBSCRIBER_THREAD, "true");
Long currentTimeMillis = System.currentTimeMillis();
DatasetId metaFileset = createAndInitializeDataset(NamespaceId.DEFAULT, currentTimeMillis);
SparkManager sparkManager = deployAndStartReportingApplication(NamespaceId.DEFAULT, runTimeArguments);
URL url = sparkManager.getServiceURL(1, TimeUnit.MINUTES);
Assert.assertNotNull(url);
URL reportURL = url.toURI().resolve("reports/").toURL();
List<Filter> filters = ImmutableList.of(
    // white list filter
    new ValueFilter<>(Constants.NAMESPACE, ImmutableSet.of("ns1", "ns2"), null),
    new RangeFilter<>(Constants.DURATION, new RangeFilter.Range<>(null, 500L)),
    // black list filter
    new ValueFilter<>(Constants.ARTIFACT_NAME, null, ImmutableSet.of("cdap-data-streams", "cdap-data-pipeline")));
long startSecs = TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis);
ReportGenerationRequest request = new ReportGenerationRequest(
    "ns1_ns2_report", startSecs, startSecs + 30,
    new ArrayList<>(ReportField.FIELD_NAME_MAP.keySet()),
    ImmutableList.of(new Sort(Constants.DURATION, Sort.Order.DESCENDING)),
    filters);
HttpURLConnection urlConn = (HttpURLConnection) reportURL.openConnection();
urlConn.setDoOutput(true);
urlConn.setRequestMethod("POST");
urlConn.getOutputStream().write(GSON.toJson(request).getBytes(StandardCharsets.UTF_8));
if (urlConn.getErrorStream() != null) {
Assert.fail(Bytes.toString(ByteStreams.toByteArray(urlConn.getErrorStream())));
}
Assert.assertEquals(200, urlConn.getResponseCode());
Map<String, String> reportIdMap = getResponseObject(urlConn, STRING_STRING_MAP);
String reportId = reportIdMap.get("id");
Assert.assertNotNull(reportId);
URL reportIdURL = reportURL.toURI().resolve("info?report-id=" + reportId).toURL();
validateReportSummary(reportIdURL, startSecs);
// share the report to get the share id
URL shareReportURL = reportURL.toURI().resolve(reportId + "/share").toURL();
HttpURLConnection shareURLConnection = (HttpURLConnection) shareReportURL.openConnection();
shareURLConnection.setRequestMethod("POST");
ShareId shareId = getResponseObject(shareURLConnection, ShareId.class);
// test if we are able to get summary and read the summary using share id
URL shareIdSummaryURL = reportURL.toURI().resolve("info?share-id=" + shareId.getShareId()).toURL();
validateReportSummary(shareIdSummaryURL, startSecs);
// assert the number of report details is correct
URL reportRunsURL = reportURL.toURI().resolve("download?report-id=" + reportId).toURL();
validateReportContent(reportRunsURL);
// test if we are able to download and read the report using share id
URL shareIdRunsURL = reportURL.toURI().resolve("download?share-id=" + shareId.getShareId()).toURL();
validateReportContent(shareIdRunsURL);
// save the report with a new name and description
URL reportSaveURL = reportURL.toURI().resolve(reportId + "/" + "save").toURL();
urlConn = (HttpURLConnection) reportSaveURL.openConnection();
urlConn.setDoOutput(true);
urlConn.setRequestMethod("POST");
urlConn.getOutputStream().write(GSON.toJson(new ReportSaveRequest("newName", "newDescription")).getBytes(StandardCharsets.UTF_8));
if (urlConn.getErrorStream() != null) {
Assert.fail(Bytes.toString(ByteStreams.toByteArray(urlConn.getErrorStream())));
}
Assert.assertEquals(200, urlConn.getResponseCode());
// verify that the name and description of the report have been updated, and the expiry time is null
ReportGenerationInfo reportGenerationInfo = getResponseObject(reportIdURL.openConnection(), REPORT_GEN_INFO_TYPE);
Assert.assertEquals("newName", reportGenerationInfo.getName());
Assert.assertEquals("newDescription", reportGenerationInfo.getDescription());
Assert.assertNull(reportGenerationInfo.getExpiry());
// saving the report again should fail
urlConn = (HttpURLConnection) reportSaveURL.openConnection();
urlConn.setDoOutput(true);
urlConn.setRequestMethod("POST");
urlConn.getOutputStream().write(GSON.toJson(new ReportSaveRequest("anotherNewName", "anotherNewDescription")).getBytes(StandardCharsets.UTF_8));
if (urlConn.getErrorStream() != null) {
Assert.fail(Bytes.toString(ByteStreams.toByteArray(urlConn.getErrorStream())));
}
Assert.assertEquals(403, urlConn.getResponseCode());
// delete the report
URL reportDeleteURL = reportURL.toURI().resolve(reportId).toURL();
urlConn = (HttpURLConnection) reportDeleteURL.openConnection();
urlConn.setRequestMethod("DELETE");
Assert.assertEquals(200, urlConn.getResponseCode());
// getting the status of a deleted report returns 404
Assert.assertEquals(404, ((HttpURLConnection) reportIdURL.openConnection()).getResponseCode());
// deleting an already-deleted report also returns 404
urlConn = (HttpURLConnection) reportDeleteURL.openConnection();
urlConn.setRequestMethod("DELETE");
Assert.assertEquals(404, urlConn.getResponseCode());
// query a time range before the start time to verify that the report contents are empty
validateEmptyReports(reportURL, startSecs - TimeUnit.HOURS.toSeconds(2), startSecs - 30, filters);
// query a time range after the start time, but with a filter that doesn't match any records,
// to verify that the report contents are empty
List<Filter> filters2 = ImmutableList.of(
    new ValueFilter<>(Constants.NAMESPACE, ImmutableSet.of("ns1", "ns2"), null),
    // all the programs are run by user alice, so filtering on user bob matches no records
    new ValueFilter<>(Constants.USER, ImmutableSet.of(USER_BOB), null));
validateEmptyReports(reportURL, startSecs, startSecs + 30, filters2);
List<Filter> filters3 = ImmutableList.of(
    new ValueFilter<>(Constants.NAMESPACE, ImmutableSet.of("ns1", "ns2"), null),
    // all the programs share the same test artifact name, so blacklisting it yields empty results
    new ValueFilter<>(Constants.ARTIFACT_NAME, null, ImmutableSet.of(TEST_ARTIFACT_NAME)));
validateEmptyReports(reportURL, startSecs, startSecs + 30, filters3);
sparkManager.stop();
sparkManager.waitForStopped(60, TimeUnit.SECONDS);
deleteDatasetInstance(metaFileset);
}
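The POST-and-verify sequence above (open the connection, write the JSON body, fail on an error stream, assert the status code) is repeated for report generation and for both save attempts. As a minimal sketch, it could be factored into a helper along these lines; the method name and signature are hypothetical, while GSON, Bytes, ByteStreams, and Assert are the same utilities the test already uses:

// Hypothetical helper: POST a JSON body and return the connection so the caller can assert the status code.
private static HttpURLConnection postJson(URL url, Object body) throws Exception {
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  conn.setDoOutput(true);
  conn.setRequestMethod("POST");
  conn.getOutputStream().write(GSON.toJson(body).getBytes(StandardCharsets.UTF_8));
  if (conn.getErrorStream() != null) {
    // surface the service's error message rather than a bare status-code mismatch
    Assert.fail(Bytes.toString(ByteStreams.toByteArray(conn.getErrorStream())));
  }
  return conn;
}

A caller would then assert the expected code, for example Assert.assertEquals(200, postJson(reportURL, request).getResponseCode()).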
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class DataStreamsTest, method testAutoJoinNullEquality.
private void testAutoJoinNullEquality(boolean nullSafe) throws Exception {
/*
 * customers ----------|
 *                     |
 *                     |---> join ---> sink
 *                     |
 * transactions -------|
 */
Schema inputSchema1 = Schema.recordOf(
    "customer",
    Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
Schema inputSchema2 = Schema.recordOf(
    "transaction",
    Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)));
Schema outSchema = Schema.recordOf(
    "customers.transactions",
    Schema.Field.of("customers_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("customers_customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("transactions_t_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("transactions_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("transactions_item_id", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
StructuredRecord trans1 = StructuredRecord.builder(inputSchema2).set("t_id", "1").set("customer_id", "1").set("item_id", "11").build();
StructuredRecord trans2 = StructuredRecord.builder(inputSchema2).set("t_id", "2").set("customer_id", "3").set("item_id", "22").build();
StructuredRecord trans3 = StructuredRecord.builder(inputSchema2).set("t_id", "3").set("item_id", "33").build();
List<StructuredRecord> input1 = ImmutableList.of(recordSamuel, recordBob, recordJane);
List<StructuredRecord> input2 = ImmutableList.of(trans1, trans2, trans3);
String outputName = UUID.randomUUID().toString();
DataStreamsConfig etlConfig = DataStreamsConfig.builder()
    .addStage(new ETLStage("customers", MockSource.getPlugin(inputSchema1, input1)))
    .addStage(new ETLStage("transactions", MockSource.getPlugin(inputSchema2, input2)))
    .addStage(new ETLStage("join", MockAutoJoiner.getPlugin(
        Arrays.asList("customers", "transactions"), Collections.singletonList("customer_id"),
        Collections.singletonList("transactions"), Collections.emptyList(), Collections.emptyList(), nullSafe)))
    .addStage(new ETLStage("sink", MockSink.getPlugin(outputName)))
    .addConnection("customers", "join")
    .addConnection("transactions", "join")
    .addConnection("join", "sink")
    .setBatchInterval("5s")
    .setCheckpointDir(checkpointDir)
    .build();
AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
ApplicationManager appManager = deployApplication(appId, appRequest);
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start();
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
StructuredRecord join1 = StructuredRecord.builder(outSchema)
    .set("customers_customer_id", "1").set("customers_customer_name", "samuel")
    .set("transactions_t_id", "1").set("transactions_customer_id", "1").set("transactions_item_id", "11")
    .build();
StructuredRecord join2 = StructuredRecord.builder(outSchema)
    .set("customers_customer_id", "3").set("customers_customer_name", "jane")
    .set("transactions_t_id", "2").set("transactions_customer_id", "3").set("transactions_item_id", "22")
    .build();
StructuredRecord join3;
if (nullSafe) {
// this transaction has a null customer id, which should match with the null id from customers
join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_item_id", "33").set("customers_customer_name", "bob").build();
} else {
// this transaction has a null customer id, which should not match with the null id from customers
join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_item_id", "33").build();
}
Set<StructuredRecord> expected = ImmutableSet.of(join1, join2, join3);
DataSetManager<Table> outputManager = getDataset(outputName);
Tasks.waitFor(true, () -> {
outputManager.flush();
Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
return expected.equals(outputRecords);
}, 4, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
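Because this is a private helper parameterized on nullSafe, the visible behavior depends on the @Test methods that call it. A hypothetical pair of callers exercising both branches (the actual test method names in DataStreamsTest may differ):

// Hypothetical callers; the names are illustrative only.
@Test
public void testAutoJoinNullSafeEquality() throws Exception {
  testAutoJoinNullEquality(true);
}

@Test
public void testAutoJoinNullUnsafeEquality() throws Exception {
  testAutoJoinNullEquality(false);
}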
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class DataStreamsTest, method testTransformComputeRun.
private void testTransformComputeRun(ApplicationManager appManager, final Set<StructuredRecord> expected, String val1, String val2, final String outputName) throws Exception {
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start(ImmutableMap.of("field", "name", "val1", val1, "val2", val2, "output", outputName));
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
// since the dataset name is a macro, the dataset isn't created until it is needed; wait for it to exist
Tasks.waitFor(true, () -> getDataset(outputName).get() != null, 1, TimeUnit.MINUTES);
DataSetManager<Table> outputManager = getDataset(outputName);
Tasks.waitFor(true, () -> {
outputManager.flush();
Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
return expected.equals(outputRecords);
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
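The two waits above (first for the macro-named dataset to be created, then for its contents to match the expected records) form a pattern that recurs across these tests. A minimal sketch of that pattern as a reusable helper, assuming the same getDataset, Tasks, MockSink, and DataSetManager utilities shown above (the helper name itself is hypothetical):

// Hypothetical helper: wait for a lazily created dataset, then wait for its contents to match.
private void waitForOutput(String datasetName, Set<StructuredRecord> expected) throws Exception {
  // the dataset is created lazily when its macro name is resolved, so wait for it to exist first
  Tasks.waitFor(true, () -> getDataset(datasetName).get() != null, 1, TimeUnit.MINUTES);
  DataSetManager<Table> outputManager = getDataset(datasetName);
  Tasks.waitFor(true, () -> {
    outputManager.flush();
    return expected.equals(new HashSet<>(MockSink.readOutput(outputManager)));
  }, 1, TimeUnit.MINUTES);
}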
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class DataStreamsTest, method testAutoJoin.
@Test
public void testAutoJoin() throws Exception {
/*
 * customers ----------|
 *                     |
 *                     |---> join ---> sink
 *                     |
 * transactions -------|
 */
Schema inputSchema1 = Schema.recordOf(
    "customer",
    Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
Schema inputSchema2 = Schema.recordOf(
    "transaction",
    Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)));
Schema outSchema = Schema.recordOf(
    "customers.transactions",
    Schema.Field.of("customers_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("customers_customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("transactions_t_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("transactions_customer_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("transactions_item_id", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
StructuredRecord tx1 = StructuredRecord.builder(inputSchema2).set("t_id", "1").set("customer_id", "1").set("item_id", "11").build();
StructuredRecord tx2 = StructuredRecord.builder(inputSchema2).set("t_id", "2").set("customer_id", "3").set("item_id", "22").build();
StructuredRecord tx3 = StructuredRecord.builder(inputSchema2).set("t_id", "3").set("customer_id", "4").set("item_id", "33").build();
List<StructuredRecord> input1 = ImmutableList.of(recordSamuel, recordBob, recordJane);
List<StructuredRecord> input2 = ImmutableList.of(tx1, tx2, tx3);
String outputName = UUID.randomUUID().toString();
DataStreamsConfig etlConfig = DataStreamsConfig.builder()
    .addStage(new ETLStage("customers", MockSource.getPlugin(inputSchema1, input1)))
    .addStage(new ETLStage("transactions", MockSource.getPlugin(inputSchema2, input2)))
    .addStage(new ETLStage("join", MockAutoJoiner.getPlugin(
        Arrays.asList("customers", "transactions"), Collections.singletonList("customer_id"),
        Collections.singletonList("transactions"), Collections.emptyList(), Collections.emptyList(), true)))
    .addStage(new ETLStage("sink", MockSink.getPlugin(outputName)))
    .addConnection("customers", "join")
    .addConnection("transactions", "join")
    .addConnection("join", "sink")
    .setBatchInterval("5s")
    .setCheckpointDir(checkpointDir)
    .build();
AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("AutoJoinerApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start();
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
StructuredRecord join1 = StructuredRecord.builder(outSchema)
    .set("customers_customer_id", "1").set("customers_customer_name", "samuel")
    .set("transactions_t_id", "1").set("transactions_customer_id", "1").set("transactions_item_id", "11")
    .build();
StructuredRecord join2 = StructuredRecord.builder(outSchema)
    .set("customers_customer_id", "3").set("customers_customer_name", "jane")
    .set("transactions_t_id", "2").set("transactions_customer_id", "3").set("transactions_item_id", "22")
    .build();
StructuredRecord join3 = StructuredRecord.builder(outSchema)
    .set("transactions_t_id", "3").set("transactions_customer_id", "4").set("transactions_item_id", "33")
    .build();
Set<StructuredRecord> expected = ImmutableSet.of(join1, join2, join3);
DataSetManager<Table> outputManager = getDataset(outputName);
Tasks.waitFor(true, () -> {
outputManager.flush();
Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
return expected.equals(outputRecords);
}, 4, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class DataStreamsTest, method testAggregatorJoinerMacrosWithCheckpoints.
private void testAggregatorJoinerMacrosWithCheckpoints(boolean isReducibleAggregator) throws Exception {
/*
           |--> aggregator --> sink1
  users1 --|
           |----|
                |--> dupeFlagger --> sink2
  users2 -------|
 */
Schema userSchema = Schema.recordOf(
    "user",
    Schema.Field.of("id", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
List<StructuredRecord> users1 = ImmutableList.of(
    StructuredRecord.builder(userSchema).set("id", 1L).set("name", "Samuel").build(),
    StructuredRecord.builder(userSchema).set("id", 2L).set("name", "Dwayne").build(),
    StructuredRecord.builder(userSchema).set("id", 3L).set("name", "Terry").build());
List<StructuredRecord> users2 = ImmutableList.of(
    StructuredRecord.builder(userSchema).set("id", 1L).set("name", "Samuel").build(),
    StructuredRecord.builder(userSchema).set("id", 2L).set("name", "Dwayne").build(),
    StructuredRecord.builder(userSchema).set("id", 4L).set("name", "Terry").build(),
    StructuredRecord.builder(userSchema).set("id", 5L).set("name", "Christopher").build());
DataStreamsConfig pipelineConfig = DataStreamsConfig.builder()
    .setBatchInterval("5s")
    .addStage(new ETLStage("users1", MockSource.getPlugin(userSchema, users1)))
    .addStage(new ETLStage("users2", MockSource.getPlugin(userSchema, users2)))
    .addStage(new ETLStage("sink1", MockSink.getPlugin("sink1")))
    .addStage(new ETLStage("sink2", MockSink.getPlugin("sink2")))
    .addStage(new ETLStage("aggregator", isReducibleAggregator
        ? FieldCountReducibleAggregator.getPlugin("${aggfield}", "${aggType}")
        : FieldCountAggregator.getPlugin("${aggfield}", "${aggType}")))
    .addStage(new ETLStage("dupeFlagger", DupeFlagger.getPlugin("users1", "${flagField}")))
    .addConnection("users1", "aggregator")
    .addConnection("aggregator", "sink1")
    .addConnection("users1", "dupeFlagger")
    .addConnection("users2", "dupeFlagger")
    .addConnection("dupeFlagger", "sink2")
    .setCheckpointDir(checkpointDir)
    .build();
AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ParallelAggJoinApp" + isReducibleAggregator);
ApplicationManager appManager = deployApplication(appId, appRequest);
// run it once with this set of macros
Map<String, String> arguments = new HashMap<>();
arguments.put("aggfield", "id");
arguments.put("aggType", "long");
arguments.put("flagField", "isDupe");
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start(arguments);
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
DataSetManager<Table> sink1 = getDataset("sink1");
DataSetManager<Table> sink2 = getDataset("sink2");
Schema aggSchema = Schema.recordOf("id.count", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
Set<StructuredRecord> expectedAggregates = ImmutableSet.of(
    StructuredRecord.builder(aggSchema).set("id", 0L).set("ct", 3L).build(),
    StructuredRecord.builder(aggSchema).set("id", 1L).set("ct", 1L).build(),
    StructuredRecord.builder(aggSchema).set("id", 2L).set("ct", 1L).build(),
    StructuredRecord.builder(aggSchema).set("id", 3L).set("ct", 1L).build());
Schema outputSchema = Schema.recordOf(
    "user.flagged",
    Schema.Field.of("id", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("isDupe", Schema.of(Schema.Type.BOOLEAN)));
Set<StructuredRecord> expectedJoined = ImmutableSet.of(
    StructuredRecord.builder(outputSchema).set("id", 1L).set("name", "Samuel").set("isDupe", true).build(),
    StructuredRecord.builder(outputSchema).set("id", 2L).set("name", "Dwayne").set("isDupe", true).build(),
    StructuredRecord.builder(outputSchema).set("id", 3L).set("name", "Terry").set("isDupe", false).build());
Tasks.waitFor(true, () -> {
sink1.flush();
sink2.flush();
Set<StructuredRecord> actualAggs = new HashSet<>(MockSink.readOutput(sink1));
Set<StructuredRecord> actualJoined = new HashSet<>(MockSink.readOutput(sink2));
return expectedAggregates.equals(actualAggs) && expectedJoined.equals(actualJoined);
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStopped(30, TimeUnit.SECONDS);
MockSink.clear(sink1);
MockSink.clear(sink2);
// run it again with different macros to make sure they are re-evaluated and not stored in the checkpoint
arguments = new HashMap<>();
arguments.put("aggfield", "name");
arguments.put("aggType", "string");
arguments.put("flagField", "dupe");
sparkManager.start(arguments);
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
aggSchema = Schema.recordOf("name.count", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
Set<StructuredRecord> expectedAggregates2 = ImmutableSet.of(
    StructuredRecord.builder(aggSchema).set("name", "all").set("ct", 3L).build(),
    StructuredRecord.builder(aggSchema).set("name", "Samuel").set("ct", 1L).build(),
    StructuredRecord.builder(aggSchema).set("name", "Dwayne").set("ct", 1L).build(),
    StructuredRecord.builder(aggSchema).set("name", "Terry").set("ct", 1L).build());
outputSchema = Schema.recordOf(
    "user.flagged",
    Schema.Field.of("id", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("dupe", Schema.of(Schema.Type.BOOLEAN)));
Set<StructuredRecord> expectedJoined2 = ImmutableSet.of(
    StructuredRecord.builder(outputSchema).set("id", 1L).set("name", "Samuel").set("dupe", true).build(),
    StructuredRecord.builder(outputSchema).set("id", 2L).set("name", "Dwayne").set("dupe", true).build(),
    StructuredRecord.builder(outputSchema).set("id", 3L).set("name", "Terry").set("dupe", false).build());
Tasks.waitFor(true, () -> {
sink1.flush();
sink2.flush();
Set<StructuredRecord> actualAggs = new HashSet<>(MockSink.readOutput(sink1));
Set<StructuredRecord> actualJoined = new HashSet<>(MockSink.readOutput(sink2));
return expectedAggregates2.equals(actualAggs) && expectedJoined2.equals(actualJoined);
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
MockSink.clear(sink1);
MockSink.clear(sink2);
}
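As with testAutoJoinNullEquality, this is a private helper, here parameterized on the aggregator variant, so it is presumably invoked once per variant from @Test methods. Hypothetical callers (actual names may differ):

// Hypothetical callers covering both aggregator variants; the names are illustrative only.
@Test
public void testAggregatorJoinerMacrosWithCheckpoints() throws Exception {
  testAggregatorJoinerMacrosWithCheckpoints(false);
}

@Test
public void testAggregatorJoinerMacrosWithCheckpointsReducible() throws Exception {
  testAggregatorJoinerMacrosWithCheckpoints(true);
}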