Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class BigQueryIOPushDownIT, method readUsingDirectReadMethodPushDown.
@Test
public void readUsingDirectReadMethodPushDown() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
      .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread_pushdown");
}
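The helper collectAndPublishMetrics is defined elsewhere in the IT and is not shown here. A minimal sketch of the metric-extraction side, assuming Beam's org.apache.beam.sdk.testutils.metrics.MetricsReader test utility (the same class used in the NexmarkLauncher snippet below) and a placeholder publish step that is not the IT's actual implementation:

// Hypothetical sketch: pull the timing recorded by TimeMonitor out of the
// finished PipelineResult. TimeMonitor updates a distribution with timestamps,
// so MetricsReader's start/end accessors yield the observed read window.
private void collectAndPublishMetricsSketch(PipelineResult result, String suffix) {
  MetricsReader reader = new MetricsReader(result, NAMESPACE);
  long startMs = reader.getStartTimeMetric(READ_TIME_METRIC);
  long endMs = reader.getEndTimeMetric(READ_TIME_METRIC);
  // Placeholder publish step; the real IT pushes this to a metrics store.
  System.out.printf("read runtime%s: %d ms%n", suffix, endMs - startMs);
}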
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class BigQueryIOPushDownIT, method readUsingDirectReadMethod.
@Test
public void readUsingDirectReadMethod() {
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove the push-down rule.
  ruleList.remove(BeamIOPushDownRule.INSTANCE);

  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(
      new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(ImmutableList.of(RuleSets.ofList(ruleList)))
          .build();

  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
      .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
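Together, these two tests compare DIRECT_READ with and without BeamIOPushDownRule. A hedged sketch of how the difference could be read off the finished PipelineResult, reusing MetricsReader and the FIELDS_READ_METRIC name from the snippets; the comparison itself is illustrative, not the IT's actual assertion:

// Illustrative only: compare the FIELDS_READ_METRIC counter across the two runs.
// With BeamIOPushDownRule active, unused columns are pruned at the source, so
// the push-down run should report a smaller value than this run.
long fieldsRead = new MetricsReader(result, NAMESPACE).getCounterMetric(FIELDS_READ_METRIC);
System.out.printf("fields read without push-down: %d%n", fieldsRead);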
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class DynamoDBIOWriteTest, method testWritePutItemsWithPartialSuccess.
@Test
public void testWritePutItemsWithPartialSuccess() {
  List<WriteRequest> writes = putRequests(Item.range(0, 10));
  when(client.batchWriteItem(any(BatchWriteItemRequest.class)))
      .thenReturn(partialWriteSuccess(writes.subList(4, 10)))
      .thenReturn(partialWriteSuccess(writes.subList(8, 10)))
      .thenReturn(BatchWriteItemResponse.builder().build());

  pipeline
      .apply(Create.of(10)) // number of items to produce
      .apply(ParDo.of(new GenerateItems())) // 10 items in one bundle
      .apply("write", DynamoDBIO.<Item>write().withWriteRequestMapperFn(putRequestMapper));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();

  verify(client, times(3)).batchWriteItem(any(BatchWriteItemRequest.class));
  InOrder ordered = inOrder(client);
  ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes)));
  ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(4, 10))));
  ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(8, 10))));
}
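partialWriteSuccess is a test helper not shown in this listing. A plausible sketch, assuming the AWS SDK v2 model classes used above and that DynamoDBIO retries whatever the response reports as unprocessed; the table name "test" is an assumed fixture value:

// Hypothetical helper: report the given requests back as unprocessed so the
// writer must retry exactly those items on the next batchWriteItem call.
private static BatchWriteItemResponse partialWriteSuccess(List<WriteRequest> unprocessed) {
  return BatchWriteItemResponse.builder()
      .unprocessedItems(ImmutableMap.of("test", unprocessed)) // "test" is assumed
      .build();
}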
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class NexmarkLauncher, method monitor.
/**
 * Monitor the performance and progress of a running job. Return final performance if it was
 * measured.
 */
@Nullable
private NexmarkPerf monitor(NexmarkQuery query) {
  if (!options.getMonitorJobs()) {
    return null;
  }

  if (configuration.debug) {
    NexmarkUtils.console("Waiting for main pipeline to 'finish'");
  } else {
    NexmarkUtils.console("--debug=false, so job will not self-cancel");
  }

  PipelineResult job = mainResult;
  PipelineResult publisherJob = publisherResult;
  List<NexmarkPerf.ProgressSnapshot> snapshots = new ArrayList<>();
  long startMsSinceEpoch = System.currentTimeMillis();
  long endMsSinceEpoch = -1;
  if (options.getRunningTimeMinutes() != null) {
    endMsSinceEpoch =
        startMsSinceEpoch
            + Duration.standardMinutes(options.getRunningTimeMinutes()).getMillis()
            - Duration.standardSeconds(configuration.preloadSeconds).getMillis();
  }
  long lastActivityMsSinceEpoch = -1;
  NexmarkPerf perf = null;
  boolean waitingForShutdown = false;
  boolean cancelJob = false;
  boolean publisherCancelled = false;
  List<String> errors = new ArrayList<>();

  while (true) {
    long now = System.currentTimeMillis();
    if (endMsSinceEpoch >= 0 && now > endMsSinceEpoch && !waitingForShutdown) {
      NexmarkUtils.console("Reached end of test, cancelling job");
      try {
        cancelJob = true;
        job.cancel();
      } catch (IOException e) {
        throw new RuntimeException("Unable to cancel main job: ", e);
      }
      if (publisherResult != null) {
        try {
          publisherJob.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel publisher job: ", e);
        }
        publisherCancelled = true;
      }
      waitingForShutdown = true;
    }

    PipelineResult.State state = job.getState();
    NexmarkUtils.console(
        "%s %s%s", state, queryName, waitingForShutdown ? " (waiting for shutdown)" : "");

    NexmarkPerf currPerf;
    if (configuration.debug) {
      currPerf =
          currentPerf(
              startMsSinceEpoch, now, job, snapshots, query.eventMonitor, query.resultMonitor);
    } else {
      currPerf = null;
    }

    if (perf == null || perf.anyActivity(currPerf)) {
      lastActivityMsSinceEpoch = now;
    }

    if (options.isStreaming() && !waitingForShutdown) {
      Duration quietFor = new Duration(lastActivityMsSinceEpoch, now);
      long fatalCount = new MetricsReader(job, query.getName()).getCounterMetric("fatal");
      if (fatalCount == -1) {
        fatalCount = 0;
      }
      if (fatalCount > 0) {
        NexmarkUtils.console("ERROR: job has fatal errors, cancelling.");
        errors.add(String.format("Pipeline reported %s fatal errors", fatalCount));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (configuration.debug
          && configuration.numEvents > 0
          && currPerf.numEvents >= configuration.numEvents
          && currPerf.numResults >= 0
          && quietFor.isLongerThan(DONE_DELAY)) {
        NexmarkUtils.console("streaming query appears to have finished, waiting for completion.");
        waitingForShutdown = true;
        if (options.getCancelStreamingJobAfterFinish()) {
          cancelJob = true;
        }
      } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) {
        NexmarkUtils.console(
            "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.",
            quietFor.getStandardMinutes());
        errors.add(
            String.format(
                "Cancelling streaming job since it appeared stuck for %d min.",
                quietFor.getStandardMinutes()));
        waitingForShutdown = true;
        cancelJob = true;
      } else if (quietFor.isLongerThan(STUCK_WARNING_DELAY)) {
        NexmarkUtils.console(
            "WARNING: streaming query appears to have been stuck for %d min.",
            quietFor.getStandardMinutes());
      }
      if (cancelJob) {
        try {
          job.cancel();
        } catch (IOException e) {
          throw new RuntimeException("Unable to cancel main job: ", e);
        }
      }
    }

    perf = currPerf;

    boolean running = true;
    switch (state) {
      case UNKNOWN:
      case UNRECOGNIZED:
      case STOPPED:
      case RUNNING:
        // Keep going.
        break;
      case DONE:
        // All done.
        running = false;
        break;
      case CANCELLED:
        running = false;
        if (!cancelJob) {
          errors.add("Job was unexpectedly cancelled");
        }
        break;
      case FAILED:
      case UPDATED:
        // Abnormal termination.
        running = false;
        errors.add("Job was unexpectedly updated");
        break;
    }
    if (!running) {
      break;
    }

    if (lastActivityMsSinceEpoch == now) {
      NexmarkUtils.console("new perf %s", perf);
    } else {
      NexmarkUtils.console("no activity");
    }

    try {
      Thread.sleep(PERF_DELAY.getMillis());
    } catch (InterruptedException e) {
      Thread.interrupted();
      NexmarkUtils.console("Interrupted: pipeline is still running");
    }
  }

  perf.errors = errors;
  perf.snapshots = snapshots;

  if (publisherResult != null) {
    NexmarkUtils.console("Shutting down publisher pipeline.");
    try {
      if (!publisherCancelled) {
        publisherJob.cancel();
      }
      publisherJob.waitUntilFinish(Duration.standardMinutes(5));
    } catch (IOException e) {
      throw new RuntimeException("Unable to cancel publisher job: ", e);
    }
  }
  return perf;
}
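Stripped of the Nexmark-specific bookkeeping, the loop above is a generic poll-and-cancel pattern on PipelineResult. A minimal, self-contained sketch of just that pattern; the 10-minute deadline and 15-second poll interval are illustrative values, not Nexmark's constants:

import java.io.IOException;
import org.apache.beam.sdk.PipelineResult;
import org.joda.time.Duration;

// Watch a PipelineResult's state, cancel it once a deadline passes, and stop
// looping on any terminal state (DONE, FAILED, CANCELLED, UPDATED).
static void pollUntilDoneOrDeadline(PipelineResult job) throws IOException, InterruptedException {
  long deadlineMs = System.currentTimeMillis() + Duration.standardMinutes(10).getMillis();
  while (true) {
    PipelineResult.State state = job.getState();
    if (state.isTerminal()) {
      return;
    }
    if (System.currentTimeMillis() > deadlineMs) {
      job.cancel(); // request cancellation; the next poll sees CANCELLED
    }
    Thread.sleep(Duration.standardSeconds(15).getMillis());
  }
}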
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class DynamoDBIOWriteTest, method testWritePutItemsWithRetrySuccess.
@Test
public void testWritePutItemsWithRetrySuccess() {
  when(client.batchWriteItem(any(BatchWriteItemRequest.class)))
      .thenThrow(
          AmazonDynamoDBException.class,
          AmazonDynamoDBException.class,
          AmazonDynamoDBException.class)
      .thenReturn(new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of()));

  pipeline
      .apply(Create.of(Item.of(1)))
      .apply(
          "write",
          DynamoDBIO.<Item>write()
              .withWriteRequestMapperFn(putRequestMapper)
              .withAwsClientsProvider(StaticAwsClientsProvider.of(client))
              .withRetryConfiguration(try4Times));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();

  // Three thrown exceptions plus the final success: four calls in total.
  verify(client, times(4)).batchWriteItem(any(BatchWriteItemRequest.class));
}
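Note the SDK v1 model classes here (AmazonDynamoDBException, BatchWriteItemResult) versus the v2 builders in the earlier partial-success test; the two snippets appear to come from the parallel aws and aws2 module variants of DynamoDBIOWriteTest. The try4Times fixture is defined elsewhere; a plausible sketch, assuming the RetryConfiguration factory exposed by Beam's v1 DynamoDBIO, with an illustrative 10-second cap:

// Assumed fixture: allow up to 4 attempts (1 initial call + 3 retries) within
// an illustrative 10-second window before DynamoDBIO gives up and rethrows.
private static final DynamoDBIO.RetryConfiguration try4Times =
    DynamoDBIO.RetryConfiguration.create(4, Duration.standardSeconds(10));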