Use of com.datastax.oss.dsbulk.workflow.api.Workflow in project dsbulk by datastax.
The class UnloadWorkflow, method manyWriters.
private Flux<Record> manyWriters() {
  // writeConcurrency and readConcurrency are >= 0.5C here
  int actualConcurrency = Math.min(readConcurrency, writeConcurrency);
  int numThreads = Math.min(numCores * 2, actualConcurrency);
  Scheduler scheduler =
      Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
  schedulers.add(scheduler);
  return Flux.fromIterable(readStatements)
      .flatMap(
          results -> {
            Flux<Record> records =
                Flux.from(executor.readReactive(results))
                    .publishOn(scheduler, 500)
                    .transform(queryWarningsHandler)
                    .transform(totalItemsMonitor)
                    .transform(totalItemsCounter)
                    .transform(failedReadResultsMonitor)
                    .transform(failedReadsHandler)
                    .map(readResultMapper::map)
                    .transform(failedRecordsMonitor)
                    .transform(unmappableRecordsHandler);
            if (actualConcurrency == writeConcurrency) {
              records = records.transform(writer);
            } else {
              // If the actual concurrency is less than the connector's desired write
              // concurrency, we need to give the connector a chance to switch writers
              // frequently so that it can really redirect records to all the final
              // destinations (to that many files on disk, for example). If the connector
              // is correctly implemented, each window will be redirected to a different
              // destination in a round-robin fashion.
              records = records.window(500).flatMap(window -> window.transform(writer), 1, 500);
            }
            return records.transform(failedRecordsMonitor).transform(failedRecordsHandler);
          },
          actualConcurrency,
          500);
}
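A minimal standalone Reactor sketch (not dsbulk code) of the windowing trick used in the else branch above: with the flatMap concurrency fixed at 1, windows are written one at a time, so a writer that picks a new destination each time it is applied spreads successive windows across destinations in round-robin fashion. The writer function and destination names below are hypothetical stand-ins for the connector's write transformation.

import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import reactor.core.publisher.Flux;

public class WindowedWriterSketch {
  public static void main(String[] args) {
    AtomicInteger nextDestination = new AtomicInteger();
    // Hypothetical writer: each application to a window picks the next destination,
    // mimicking a connector that rotates output files.
    Function<Flux<Integer>, Flux<String>> writer =
        window -> {
          int dest = nextDestination.getAndIncrement();
          return window.map(record -> "destination-" + dest + " <- record " + record);
        };
    Flux.range(0, 9)
        // window(3) + flatMap(..., 1, 3): windows are consumed sequentially, so
        // records 0-2, 3-5 and 6-8 land in three different destinations.
        .window(3)
        .flatMap(w -> w.transform(writer), 1, 3)
        .doOnNext(System.out::println)
        .blockLast();
  }
}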
Use of com.datastax.oss.dsbulk.workflow.api.Workflow in project dsbulk by datastax.
The class UnloadWorkflow, method oneWriter.
private Flux<Record> oneWriter() {
  int numThreads = Math.min(numCores * 2, readConcurrency);
  Scheduler scheduler =
      numThreads == 1
          ? Schedulers.immediate()
          : Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
  schedulers.add(scheduler);
  return Flux.fromIterable(readStatements)
      .flatMap(
          results ->
              Flux.from(executor.readReactive(results))
                  .publishOn(scheduler, 500)
                  .transform(queryWarningsHandler)
                  .transform(totalItemsMonitor)
                  .transform(totalItemsCounter)
                  .transform(failedReadResultsMonitor)
                  .transform(failedReadsHandler)
                  .map(readResultMapper::map)
                  .transform(failedRecordsMonitor)
                  .transform(unmappableRecordsHandler),
          readConcurrency,
          500)
      .transform(writer)
      .transform(failedRecordsMonitor)
      .transform(failedRecordsHandler);
}
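The point of the ternary above is to avoid a needless thread hop when only one thread would be used. Below is a small self-contained sketch (assuming plain Reactor plus Netty's DefaultThreadFactory, not dsbulk code) of the same scheduler selection and of what publishOn(scheduler, 500) does; readConcurrency is an illustrative value here.

import io.netty.util.concurrent.DefaultThreadFactory;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;

public class SchedulerChoiceSketch {
  public static void main(String[] args) {
    int numCores = Runtime.getRuntime().availableProcessors();
    int readConcurrency = 4; // illustrative value only
    int numThreads = Math.min(numCores * 2, readConcurrency);
    Scheduler scheduler =
        numThreads == 1
            ? Schedulers.immediate() // stay on the calling thread
            : Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
    Flux.range(0, 5)
        // publishOn switches downstream work to the chosen scheduler and
        // prefetches up to 500 items from upstream, as in oneWriter().
        .publishOn(scheduler, 500)
        .map(i -> Thread.currentThread().getName() + " processed " + i)
        .doOnNext(System.out::println)
        .blockLast();
    scheduler.dispose();
  }
}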
Use of com.datastax.oss.dsbulk.workflow.api.Workflow in project dsbulk by datastax.
The class DataStaxBulkLoader, method run.
@NonNull
public ExitStatus run() {
  Workflow workflow = null;
  try {
    AnsiConfigurator.configureAnsi(args);
    CommandLineParser parser = new CommandLineParser(args);
    ParsedCommandLine result = parser.parse();
    Config config = result.getConfig();
    workflow = result.getWorkflowProvider().newWorkflow(config);
    WorkflowThread workflowThread = new WorkflowThread(workflow);
    Runtime.getRuntime().addShutdownHook(new CleanupThread(workflow, workflowThread));
    // start the workflow and wait for its completion
    workflowThread.start();
    workflowThread.join();
    return workflowThread.getExitStatus();
  } catch (GlobalHelpRequestException e) {
    HelpEmitter.emitGlobalHelp(e.getConnectorName());
    return STATUS_OK;
  } catch (SectionHelpRequestException e) {
    try {
      HelpEmitter.emitSectionHelp(e.getSectionName(), e.getConnectorName());
      return STATUS_OK;
    } catch (Exception e2) {
      LOGGER.error(e2.getMessage(), e2);
      return STATUS_CRASHED;
    }
  } catch (VersionRequestException e) {
    // Use the OS charset
    PrintWriter pw =
        new PrintWriter(
            new BufferedWriter(new OutputStreamWriter(System.out, Charset.defaultCharset())));
    pw.println(WorkflowUtils.getBulkLoaderNameAndVersion());
    pw.flush();
    return STATUS_OK;
  } catch (Throwable t) {
    return ErrorHandler.handleUnexpectedError(workflow, t);
  }
}
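For context, a minimal sketch of how run() is typically driven from a launcher: build the loader from the raw CLI arguments, run it, and turn the returned ExitStatus into a process exit code. This is not copied from dsbulk; the package names and the exitCode() accessor are assumptions made for illustration.

import com.datastax.oss.dsbulk.runner.DataStaxBulkLoader;
import com.datastax.oss.dsbulk.runner.ExitStatus;

public class LauncherSketch {
  public static void main(String[] args) {
    // run() does not throw: help and version requests as well as fatal errors
    // are all mapped to an ExitStatus, as the catch clauses above show.
    ExitStatus status = new DataStaxBulkLoader(args).run();
    System.exit(status.exitCode());
  }
}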