Use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.
The class ConnectorSettingsTest, method should_find_csv_connector_short_name.
@Test
void should_find_csv_connector_short_name() {
  Config config =
      ConfigFactory.parseString("name: csv, csv{url:\"file:///a/b.csv\"}")
          .withFallback(CONNECTOR_DEFAULT_SETTINGS);
  ConnectorSettings connectorSettings = new ConnectorSettings(config, true);
  connectorSettings.init(true);
  Connector connector = connectorSettings.getConnector();
  assertThat(connector).isNotNull().isInstanceOf(CSVConnector.class);
  assertThat(config.getConfig("csv"))
      .hasPaths(
          "url", "fileNamePattern", "recursive", "maxConcurrentFiles",
          "encoding", "header", "delimiter", "quote", "escape", "comment",
          "skipRecords", "maxRecords")
      .doesNotHavePath("csv");
}
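As a hedged aside (not part of the test above), the resolved connector could then be driven through the reactive Connector API shown elsewhere on this page. The wiring below is an illustrative sketch only; it assumes the connector has already been fully initialized via init() and configure().

// Illustrative only: Connector#read() yields a Publisher<Publisher<Record>>,
// so the inner publishers are flattened before the records are consumed.
// Assumes connector.init() (and any required configure() call) has run.
Connector connector = connectorSettings.getConnector();
Long total =
    Flux.from(connector.read())
        .flatMap(Flux::from, connector.readConcurrency())
        .count()
        .block();
System.out.println("records read: " + total);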
Use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.
The class PrometheusEndToEndSimulacronIT, method newConnectorDelegate.
@NonNull
private Connector newConnectorDelegate() {
  return new Connector() {

    @Override
    public int readConcurrency() {
      return 1;
    }

    @Override
    public boolean supports(@NonNull ConnectorFeature feature) {
      return feature == CommonConnectorFeature.INDEXED_RECORDS;
    }

    @NonNull
    @Override
    public Publisher<Publisher<Record>> read() {
      AtomicInteger counter = new AtomicInteger();
      AtomicBoolean running = new AtomicBoolean(true);
      return Flux.just(
          Flux.generate(
              sink -> {
                int i = counter.getAndAdd(1);
                if (i == 0) {
                  // Start the timer on the first emission; it will
                  // eventually flip `running` to false.
                  startTimer(running);
                }
                if (running.get()) {
                  Record record =
                      RecordUtils.indexedCSV("pk", "pk" + 1, "cc", "cc" + 1, "v", "v" + 1);
                  sink.next(record);
                } else {
                  sink.complete();
                }
              }));
    }

    @NonNull
    @Override
    public RecordMetadata getRecordMetadata() {
      return (fieldType, cqlType) -> GenericType.STRING;
    }

    @NonNull
    @Override
    public Function<Publisher<Record>, Publisher<Record>> write() {
      throw new UnsupportedOperationException();
    }

    @Override
    public int writeConcurrency() {
      throw new UnsupportedOperationException();
    }
  };
}
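A brief hedged illustration of how a caller gates on connector capabilities; MAPPED_RECORDS is assumed here to be the sibling constant of INDEXED_RECORDS in CommonConnectorFeature.

// Sketch: this delegate only advertises indexed records, so a
// mapped-record workflow would be rejected by a capability check like this.
Connector connector = newConnectorDelegate();
assertThat(connector.supports(CommonConnectorFeature.INDEXED_RECORDS)).isTrue();
assertThat(connector.supports(CommonConnectorFeature.MAPPED_RECORDS)).isFalse();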
Use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.
The class UnloadWorkflow, method manyWriters.
private Flux<Record> manyWriters() {
  // Both writeConcurrency and readConcurrency are >= 0.5C here (C = number of cores).
  int actualConcurrency = Math.min(readConcurrency, writeConcurrency);
  int numThreads = Math.min(numCores * 2, actualConcurrency);
  Scheduler scheduler = Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
  schedulers.add(scheduler);
  return Flux.fromIterable(readStatements)
      .flatMap(
          results -> {
            Flux<Record> records =
                Flux.from(executor.readReactive(results))
                    .publishOn(scheduler, 500)
                    .transform(queryWarningsHandler)
                    .transform(totalItemsMonitor)
                    .transform(totalItemsCounter)
                    .transform(failedReadResultsMonitor)
                    .transform(failedReadsHandler)
                    .map(readResultMapper::map)
                    .transform(failedRecordsMonitor)
                    .transform(unmappableRecordsHandler);
            if (actualConcurrency == writeConcurrency) {
              records = records.transform(writer);
            } else {
              // If the actual concurrency is less than the connector's desired write
              // concurrency, we need to give the connector a chance to switch writers
              // frequently so that it can really redirect records to all the final
              // destinations (to that many files on disk, for example). If the connector
              // is correctly implemented, each window will be redirected to a different
              // destination in a round-robin fashion.
              records = records.window(500).flatMap(window -> window.transform(writer), 1, 500);
            }
            return records.transform(failedRecordsMonitor).transform(failedRecordsHandler);
          },
          actualConcurrency,
          500);
}
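The round-robin behavior described in the comment can be reproduced with plain Reactor. The sketch below (illustrative, not dsbulk code) shows that window(500) plus flatMap(..., 1, 500) subscribes to one window at a time, so a writer that rotates its destination on each subscription receives each batch on a different target.

// Each 500-element window is processed sequentially (concurrency = 1),
// mirroring how UnloadWorkflow lets the connector switch writers per window.
Flux.range(0, 2_000)
    .window(500)
    .flatMap(
        window -> window.count().doOnNext(n -> System.out.println("window of " + n + " records")),
        1,
        500)
    .blockLast();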
Use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.
The class ConnectorSettingsTest, method should_find_json_connector_short_name.
@Test
void should_find_json_connector_short_name() {
  Config config =
      ConfigFactory.parseString("name: json, json{ url:\"file:///a/b.json\"}")
          .withFallback(CONNECTOR_DEFAULT_SETTINGS);
  ConnectorSettings connectorSettings = new ConnectorSettings(config, true);
  connectorSettings.init(true);
  Connector connector = connectorSettings.getConnector();
  assertThat(connector).isNotNull().isInstanceOf(JsonConnector.class);
  assertThat(config.getConfig("json"))
      .hasPaths(
          "url", "fileNamePattern", "recursive", "maxConcurrentFiles",
          "encoding", "skipRecords", "maxRecords", "parserFeatures",
          "generatorFeatures", "prettyPrint")
      .doesNotHavePath("json");
}
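The short-name resolution in both tests relies on HOCON fallback merging. A minimal standalone sketch of that mechanism with the Typesafe Config API (the default values here are illustrative, not dsbulk's actual defaults):

// parseString + withFallback layers user settings over defaults;
// getConfig("json") then exposes the merged subtree.
Config defaults = ConfigFactory.parseString("json { skipRecords = 0, maxRecords = -1 }");
Config merged =
    ConfigFactory.parseString("name: json, json { url: \"file:///a/b.json\" }")
        .withFallback(defaults);
System.out.println(merged.getString("name"));                       // json
System.out.println(merged.getConfig("json").getString("url"));      // file:///a/b.json
System.out.println(merged.getConfig("json").getInt("skipRecords")); // 0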
Use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.
The class MockConnector, method mockWrites.
/**
* Sets up the mock connector to emulate writes; it will store all received records as if they
* were written to an external sink. The "written" records will appear in the returned list.
*
* @return the list where "written" records will be stored.
*/
public static List<Record> mockWrites() {
  List<Record> records = new ArrayList<>();
  setDelegate(
      new Connector() {

        @Override
        public void init() {}

        @Override
        public void configure(
            @NonNull Config settings, boolean read, boolean retainRecordSources) {}

        @Override
        public int readConcurrency() {
          return -1;
        }

        @Override
        public int writeConcurrency() {
          return 1;
        }

        @Override
        public boolean supports(@NonNull ConnectorFeature feature) {
          return true;
        }

        @NonNull
        @Override
        public RecordMetadata getRecordMetadata() {
          return (field, cql) -> GenericType.STRING;
        }

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
          return Flux::just;
        }

        @NonNull
        @Override
        public Function<Publisher<Record>, Publisher<Record>> write() {
          // Capture every record flowing through the write pipeline.
          return upstream -> Flux.from(upstream).doOnNext(records::add);
        }
      });
  return records;
}
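A hedged usage sketch: a test installs the mock, runs whatever workflow is under test (elided here), and then asserts on the captured records. The assertion style is illustrative, not the exact dsbulk test harness.

// Illustrative test flow, assuming the workflow writes through MockConnector:
List<Record> writtenRecords = MockConnector.mockWrites();
// ... run the workflow under test; its writes are captured by the mock ...
assertThat(writtenRecords).isNotEmpty();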