Use of com.datastax.oss.dsbulk.connectors.csv.CSVConnector in project dsbulk by DataStax.
The class CSVEndToEndSimulacronIT, method massive_load_errors:
/**
 * Test for DAT-593. Emulates 100 resources of 1,000 records each, with 100 bad records per
 * resource, for a total of 10,000 failed records (the arithmetic is illustrated in the sketch
 * after this method). Verifies that LogManager is capable of handling a high number of bad
 * records without disrupting the main load workflow.
 */
@Test
void massive_load_errors() throws Exception {
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace(
          "ks1",
          new Table(
              "table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
  MockConnector.setDelegate(
      new CSVConnector() {
        @Override
        public void configure(
            @NonNull Config settings, boolean read, boolean retainRecordSources) {}

        @Override
        public void init() {}

        @Override
        public int readConcurrency() {
          // to force runner to use maximum parallelism
          return Integer.MAX_VALUE;
        }

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
          List<Publisher<Record>> resources = new ArrayList<>();
          for (int i = 0; i < 100; i++) {
            AtomicInteger counter = new AtomicInteger();
            resources.add(
                Flux.generate(
                    (sink) -> {
                      int next = counter.getAndIncrement();
                      if (next == 1_000) {
                        sink.complete();
                      } else if (next % 10 == 0) {
                        sink.next(
                            RecordUtils.error(
                                new IllegalArgumentException(
                                    "Record could not be read: " + next)));
                      } else {
                        sink.next(
                            RecordUtils.indexedCSV(
                                "pk", String.valueOf(next),
                                "cc", String.valueOf(next),
                                "v", String.valueOf(next)));
                      }
                    }));
          }
          return Flux.fromIterable(resources);
        }
      });
  String[] args = {
    "load", "-c", "mock",
    "--log.maxErrors", "10000",
    "--schema.keyspace", "ks1",
    "--schema.table", "table1"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
  assertThat(logs.getAllMessagesAsString())
      .contains("completed with 10000 errors")
      .contains("Records: total: 100,000, successful: 90,000, failed: 10,000");
  validateExceptionsLog(10_000, "Record could not be read:", "connector-errors.log");
}
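The totals asserted above come directly from the Flux.generate loop: every 10th of the 1,000 records produced per resource is emitted as an error record, so 100 resources yield 100,000 records with 10,000 failures. The standalone sketch below reproduces that pattern and counts the results. It is an illustration of the arithmetic only, not dsbulk code: the class name ErrorRatioSketch and the "ok:"/"error:" string markers are hypothetical, and reactor-core is assumed to be on the classpath.

import java.util.concurrent.atomic.AtomicInteger;

import reactor.core.publisher.Flux;

// Hypothetical class, for illustration only; not part of dsbulk.
public class ErrorRatioSketch {

  public static void main(String[] args) {
    AtomicInteger good = new AtomicInteger();
    AtomicInteger bad = new AtomicInteger();
    Flux.range(0, 100) // 100 resources, as in the test above
        .concatMap(
            resource -> {
              AtomicInteger counter = new AtomicInteger();
              return Flux.<String>generate(
                  sink -> {
                    int next = counter.getAndIncrement();
                    if (next == 1_000) {
                      sink.complete(); // 1,000 records per resource
                    } else if (next % 10 == 0) {
                      sink.next("error:" + next); // every 10th record is bad
                    } else {
                      sink.next("ok:" + next);
                    }
                  });
            })
        .doOnNext(rec -> (rec.startsWith("error") ? bad : good).incrementAndGet())
        .blockLast();
    // Prints: good=90000 bad=10000 total=100000
    System.out.printf(
        "good=%d bad=%d total=%d%n", good.get(), bad.get(), good.get() + bad.get());
  }
}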
Use of com.datastax.oss.dsbulk.connectors.csv.CSVConnector in project dsbulk by DataStax.
The class CSVEndToEndSimulacronIT, method unload_write_error:
@Test
void unload_write_error() {
  Path file1 = unloadDir.resolve("output-000001.csv");
  Path file2 = unloadDir.resolve("output-000002.csv");
  Path file3 = unloadDir.resolve("output-000003.csv");
  Path file4 = unloadDir.resolve("output-000004.csv");
  MockConnector.setDelegate(
      new CSVConnector() {
        @Override
        public void configure(
            @NonNull Config settings, boolean read, boolean retainRecordSources) {
          settings =
              ConfigFactory.parseString(
                      "url = " + quoteJson(unloadDir) + ", header = false, maxConcurrentFiles = 4")
                  .withFallback(
                      ConfigUtils.createReferenceConfig().getConfig("dsbulk.connector.csv"));
          super.configure(settings, read, retainRecordSources);
        }

        @NonNull
        @Override
        public Function<Publisher<Record>, Publisher<Record>> write() {
          // will cause the write workers to fail because the files already exist
          try {
            Files.createFile(file1);
            Files.createFile(file2);
            Files.createFile(file3);
            Files.createFile(file4);
          } catch (IOException e) {
            throw new UncheckedIOException(e);
          }
          return super.write();
        }
      });
  primeIpByCountryTable(simulacron);
  RequestPrime prime = createQueryWithResultSet(SELECT_FROM_IP_BY_COUNTRY, 10);
  simulacron.prime(new Prime(prime));
  String[] args = {
    "unload",
    "--connector.name", "mock",
    "--schema.keyspace", "ks1",
    "--schema.query", SELECT_FROM_IP_BY_COUNTRY,
    "--schema.mapping", IP_BY_COUNTRY_MAPPING_INDEXED
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_ABORTED_FATAL_ERROR);
  assertThat(stdErr.getStreamAsString())
      .contains("failed")
      .containsPattern("output-00000[1-4].csv");
  assertThat(logs.getAllMessagesAsString())
      .contains("failed")
      .containsPattern("output-00000[1-4].csv");
}
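The write() override above pre-creates the four output files before delegating to super.write(), so the real CSVConnector's write workers fail when they try to create those same files themselves. The sketch below shows the kind of java.nio failure this relies on, assuming the connector opens its output files with create-new semantics; the class name and the use of Files.newOutputStream with CREATE_NEW are illustrative assumptions, not the connector's actual implementation.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Hypothetical class, for illustration only; not part of dsbulk.
public class ExistingFileConflictSketch {

  public static void main(String[] args) throws IOException {
    Path dir = Files.createTempDirectory("unload");
    Path out = dir.resolve("output-000001.csv");
    Files.createFile(out); // what the test's write() override does up front
    try (OutputStream ignored =
        Files.newOutputStream(out, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
      System.out.println("unexpected: existing file was opened for writing");
    } catch (FileAlreadyExistsException e) {
      // A write worker refusing to overwrite an existing file ends up here.
      System.out.println("write worker would fail here: " + e.getFile());
    }
  }
}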