use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
the class CSVEndToEndSimulacronIT method massive_load_errors.
/**
 * Regression test for DAT-593: checks that LogManager copes with a large volume of bad records
 * without disturbing the main load workflow.
 *
 * <p>The mocked connector exposes 100 resources of 1,000 records each; every tenth record of a
 * resource is an unreadable record, which adds up to 10,000 failed records overall.
 */
@Test
void massive_load_errors() throws Exception {
  Table table1 =
      new Table("table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT));
  SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", table1));

  CSVConnector delegate =
      new CSVConnector() {

        @Override
        public void configure(
            @NonNull Config settings, boolean read, boolean retainRecordSources) {
          // no-op
        }

        @Override
        public void init() {
          // no-op
        }

        @Override
        public int readConcurrency() {
          // to force runner to use maximum parallelism
          return Integer.MAX_VALUE;
        }

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
          List<Publisher<Record>> publishers = new ArrayList<>();
          for (int resource = 0; resource < 100; resource++) {
            // One counter per resource: it numbers the records emitted by that resource.
            AtomicInteger position = new AtomicInteger();
            Flux<Record> records =
                Flux.generate(
                    sink -> {
                      int current = position.getAndIncrement();
                      if (current == 1_000) {
                        // 1,000 records (0..999) have been emitted: end of this resource.
                        sink.complete();
                        return;
                      }
                      if (current % 10 == 0) {
                        // Every tenth record is emitted as an error record.
                        sink.next(
                            RecordUtils.error(
                                new IllegalArgumentException(
                                    "Record could not be read: " + current)));
                        return;
                      }
                      String value = String.valueOf(current);
                      sink.next(RecordUtils.indexedCSV("pk", value, "cc", value, "v", value));
                    });
            publishers.add(records);
          }
          return Flux.fromIterable(publishers);
        }
      };
  MockConnector.setDelegate(delegate);

  String[] args = {
    "load",
    "-c",
    "mock",
    "--log.maxErrors",
    "10000",
    "--schema.keyspace",
    "ks1",
    "--schema.table",
    "table1"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();

  assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
  String allMessages = logs.getAllMessagesAsString();
  assertThat(allMessages).contains("completed with 10000 errors");
  assertThat(allMessages)
      .contains("Records: total: 100,000, successful: 90,000, failed: 10,000");
  validateExceptionsLog(10_000, "Record could not be read:", "connector-errors.log");
}
use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
the class JsonEndToEndSimulacronIT method full_load_custom_features.
/**
 * Runs a JSON load of {@code JsonUtils.JSON_RECORDS_WITH_COMMENTS} with custom parser features
 * ({@code ALLOW_COMMENTS}) and custom deserialization features, then expects {@code STATUS_OK}
 * and checks the insert statement through {@code validateQueryCount}.
 */
@Test
void full_load_custom_features() {
  Table table1 = new Table("table1", new Column("key", INT), new Column("value", FLOAT));
  SimulacronUtils.primeTables(simulacron, new SimulacronUtils.Keyspace("ks1", table1));

  String insertQuery = "INSERT INTO ks1.table1 (key, value) VALUES (:key, :value)";
  String jsonUrl = StringUtils.quoteJson(JsonUtils.JSON_RECORDS_WITH_COMMENTS);
  String[] args = {
    "load",
    "-c",
    "json",
    "--connector.json.url",
    jsonUrl,
    "--schema.query",
    insertQuery,
    "--connector.json.parserFeatures",
    "{ALLOW_COMMENTS = true}",
    "--connector.json.deserializationFeatures",
    "{USE_BIG_INTEGER_FOR_INTS = false, USE_BIG_DECIMAL_FOR_FLOATS = false}"
  };

  DataStaxBulkLoader loader = new DataStaxBulkLoader(addCommonSettings(args));
  ExitStatus status = loader.run();

  assertStatus(status, STATUS_OK);
  validateQueryCount(simulacron, 1, insertQuery, LOCAL_ONE);
}
use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
the class JsonEndToEndSimulacronIT method error_load_extra_field.
/**
 * Loads {@code /missing-extra.json} into a two-column table with {@code
 * --schema.allowExtraFields false} and {@code --log.maxErrors 1}. Expects the operation to abort
 * with too many errors and the extraneous fields C and D to each be reported once in {@code
 * mapping-errors.log}.
 */
@Test
void error_load_extra_field() throws Exception {
  Table table1 = new Table("table1", new Column("a", INT), new Column("b", TEXT));
  SimulacronUtils.primeTables(simulacron, new SimulacronUtils.Keyspace("ks1", table1));

  String jsonUrl = StringUtils.quoteJson(getClass().getResource("/missing-extra.json"));
  String[] args = {
    "load",
    "-c",
    "json",
    "--log.maxErrors",
    "1",
    "--log.verbosity",
    "2",
    "--connector.json.url",
    jsonUrl,
    "--schema.query",
    "INSERT INTO ks1.table1 (a, b) VALUES (:a, :b)",
    "--schema.mapping",
    "A = a, B = b",
    "--schema.allowExtraFields",
    "false"
  };

  DataStaxBulkLoader loader = new DataStaxBulkLoader(addCommonSettings(args));
  ExitStatus status = loader.run();

  assertStatus(status, STATUS_ABORTED_TOO_MANY_ERRORS);
  String allMessages = logs.getAllMessagesAsString();
  assertThat(allMessages).contains("aborted: Too many errors, the maximum allowed is 1");
  assertThat(allMessages).contains("Records: total: 3, successful: 1, failed: 2");
  validateNumberOfBadRecords(2);
  // Both extraneous fields are expected exactly once in the mapping error log.
  String errorLog = "mapping-errors.log";
  validateExceptionsLog(1, "Extraneous field C was found in record", errorLog);
  validateExceptionsLog(1, "Extraneous field D was found in record", errorLog);
}
use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
the class JsonEndToEndSimulacronIT method error_load_missing_field.
/**
 * Loads {@code /missing-extra.json} into a four-column table with {@code
 * --schema.allowMissingFields false} and {@code --log.maxErrors 2}. Expects the operation to
 * abort with too many errors, all three records to fail, and the missing fields C (twice) and D
 * (once) to be reported in {@code mapping-errors.log}.
 */
@Test
void error_load_missing_field() throws Exception {
  Table table1 =
      new Table(
          "table1",
          new Column("a", INT),
          new Column("b", TEXT),
          new Column("c", BOOLEAN),
          new Column("d", INT));
  SimulacronUtils.primeTables(simulacron, new SimulacronUtils.Keyspace("ks1", table1));

  String jsonUrl = StringUtils.quoteJson(getClass().getResource("/missing-extra.json"));
  String[] args = {
    "load",
    "-c",
    "json",
    "--log.maxErrors",
    "2",
    "--log.verbosity",
    "2",
    "--connector.json.url",
    jsonUrl,
    "--schema.query",
    "INSERT INTO ks1.table1 (a, b, c, d) VALUES (:a, :b, :c, :d)",
    "--schema.mapping",
    "A = a, B = b, C = c, D = d",
    "--schema.allowMissingFields",
    "false"
  };

  DataStaxBulkLoader loader = new DataStaxBulkLoader(addCommonSettings(args));
  ExitStatus status = loader.run();

  assertStatus(status, STATUS_ABORTED_TOO_MANY_ERRORS);
  String allMessages = logs.getAllMessagesAsString();
  assertThat(allMessages).contains("aborted: Too many errors, the maximum allowed is 2");
  assertThat(allMessages).contains("Records: total: 3, successful: 0, failed: 3");
  validateNumberOfBadRecords(3);
  String errorLog = "mapping-errors.log";
  validateExceptionsLog(
      2, "Required field C (mapped to column c) was missing from record", errorLog);
  validateExceptionsLog(
      1, "Required field D (mapped to column d) was missing from record", errorLog);
}
Aggregations