Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_honor_multi_char_delimiter.
@Test
void should_honor_multi_char_delimiter() throws Exception {
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", url("/multi-char-delimiter.csv"),
          "delimiter", "\"||\"",
          "ignoreLeadingWhitespaces", true,
          "ignoreTrailingWhitespaces", true,
          "header", true);
  connector.configure(settings, true, true);
  connector.init();
  List<Record> records = Flux.merge(connector.read()).collectList().block();
  assertThat(records).hasSize(1);
  Record record = records.get(0);
  assertThat(record.fields()).hasSize(6);
  assertThat(record.getFieldValue(new DefaultIndexedField(0))).isEqualTo("foo");
  assertThat(record.getFieldValue(new DefaultIndexedField(1))).isEqualTo("|bar|");
  assertThat(record.getFieldValue(new DefaultIndexedField(2))).isEqualTo("foo||bar");
  assertThat(record.getFieldValue(new DefaultMappedField("field A"))).isEqualTo("foo");
  assertThat(record.getFieldValue(new DefaultMappedField("field B"))).isEqualTo("|bar|");
  assertThat(record.getFieldValue(new DefaultMappedField("field C"))).isEqualTo("foo||bar");
  connector.close();
}
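For context, dsbulk's CSV support is built on the univocity-parsers library, which accepts multi-character delimiters. The standalone sketch below is not dsbulk code: it assumes univocity-parsers 2.7+ (where CsvFormat#setDelimiter takes a String), and the input line is a hypothetical example modeled on the assertions above. It shows why a lone "|" is not treated as a delimiter while a quoted "||" survives inside a field.

// Standalone sketch, assuming univocity-parsers 2.7+; the input line is hypothetical.
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;

class MultiCharDelimiterSketch {
  public static void main(String[] args) {
    CsvParserSettings settings = new CsvParserSettings();
    settings.getFormat().setDelimiter("||");      // multi-character delimiter
    settings.setIgnoreLeadingWhitespaces(true);
    settings.setIgnoreTrailingWhitespaces(true);
    CsvParser parser = new CsvParser(settings);
    String[] row = parser.parseLine("foo || |bar| || \"foo||bar\"");
    // row[0] = "foo", row[1] = "|bar|" (a single '|' is not a delimiter),
    // row[2] = "foo||bar" (the quoted delimiter is kept verbatim)
  }
}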
The class CSVConnectorTest, method should_read_from_stdin_with_special_encoding.
@Test
void should_read_from_stdin_with_special_encoding() throws Exception {
  InputStream stdin = System.in;
  try {
    String line = "fóô,bàr,qïx\n";
    InputStream is = new ByteArrayInputStream(line.getBytes(ISO_8859_1));
    System.setIn(is);
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv", "header", false, "url", "-", "encoding", "ISO-8859-1");
    connector.configure(settings, true, true);
    connector.init();
    assertThat(connector.readConcurrency()).isOne();
    assertThat(ReflectionUtils.invokeMethod("isDataSizeSamplingAvailable", connector, Boolean.TYPE))
        .isFalse();
    List<Record> actual = Flux.merge(connector.read()).collectList().block();
    assertThat(actual).hasSize(1);
    assertThat(actual.get(0).getSource()).isEqualTo(line);
    assertThat(actual.get(0).getResource()).isEqualTo(URI.create("std:/"));
    assertThat(actual.get(0).getPosition()).isEqualTo(1L);
    assertThat(actual.get(0).values()).containsExactly("fóô", "bàr", "qïx");
    connector.close();
  } finally {
    System.setIn(stdin);
  }
}
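The encoding setting matters here because the accented characters occupy one byte in ISO-8859-1 but two bytes in UTF-8, so decoding the stream with the wrong charset would corrupt them. A minimal JDK-only illustration, independent of dsbulk:

import java.nio.charset.StandardCharsets;

class EncodingSketch {
  public static void main(String[] args) {
    String line = "fóô,bàr,qïx";
    byte[] latin1 = line.getBytes(StandardCharsets.ISO_8859_1);
    byte[] utf8 = line.getBytes(StandardCharsets.UTF_8);
    System.out.println(latin1.length); // 11 bytes: one per character
    System.out.println(utf8.length);   // 15 bytes: the four accented characters take two each
    // Decoding the Latin-1 bytes as UTF-8 yields replacement characters instead of the
    // accents, hence the explicit encoding=ISO-8859-1 setting in the test above.
    System.out.println(new String(latin1, StandardCharsets.UTF_8));
  }
}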
The class CSVConnectorTest, method should_return_unmappable_record_when_line_malformed.
@Test
void should_return_unmappable_record_when_line_malformed() throws Exception {
  InputStream stdin = System.in;
  try {
    String lines = "header1,header2\nvalue1,value2,value3";
    InputStream is = new ByteArrayInputStream(lines.getBytes(UTF_8));
    System.setIn(is);
    CSVConnector connector = new CSVConnector();
    Config settings = TestConfigUtils.createTestConfig("dsbulk.connector.csv", "header", true);
    connector.configure(settings, true, true);
    connector.init();
    List<Record> actual = Flux.merge(connector.read()).collectList().block();
    assertThat(actual).hasSize(1);
    assertThat(actual.get(0)).isInstanceOf(ErrorRecord.class);
    assertThat(actual.get(0).getSource()).isEqualTo("value1,value2,value3");
    assertThat(((ErrorRecord) actual.get(0)).getError()).isInstanceOf(IllegalArgumentException.class);
    assertThat(actual.get(0).values()).isEmpty();
    connector.close();
  } finally {
    System.setIn(stdin);
  }
}
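The connector does not throw on a malformed line; it emits an ErrorRecord that still carries the original source text and the cause. A hypothetical consumer (not part of the test, assuming a configured and initialized connector) could therefore log and skip bad lines within the same reactive pipeline:

// Hypothetical downstream handling; a sketch, not dsbulk code.
List<Record> good =
    Flux.merge(connector.read())
        .doOnNext(r -> {
          if (r instanceof ErrorRecord) {
            Throwable cause = ((ErrorRecord) r).getError();
            System.err.println("Skipping bad line: " + r.getSource() + " (" + cause + ")");
          }
        })
        .filter(r -> !(r instanceof ErrorRecord))
        .collectList()
        .block();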
The class CSVConnectorTest, method should_honor_nullValue_when_writing.
@ParameterizedTest
@MethodSource
void should_honor_nullValue_when_writing(String nullValue, String expected) throws Exception {
  Path out = Files.createTempDirectory("test");
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", StringUtils.quoteJson(out),
          "nullValue", StringUtils.quoteJson(nullValue),
          "header", false);
  connector.configure(settings, false, false);
  connector.init();
  Flux.<Record>just(
          DefaultRecord.mapped(
              "source",
              resource,
              IRRELEVANT_POSITION,
              new Field[] {new DefaultMappedField("field1"), new DefaultMappedField("field2")},
              null,
              "field2"))
      .transform(connector.write())
      .blockLast();
  connector.close();
  List<String> actual = Files.readAllLines(out.resolve("output-000001.csv"));
  assertThat(actual).hasSize(1).containsExactly(expected);
}
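Because @MethodSource is used without a value, JUnit 5 resolves the arguments from a static factory method with the same name as the test. The actual argument sets live in CSVConnectorTest and are not shown here; the sketch below uses assumed values purely for illustration. Since the written record has a null field1 and a literal "field2" value, each expected line would be the nullValue followed by ",field2".

// Hypothetical factory with assumed values; the real pairs are defined in CSVConnectorTest.
// (Requires java.util.stream.Stream and org.junit.jupiter.params.provider.Arguments.)
static Stream<Arguments> should_honor_nullValue_when_writing() {
  return Stream.of(
      Arguments.of("NULL", "NULL,field2"), // assumed nullValue / expected pair
      Arguments.of("N/A", "N/A,field2"));
}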
The class CSVEndToEndSimulacronIT, method massive_load_errors.
/**
 * Test for DAT-593. Emulates 100 resources with 1,000 records each, among which 100 bad records
 * per resource, for a total of 10,000 failed records. Verifies that LogManager is capable of
 * handling a high number of bad records without disrupting the main load workflow.
 */
@Test
void massive_load_errors() throws Exception {
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace(
          "ks1",
          new Table("table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
  MockConnector.setDelegate(
      new CSVConnector() {
        @Override
        public void configure(@NonNull Config settings, boolean read, boolean retainRecordSources) {}

        @Override
        public void init() {}

        @Override
        public int readConcurrency() {
          // to force runner to use maximum parallelism
          return Integer.MAX_VALUE;
        }

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
          List<Publisher<Record>> resources = new ArrayList<>();
          for (int i = 0; i < 100; i++) {
            AtomicInteger counter = new AtomicInteger();
            resources.add(
                Flux.generate(
                    (sink) -> {
                      int next = counter.getAndIncrement();
                      if (next == 1_000) {
                        sink.complete();
                      } else if (next % 10 == 0) {
                        sink.next(
                            RecordUtils.error(
                                new IllegalArgumentException("Record could not be read: " + next)));
                      } else {
                        sink.next(
                            RecordUtils.indexedCSV(
                                "pk", String.valueOf(next),
                                "cc", String.valueOf(next),
                                "v", String.valueOf(next)));
                      }
                    }));
          }
          return Flux.fromIterable(resources);
        }
      });
  String[] args = {
    "load", "-c", "mock", "--log.maxErrors", "10000", "--schema.keyspace", "ks1", "--schema.table", "table1"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
  assertThat(logs.getAllMessagesAsString())
      .contains("completed with 10000 errors")
      .contains("Records: total: 100,000, successful: 90,000, failed: 10,000");
  validateExceptionsLog(10_000, "Record could not be read:", "connector-errors.log");
}
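The figures asserted above follow directly from the generator: each of the 100 resources emits indices 0..999, and every index divisible by 10 becomes an error record. A quick sanity check of the arithmetic, independent of the test:

// Plain Java verification of the asserted counts.
long badPerResource =
    java.util.stream.IntStream.range(0, 1_000).filter(i -> i % 10 == 0).count(); // 100
long resources = 100;
long total = resources * 1_000;            // 100,000 records
long failed = resources * badPerResource;  // 10,000 failed
long successful = total - failed;          // 90,000 successful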