Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_write_multiple_files.
@Test
void should_write_multiple_files() throws Exception {
  CSVConnector connector = new CSVConnector();
  Path out = Files.createTempDirectory("test");
  try {
    int maxConcurrentFiles = 4;
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", StringUtils.quoteJson(out),
            "escape", "\"\\\"\"",
            "maxConcurrentFiles", maxConcurrentFiles);
    connector.configure(settings, false, true);
    connector.init();
    assertThat(connector.writeConcurrency()).isEqualTo(maxConcurrentFiles);
    // Repeat the records 1000 times to fully exercise multiple file writing.
    Scheduler scheduler = Schedulers.newParallel("workflow");
    Function<Publisher<Record>, Publisher<Record>> write = connector.write();
    Flux.range(0, 1000)
        .flatMap(
            i -> Flux.fromIterable(createRecords()).transform(write).subscribeOn(scheduler),
            maxConcurrentFiles)
        .blockLast();
    connector.close();
    scheduler.dispose();
    List<String> actual =
        FileUtils.readAllLinesInDirectoryAsStream(out)
            .sorted()
            .distinct()
            .collect(Collectors.toList());
    assertThat(actual)
        .containsExactly(
            ",,\"Venture \"\"Extended Edition\"\"\",,4900.00",
            "1996,Jeep,Grand Cherokee,\"MUST SELL!",
            "1997,Ford,E350,\" ac, abs, moon \",3000.00",
            "1999,Chevy,\"Venture \"\"Extended Edition\"\"\",,4900.00",
            "1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",,5000.00",
            "Year,Make,Model,Description,Price",
            "air, moon roof, loaded\",4799.00");
  } finally {
    FileUtils.deleteDirectory(out);
  }
}
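The createRecords() helper is referenced but not shown on this page. Below is a minimal sketch of what it might look like, reconstructed from the expected lines asserted above; the field layout, the source names, and the reuse of the test class's resource and IRRELEVANT_POSITION constants are assumptions, not the project's actual code. Note how the Jeep record's embedded newline explains why "MUST SELL!" and "air, moon roof, loaded\",4799.00" appear as two separate entries once the output lines are sorted.

// Hypothetical reconstruction; the real helper lives elsewhere in CSVConnectorTest.
private List<Record> createRecords() {
  Field[] fields = {
    new DefaultMappedField("Year"),
    new DefaultMappedField("Make"),
    new DefaultMappedField("Model"),
    new DefaultMappedField("Description"),
    new DefaultMappedField("Price")
  };
  return Arrays.asList(
      DefaultRecord.mapped("source1", resource, IRRELEVANT_POSITION, fields,
          "1997", "Ford", "E350", " ac, abs, moon ", "3000.00"),
      DefaultRecord.mapped("source2", resource, IRRELEVANT_POSITION, fields,
          "1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"),
      DefaultRecord.mapped("source3", resource, IRRELEVANT_POSITION, fields,
          "1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"),
      DefaultRecord.mapped("source4", resource, IRRELEVANT_POSITION, fields,
          "1999", "Chevy", "Venture \"Extended Edition, Very Large\"", null, "5000.00"),
      DefaultRecord.mapped("source5", resource, IRRELEVANT_POSITION, fields,
          null, null, "Venture \"Extended Edition\"", null, "4900.00"));
}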
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_read_from_http_url.
@ParameterizedTest
@ValueSource(booleans = {true, false})
void should_read_from_http_url(boolean retainRecordSources, @Wiremock WireMockServer server)
    throws Exception {
  server.givenThat(
      any(urlPathEqualTo("/file.csv"))
          .willReturn(aResponse()
              .withStatus(200)
              .withHeader("Content-Type", "text/csv")
              .withBody(FileUtils.readFile(path("/sample.csv")))));
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", String.format("\"%s/file.csv\"", server.baseUrl()),
          "normalizeLineEndingsInQuotes", true,
          "escape", "\"\\\"\"",
          "comment", "\"#\"");
  connector.configure(settings, true, retainRecordSources);
  connector.init();
  assertThat(connector.readConcurrency()).isOne();
  List<Record> actual = Flux.merge(connector.read()).collectList().block();
  assertRecords(actual, retainRecordSources);
  connector.close();
}
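The assertRecords(...) helper is not shown on this page either. A plausible sketch of its intent, assuming it verifies the record-source retention contract toggled by the retainRecordSources flag; the exact assertions in the real helper are a guess:

// Hypothetical sketch; the real assertions live elsewhere in CSVConnectorTest.
private static void assertRecords(List<Record> records, boolean retainRecordSources) {
  assertThat(records).isNotEmpty();
  for (Record record : records) {
    if (retainRecordSources) {
      // With sources retained, each record should carry its original CSV line.
      assertThat(record.getSource()).isNotNull();
    } else {
      assertThat(record.getSource()).isNull();
    }
  }
}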
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading2.
@Test
void should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading2()
    throws Exception {
  Path file = Files.createTempFile("test", ".csv");
  Files.write(file, Collections.singleton(" foo "));
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", StringUtils.quoteJson(file),
          "ignoreLeadingWhitespaces", true,
          "ignoreTrailingWhitespaces", true,
          "header", false);
  connector.configure(settings, true, true);
  connector.init();
  List<Record> records = Flux.merge(connector.read()).collectList().block();
  assertThat(records).hasSize(1);
  // Both leading and trailing whitespace should have been stripped from " foo ".
  assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo("foo");
  connector.close();
}
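For contrast, a sketch of the inverse configuration; this is an illustration under the assumption that the flags behave symmetrically, not a test from this page. With both flags off, the surrounding blanks should survive the read:

// Hypothetical contrast case: disable whitespace trimming on both sides.
Config settings =
    TestConfigUtils.createTestConfig(
        "dsbulk.connector.csv",
        "url", StringUtils.quoteJson(file),
        "ignoreLeadingWhitespaces", false,
        "ignoreTrailingWhitespaces", false,
        "header", false);
// ... configure, init, and read exactly as above ...
assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo(" foo ");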
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_honor_emptyValue_when_writing.
@ParameterizedTest
@MethodSource
void should_honor_emptyValue_when_writing(String quote, String emptyValue, String expected)
    throws Exception {
  Path out = Files.createTempDirectory("test");
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", StringUtils.quoteJson(out),
          "quote", StringUtils.quoteJson(quote),
          "emptyValue", StringUtils.quoteJson(emptyValue),
          "header", false);
  connector.configure(settings, false, true);
  connector.init();
  // Write one record whose first field is empty and whose second is non-empty.
  Flux.<Record>just(
          DefaultRecord.mapped(
              "source", resource, IRRELEVANT_POSITION,
              new Field[] {new DefaultMappedField("field1"), new DefaultMappedField("field2")},
              "", "field2"))
      .transform(connector.write())
      .blockLast();
  connector.close();
  List<String> actual = Files.readAllLines(out.resolve("output-000001.csv"));
  assertThat(actual).hasSize(1).containsExactly(expected);
}
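A bare @MethodSource makes JUnit 5 resolve a static factory method with the same name as the test. That provider is not shown on this page; a hypothetical sketch follows, where the concrete quote/emptyValue/expected triples are illustrative guesses, not the project's actual test data:

import java.util.stream.Stream;
import org.junit.jupiter.params.provider.Arguments;

// Hypothetical provider: quote character, emptyValue setting, expected output line.
static Stream<Arguments> should_honor_emptyValue_when_writing() {
  return Stream.of(
      Arguments.of("\"", "AAA", "AAA,field2"),
      Arguments.of("'", "''", "'',field2"));
}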
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_quote_comment_character.
/** DAT-516: always quote the comment character when unloading. */
@Test
void should_quote_comment_character() throws Exception {
  Path out = Files.createTempDirectory("test");
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", StringUtils.quoteJson(out),
          "header", "false",
          "maxConcurrentFiles", 1,
          "comment", "\"#\"");
  connector.configure(settings, false, true);
  connector.init();
  Flux.<Record>just(
          DefaultRecord.indexed("source", resource, IRRELEVANT_POSITION, "#shouldbequoted"))
      .transform(connector.write())
      .blockFirst();
  connector.close();
  List<String> actual = Files.readAllLines(out.resolve("output-000001.csv"));
  // The leading '#' must be quoted so the line cannot be mistaken for a comment.
  assertThat(actual).hasSize(1).containsExactly("\"#shouldbequoted\"");
}
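Why the quoting matters: if the value were written unquoted as #shouldbequoted, re-reading the file with comment set to "#" would silently skip the line as a comment. A hypothetical round-trip check, sketched from the read-side API shown in the earlier snippets and not part of the original test:

// Hypothetical round trip: read the written file back with the same comment character.
Config readSettings =
    TestConfigUtils.createTestConfig(
        "dsbulk.connector.csv",
        "url", StringUtils.quoteJson(out.resolve("output-000001.csv")),
        "header", false,
        "comment", "\"#\"");
CSVConnector reader = new CSVConnector();
reader.configure(readSettings, true, true);
reader.init();
List<Record> roundTrip = Flux.merge(reader.read()).collectList().block();
assertThat(roundTrip).hasSize(1); // the quoted line is read back, not skipped
reader.close();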