Example 6 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_write_multiple_files (a hedged sketch of its createRecords() helper follows the example).

@Test
void should_write_multiple_files() throws Exception {
    CSVConnector connector = new CSVConnector();
    Path out = Files.createTempDirectory("test");
    try {
        int maxConcurrentFiles = 4;
        Config settings =
            TestConfigUtils.createTestConfig(
                "dsbulk.connector.csv",
                "url", StringUtils.quoteJson(out),
                "escape", "\"\\\"\"",
                "maxConcurrentFiles", maxConcurrentFiles);
        connector.configure(settings, false, true);
        connector.init();
        assertThat(connector.writeConcurrency()).isEqualTo(maxConcurrentFiles);
        // repeat the records 1000 times to fully exercise multiple file writing
        Scheduler scheduler = Schedulers.newParallel("workflow");
        Function<Publisher<Record>, Publisher<Record>> write = connector.write();
        Flux.range(0, 1000)
            .flatMap(
                i -> Flux.fromIterable(createRecords()).transform(write).subscribeOn(scheduler),
                maxConcurrentFiles)
            .blockLast();
        connector.close();
        scheduler.dispose();
        List<String> actual =
            FileUtils.readAllLinesInDirectoryAsStream(out)
                .sorted()
                .distinct()
                .collect(Collectors.toList());
        assertThat(actual)
            .containsExactly(
                ",,\"Venture \"\"Extended Edition\"\"\",,4900.00",
                "1996,Jeep,Grand Cherokee,\"MUST SELL!",
                "1997,Ford,E350,\"  ac, abs, moon  \",3000.00",
                "1999,Chevy,\"Venture \"\"Extended Edition\"\"\",,4900.00",
                "1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",,5000.00",
                "Year,Make,Model,Description,Price",
                "air, moon roof, loaded\",4799.00");
    } finally {
        FileUtils.deleteDirectory(out);
    }
}
Also used : Path(java.nio.file.Path) Arrays(java.util.Arrays) GZIPInputStream(java.util.zip.GZIPInputStream) Strings(com.datastax.oss.driver.shaded.guava.common.base.Strings) URL(java.net.URL) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) URISyntaxException(java.net.URISyntaxException) ConfigUtils(com.datastax.oss.dsbulk.config.ConfigUtils) Throwables(org.assertj.core.util.Throwables) WireMockServer(com.github.tomakehurst.wiremock.WireMockServer) AfterAll(org.junit.jupiter.api.AfterAll) ISO_8859_1(java.nio.charset.StandardCharsets.ISO_8859_1) ByteArrayInputStream(java.io.ByteArrayInputStream) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BeforeAll(org.junit.jupiter.api.BeforeAll) Level(org.slf4j.event.Level) URI(java.net.URI) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Method(java.lang.reflect.Method) Path(java.nio.file.Path) Record(com.datastax.oss.dsbulk.connectors.api.Record) LogInterceptor(com.datastax.oss.dsbulk.tests.logging.LogInterceptor) MethodSource(org.junit.jupiter.params.provider.MethodSource) WiremockResolver(ru.lanwen.wiremock.ext.WiremockResolver) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) WireMock.aResponse(com.github.tomakehurst.wiremock.client.WireMock.aResponse) StringUtils(com.datastax.oss.dsbulk.tests.utils.StringUtils) BulkLoaderURLStreamHandlerFactory(com.datastax.oss.dsbulk.url.BulkLoaderURLStreamHandlerFactory) Arguments(org.junit.jupiter.params.provider.Arguments) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) Field(com.datastax.oss.dsbulk.connectors.api.Field) List(java.util.List) Stream(java.util.stream.Stream) TestConfigUtils(com.datastax.oss.dsbulk.tests.utils.TestConfigUtils) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) ByteArrayOutputStream(java.io.ByteArrayOutputStream) WireMock.any(com.github.tomakehurst.wiremock.client.WireMock.any) Scheduler(reactor.core.scheduler.Scheduler) Function(java.util.function.Function) Wiremock(ru.lanwen.wiremock.ext.WiremockResolver.Wiremock) ArrayList(java.util.ArrayList) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Schedulers(reactor.core.scheduler.Schedulers) CompressedIOUtils(com.datastax.oss.dsbulk.io.CompressedIOUtils) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) DefaultIndexedField(com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField) FileUtils(com.datastax.oss.dsbulk.tests.utils.FileUtils) WireMock.urlPathEqualTo(com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo) PrintStream(java.io.PrintStream) ValueSource(org.junit.jupiter.params.provider.ValueSource) Config(com.typesafe.config.Config) Files(java.nio.file.Files) UTF_8(java.nio.charset.StandardCharsets.UTF_8) TextParsingException(com.univocity.parsers.common.TextParsingException) Publisher(org.reactivestreams.Publisher) DefaultMappedField(com.datastax.oss.dsbulk.connectors.api.DefaultMappedField) IOException(java.io.IOException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) InputStreamReader(java.io.InputStreamReader) LogCapture(com.datastax.oss.dsbulk.tests.logging.LogCapture) DisplayName(org.junit.jupiter.api.DisplayName) Flux(reactor.core.publisher.Flux) 
ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Paths(java.nio.file.Paths) ReflectionUtils(com.datastax.oss.dsbulk.tests.utils.ReflectionUtils) BufferedReader(java.io.BufferedReader) LogInterceptingExtension(com.datastax.oss.dsbulk.tests.logging.LogInterceptingExtension) Collections(java.util.Collections) Throwables.getRootCause(org.assertj.core.util.Throwables.getRootCause) Charsets(com.datastax.oss.driver.shaded.guava.common.base.Charsets) InputStream(java.io.InputStream)
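The test above relies on a createRecords() helper that is not part of this snippet. The following is a hypothetical sketch of what such a helper could look like, assuming it builds mapped records whose field names match the header line asserted by the test (Year, Make, Model, Description, Price); resource and IRRELEVANT_POSITION are assumed to be fields of the test class, as in the later examples.

private List<Record> createRecords() {
    // Field names drive the header row the connector writes: Year,Make,Model,Description,Price.
    Field[] fields = new Field[] {
        new DefaultMappedField("Year"),
        new DefaultMappedField("Make"),
        new DefaultMappedField("Model"),
        new DefaultMappedField("Description"),
        new DefaultMappedField("Price")
    };
    List<Record> records = new ArrayList<>();
    records.add(DefaultRecord.mapped("source", resource, IRRELEVANT_POSITION, fields,
        "1997", "Ford", "E350", "  ac, abs, moon  ", "3000.00"));
    records.add(DefaultRecord.mapped("source", resource, IRRELEVANT_POSITION, fields,
        "1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"));
    // ... the remaining rows asserted by the test would be built the same way.
    return records;
}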

Example 7 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_read_from_http_url (a hedged sketch of its path(...) helper follows the example).

@ParameterizedTest
@ValueSource(booleans = { true, false })
void should_read_from_http_url(boolean retainRecordSources, @Wiremock WireMockServer server) throws Exception {
    server.givenThat(
        any(urlPathEqualTo("/file.csv"))
            .willReturn(
                aResponse()
                    .withStatus(200)
                    .withHeader("Content-Type", "text/csv")
                    .withBody(FileUtils.readFile(path("/sample.csv")))));
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", String.format("\"%s/file.csv\"", server.baseUrl()),
            "normalizeLineEndingsInQuotes", true,
            "escape", "\"\\\"\"",
            "comment", "\"#\"");
    connector.configure(settings, true, retainRecordSources);
    connector.init();
    assertThat(connector.readConcurrency()).isOne();
    List<Record> actual = Flux.merge(connector.read()).collectList().block();
    assertRecords(actual, retainRecordSources);
    connector.close();
}
Also used : Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
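The WireMock stub above serves the sample file through a path(...) helper, and the results are checked by an assertRecords(...) helper; neither is shown in this snippet. A minimal sketch of path(...), assuming it resolves a classpath resource of the test class to a filesystem Path, could look like this:

private static Path path(String resource) throws URISyntaxException {
    // Resolve a test resource (e.g. /sample.csv) on the classpath to a Path.
    return Paths.get(CSVConnectorTest.class.getResource(resource).toURI());
}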

Example 8 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading2.

@Test
void should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading2() throws Exception {
    Path file = Files.createTempFile("test", ".csv");
    Files.write(file, Collections.singleton(" foo "));
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", StringUtils.quoteJson(file),
            "ignoreLeadingWhitespaces", true,
            "ignoreTrailingWhitespaces", true,
            "header", false);
    connector.configure(settings, true, true);
    connector.init();
    List<Record> records = Flux.merge(connector.read()).collectList().block();
    assertThat(records).hasSize(1);
    assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo("foo");
    connector.close();
}
Also used : Path(java.nio.file.Path) DefaultIndexedField(com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField) Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
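For contrast, a hedged variant of the same read (not part of the original test) with both whitespace options disabled; assuming these settings map directly onto the underlying CSV parser's whitespace handling, the padded value would be expected to come back unchanged as " foo ".

CSVConnector relaxed = new CSVConnector();
Config relaxedSettings = TestConfigUtils.createTestConfig(
    "dsbulk.connector.csv",
    "url", StringUtils.quoteJson(file),
    "ignoreLeadingWhitespaces", false,
    "ignoreTrailingWhitespaces", false,
    "header", false);
relaxed.configure(relaxedSettings, true, true);
relaxed.init();
List<Record> raw = Flux.merge(relaxed.read()).collectList().block();
// Assumption: with both flags off, the surrounding whitespace is preserved.
assertThat(raw.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo(" foo ");
relaxed.close();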

Example 9 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_honor_emptyValue_when_writing (a hypothetical sketch of its @MethodSource provider follows the example).

@ParameterizedTest
@MethodSource
void should_honor_emptyValue_when_writing(String quote, String emptyValue, String expected) throws Exception {
    Path out = Files.createTempDirectory("test");
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", StringUtils.quoteJson(out),
            "quote", StringUtils.quoteJson(quote),
            "emptyValue", StringUtils.quoteJson(emptyValue),
            "header", false);
    connector.configure(settings, false, true);
    connector.init();
    Flux.<Record>just(
            DefaultRecord.mapped(
                "source",
                resource,
                IRRELEVANT_POSITION,
                new Field[] { new DefaultMappedField("field1"), new DefaultMappedField("field2") },
                "",
                "field2"))
        .transform(connector.write())
        .blockLast();
    connector.close();
    List<String> actual = Files.readAllLines(out.resolve("output-000001.csv"));
    assertThat(actual).hasSize(1).containsExactly(expected);
}
Also used : Path(java.nio.file.Path) Field(com.datastax.oss.dsbulk.connectors.api.Field) DefaultIndexedField(com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField) DefaultMappedField(com.datastax.oss.dsbulk.connectors.api.DefaultMappedField) Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)
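The arguments provider backing this @MethodSource is not included in the snippet. Purely as an illustration (the real provider's values may differ), a provider with the matching signature could look like the sketch below; the expected column assumes the configured emptyValue is written verbatim whenever a field holds an empty string.

static Stream<Arguments> should_honor_emptyValue_when_writing() {
    return Stream.of(
        // quote, emptyValue, expected output line (hypothetical values, for illustration only)
        arguments("\"", "NULL", "NULL,field2"),
        arguments("\"", "\"\"", "\"\",field2"));
}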

Example 10 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_quote_comment_character.

/**
 * DAT-516: Always quote comment character when unloading
 */
@Test
void should_quote_comment_character() throws Exception {
    Path out = Files.createTempDirectory("test");
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", StringUtils.quoteJson(out),
            "header", "false",
            "maxConcurrentFiles", 1,
            "comment", "\"#\"");
    connector.configure(settings, false, true);
    connector.init();
    Flux.<Record>just(DefaultRecord.indexed("source", resource, IRRELEVANT_POSITION, "#shouldbequoted"))
        .transform(connector.write())
        .blockFirst();
    connector.close();
    List<String> actual = Files.readAllLines(out.resolve("output-000001.csv"));
    assertThat(actual).hasSize(1).containsExactly("\"#shouldbequoted\"");
}
Also used : Path(java.nio.file.Path) Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
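To see why the quoting matters, here is a hedged round-trip sketch (not part of the original test): reading the written file back with the same comment character should yield the record again precisely because the value was quoted, whereas an unquoted #shouldbequoted line would be skipped as a comment.

CSVConnector reader = new CSVConnector();
Config readSettings = TestConfigUtils.createTestConfig(
    "dsbulk.connector.csv",
    "url", StringUtils.quoteJson(out),
    "header", "false",
    "comment", "\"#\"");
reader.configure(readSettings, true, true);
reader.init();
List<Record> roundTrip = Flux.merge(reader.read()).collectList().block();
// One record comes back, and the surrounding quotes are stripped by the parser.
assertThat(roundTrip).hasSize(1);
assertThat(roundTrip.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo("#shouldbequoted");
reader.close();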

Aggregations

Record (com.datastax.oss.dsbulk.connectors.api.Record): 54
DefaultRecord (com.datastax.oss.dsbulk.connectors.api.DefaultRecord): 40
Config (com.typesafe.config.Config): 39
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 37
Test (org.junit.jupiter.api.Test): 35
ErrorRecord (com.datastax.oss.dsbulk.connectors.api.ErrorRecord): 24
Path (java.nio.file.Path): 24
DefaultIndexedField (com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField): 10
Function (java.util.function.Function): 9
MethodSource (org.junit.jupiter.params.provider.MethodSource): 9
DefaultMappedField (com.datastax.oss.dsbulk.connectors.api.DefaultMappedField): 8
ArrayList (java.util.ArrayList): 8
List (java.util.List): 8
Publisher (org.reactivestreams.Publisher): 8
DefaultErrorRecord (com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord): 7
IOException (java.io.IOException): 7
ValueSource (org.junit.jupiter.params.provider.ValueSource): 7
Flux (reactor.core.publisher.Flux): 7
DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader): 6
ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus): 6