Example 41 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class JsonConnectorTest, method should_read_single_file_multi_doc.

@ParameterizedTest
@ValueSource(booleans = { true, false })
void should_read_single_file_multi_doc(boolean retainRecordSources) throws Exception {
    JsonConnector connector = new JsonConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.json",
            "url", url("/multi_doc.json"),
            "parserFeatures", "{ALLOW_COMMENTS:true}",
            "deserializationFeatures", "{USE_BIG_DECIMAL_FOR_FLOATS : false}");
    connector.configure(settings, true, retainRecordSources);
    connector.init();
    assertThat(connector.readConcurrency()).isOne();
    List<Record> actual = Flux.merge(connector.read()).collectList().block();
    verifyRecords(actual, retainRecordSources, rawURL("/multi_doc.json").toURI());
    connector.close();
}
Also used : Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
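
The verifyRecords helper is private to JsonConnectorTest and not shown here. As a rough sketch of the kind of check it could perform (an assumption, not the actual helper), relying only on the Record accessors getSource(), getResource() and getPosition() from the connectors API:

// Sketch only, not the real verifyRecords: checks that sources are kept
// or dropped according to the retainRecordSources flag, and that the
// resource and position metadata are consistent.
private static void verifySourcesSketch(List<Record> records, boolean retainRecordSources, URI resource) {
    long position = 1;
    for (Record record : records) {
        if (retainRecordSources) {
            assertThat(record.getSource()).isNotNull();
        } else {
            assertThat(record.getSource()).isNull();
        }
        assertThat(record.getResource()).isEqualTo(resource);
        assertThat(record.getPosition()).isEqualTo(position++);
    }
}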

Example 42 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class JsonConnectorTest, method createRecords.

private List<Record> createRecords(boolean retainRecordSources, URI resource) {
    ArrayList<Record> records = new ArrayList<>();
    Field[] fields = new Field[5];
    fields[0] = new DefaultMappedField("Year");
    fields[1] = new DefaultMappedField("Make");
    fields[2] = new DefaultMappedField("Model");
    fields[3] = new DefaultMappedField("Description");
    fields[4] = new DefaultMappedField("Price");
    JsonNode source1;
    JsonNode source2;
    JsonNode source3;
    JsonNode source4;
    JsonNode source5;
    try {
        source1 = objectMapper.readTree("{" + "\"Year\": 1997,\n" + "\"Make\": \"Ford\",\n" + "\"Model\": \"E350\",\n" + "\"Description\": \"ac, abs, moon\",\n" + "\"Price\": 3000.0\n" + "}");
        source2 = objectMapper.readTree("{\n" + "\"Year\": 1999,\n" + "\"Make\": \"Chevy\",\n" + "\"Model\": \"Venture \\\"Extended Edition\\\"\",\n" + "\"Description\": null,\n" + "\"Price\": 4900.0\n" + "}");
        source3 = objectMapper.readTree("{\n" + "\"Year\": 1996,\n" + "\"Make\": \"Jeep\",\n" + "\"Model\": \"Grand Cherokee\",\n" + "\"Description\": \"MUST SELL!\\nair, moon roof, loaded\",\n" + "\"Price\": 4799.0\n" + "}");
        source4 = objectMapper.readTree("{\n" + "\"Year\": 1999,\n" + "\"Make\": \"Chevy\",\n" + "\"Model\": \"Venture \\\"Extended Edition, Very Large\\\"\",\n" + "\"Description\": null,\n" + "\"Price\": 5000.0\n" + "}");
        source5 = objectMapper.readTree("{\n" + "\"Year\": null,\n" + "\"Make\": null,\n" + "\"Model\": \"Venture \\\"Extended Edition\\\"\",\n" + "\"Description\": null,\n" + "\"Price\": 4900.0\n" + "}");
    } catch (JsonProcessingException e) {
        throw new UncheckedIOException(e);
    }
    records.add(DefaultRecord.mapped(retainRecordSources ? source1 : null, resource, 1, fields, source1.get("Year"), source1.get("Make"), source1.get("Model"), source1.get("Description"), source1.get("Price")));
    records.add(DefaultRecord.mapped(retainRecordSources ? source2 : null, resource, 2, fields, source2.get("Year"), source2.get("Make"), source2.get("Model"), source2.get("Description"), source2.get("Price")));
    records.add(DefaultRecord.mapped(retainRecordSources ? source3 : null, resource, 3, fields, source3.get("Year"), source3.get("Make"), source3.get("Model"), source3.get("Description"), source3.get("Price")));
    records.add(DefaultRecord.mapped(retainRecordSources ? source4 : null, resource, 4, fields, source4.get("Year"), source4.get("Make"), source4.get("Model"), source4.get("Description"), source4.get("Price")));
    records.add(DefaultRecord.mapped(retainRecordSources ? source5 : null, resource, 5, fields, source5.get("Year"), source5.get("Make"), source5.get("Model"), source5.get("Description"), source5.get("Price")));
    return records;
}
Also used : Field(com.datastax.oss.dsbulk.connectors.api.Field) DefaultMappedField(com.datastax.oss.dsbulk.connectors.api.DefaultMappedField) ArrayList(java.util.ArrayList) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) JsonNode(com.fasterxml.jackson.databind.JsonNode) UncheckedIOException(java.io.UncheckedIOException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)
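
Each record built above stores one JsonNode per mapped field. A quick sketch (not in the original test) of reading a value back out, assuming getFieldValue(Field) returns the stored node and that DefaultMappedField instances compare by field name:

// Sketch only: read the "Make" value back from the first record.
List<Record> records = createRecords(true, resource);
JsonNode make = (JsonNode) records.get(0).getFieldValue(new DefaultMappedField("Make"));
assertThat(make.asText()).isEqualTo("Ford");   // source1's "Make" value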

Example 43 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading.

@Test
void should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading() throws Exception {
    Path file = Files.createTempFile("test", ".csv");
    Files.write(file, Collections.singleton(" foo "));
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", StringUtils.quoteJson(file),
            "ignoreLeadingWhitespaces", false,
            "ignoreTrailingWhitespaces", false,
            "header", false);
    connector.configure(settings, true, true);
    connector.init();
    List<Record> records = Flux.merge(connector.read()).collectList().block();
    assertThat(records).hasSize(1);
    assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo(" foo ");
    connector.close();
}
Also used : Path(java.nio.file.Path) DefaultIndexedField(com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField) Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
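
For contrast, enabling both flags should make the parser trim the field. A sketch under that assumption, reusing the same helpers as above (not a test from the project):

// Sketch: identical setup with both whitespace flags set to true; the
// surrounding spaces are then expected to be stripped, yielding "foo".
CSVConnector trimming = new CSVConnector();
Config trimmingSettings = TestConfigUtils.createTestConfig("dsbulk.connector.csv",
    "url", StringUtils.quoteJson(file),
    "ignoreLeadingWhitespaces", true,
    "ignoreTrailingWhitespaces", true,
    "header", false);
trimming.configure(trimmingSettings, true, true);
trimming.init();
List<Record> trimmed = Flux.merge(trimming.read()).collectList().block();
assertThat(trimmed.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo("foo");
trimming.close();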

Example 44 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_read_from_stdin_with_special_newline.

@Test
void should_read_from_stdin_with_special_newline() throws Exception {
    InputStream stdin = System.in;
    try {
        String line = "abc,de\nf,ghk\r\n";
        InputStream is = new ByteArrayInputStream(line.getBytes(UTF_8));
        System.setIn(is);
        CSVConnector connector = new CSVConnector();
        Config settings =
            TestConfigUtils.createTestConfig(
                "dsbulk.connector.csv", "header", false, "url", "-", "newline", "\"\\r\\n\"");
        connector.configure(settings, true, true);
        connector.init();
        assertThat(connector.readConcurrency()).isOne();
        assertThat(ReflectionUtils.invokeMethod("isDataSizeSamplingAvailable", connector, Boolean.TYPE)).isFalse();
        List<Record> actual = Flux.merge(connector.read()).collectList().block();
        assertThat(actual).hasSize(1);
        assertThat(actual.get(0).getSource()).isEqualTo(line);
        assertThat(actual.get(0).values()).containsExactly("abc", "de\nf", "ghk");
        connector.close();
    } finally {
        System.setIn(stdin);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
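
The save/restore dance around System.in is easy to get wrong once assertions can throw. A small generic helper (a sketch, not part of dsbulk) captures the pattern used above:

// Sketch only: run a test body with a temporary System.in, restoring the
// original stream even if the body throws.
static <T> T withStdin(byte[] data, java.util.concurrent.Callable<T> body) throws Exception {
    InputStream saved = System.in;
    try {
        System.setIn(new ByteArrayInputStream(data));
        return body.call();
    } finally {
        System.setIn(saved);
    }
}

With it, the try/finally in the test above reduces to a single withStdin(line.getBytes(UTF_8), () -> { ... }) call.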

Example 45 with Record

Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by DataStax.

From the class CSVConnectorTest, method should_honor_max_records_and_skip_records2.

@Test
void should_honor_max_records_and_skip_records2() throws Exception {
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv",
            "url", url("/root/ip-by-country-sample1.csv"),
            "skipRecords", 10,
            "maxRecords", 1);
    connector.configure(settings, true, true);
    connector.init();
    List<Record> records = Flux.merge(connector.read()).collectList().block();
    assertThat(records).hasSize(1);
    assertThat(records.get(0).getSource().toString().trim()).isEqualTo("\"212.63.180.20\",\"212.63.180.23\",\"3560944660\",\"3560944663\",\"MZ\",\"Mozambique\"");
    connector.close();
}
Also used : Config(com.typesafe.config.Config) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
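
Conceptually, skipRecords and maxRecords form a skip-then-take window over the stream of parsed records, so this test reads exactly the 11th record of the sample file. A Reactor sketch of the same windowing (an illustration, not how the connector implements it):

// Illustration: skip 10, take 1 over a stand-in stream of record numbers.
List<Integer> window = Flux.range(1, 100)   // stand-in for parsed records
    .skip(10)                               // skipRecords = 10
    .take(1)                                // maxRecords = 1
    .collectList()
    .block();
// window contains only [11]: the first record after the ten skipped.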

Aggregations

Usage counts across all Record examples:

Record (com.datastax.oss.dsbulk.connectors.api.Record): 54
DefaultRecord (com.datastax.oss.dsbulk.connectors.api.DefaultRecord): 40
Config (com.typesafe.config.Config): 39
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 37
Test (org.junit.jupiter.api.Test): 35
ErrorRecord (com.datastax.oss.dsbulk.connectors.api.ErrorRecord): 24
Path (java.nio.file.Path): 24
DefaultIndexedField (com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField): 10
Function (java.util.function.Function): 9
MethodSource (org.junit.jupiter.params.provider.MethodSource): 9
DefaultMappedField (com.datastax.oss.dsbulk.connectors.api.DefaultMappedField): 8
ArrayList (java.util.ArrayList): 8
List (java.util.List): 8
Publisher (org.reactivestreams.Publisher): 8
DefaultErrorRecord (com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord): 7
IOException (java.io.IOException): 7
ValueSource (org.junit.jupiter.params.provider.ValueSource): 7
Flux (reactor.core.publisher.Flux): 7
DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader): 6
ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus): 6