Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class JsonConnectorTest, method should_read_single_file_multi_doc.
@ParameterizedTest
@ValueSource(booleans = {true, false})
void should_read_single_file_multi_doc(boolean retainRecordSources) throws Exception {
  JsonConnector connector = new JsonConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.json",
          "url", url("/multi_doc.json"),
          "parserFeatures", "{ALLOW_COMMENTS:true}",
          "deserializationFeatures", "{USE_BIG_DECIMAL_FOR_FLOATS : false}");
  connector.configure(settings, true, retainRecordSources);
  connector.init();
  assertThat(connector.readConcurrency()).isOne();
  List<Record> actual = Flux.merge(connector.read()).collectList().block();
  verifyRecords(actual, retainRecordSources, rawURL("/multi_doc.json").toURI());
  connector.close();
}
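For orientation: the parserFeatures and deserializationFeatures maps carry Jackson feature names, so the settings above roughly correspond to configuring an ObjectMapper as sketched below. This is an illustrative sketch only, assuming the connector forwards these maps to Jackson's JsonParser.Feature and DeserializationFeature enums; it is not code from the test class or the connector.

// Illustrative only: allow comments in the JSON source and keep floating-point
// numbers as doubles rather than BigDecimal, mirroring the two feature maps above.
ObjectMapper mapper = new ObjectMapper();
mapper.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
mapper.configure(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS, false);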
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class JsonConnectorTest, method createRecords.
private List<Record> createRecords(boolean retainRecordSources, URI resource) {
  ArrayList<Record> records = new ArrayList<>();
  Field[] fields = new Field[5];
  fields[0] = new DefaultMappedField("Year");
  fields[1] = new DefaultMappedField("Make");
  fields[2] = new DefaultMappedField("Model");
  fields[3] = new DefaultMappedField("Description");
  fields[4] = new DefaultMappedField("Price");
  JsonNode source1;
  JsonNode source2;
  JsonNode source3;
  JsonNode source4;
  JsonNode source5;
  try {
    source1 = objectMapper.readTree(
        "{" + "\"Year\": 1997,\n" + "\"Make\": \"Ford\",\n" + "\"Model\": \"E350\",\n"
            + "\"Description\": \"ac, abs, moon\",\n" + "\"Price\": 3000.0\n" + "}");
    source2 = objectMapper.readTree(
        "{\n" + "\"Year\": 1999,\n" + "\"Make\": \"Chevy\",\n"
            + "\"Model\": \"Venture \\\"Extended Edition\\\"\",\n"
            + "\"Description\": null,\n" + "\"Price\": 4900.0\n" + "}");
    source3 = objectMapper.readTree(
        "{\n" + "\"Year\": 1996,\n" + "\"Make\": \"Jeep\",\n" + "\"Model\": \"Grand Cherokee\",\n"
            + "\"Description\": \"MUST SELL!\\nair, moon roof, loaded\",\n"
            + "\"Price\": 4799.0\n" + "}");
    source4 = objectMapper.readTree(
        "{\n" + "\"Year\": 1999,\n" + "\"Make\": \"Chevy\",\n"
            + "\"Model\": \"Venture \\\"Extended Edition, Very Large\\\"\",\n"
            + "\"Description\": null,\n" + "\"Price\": 5000.0\n" + "}");
    source5 = objectMapper.readTree(
        "{\n" + "\"Year\": null,\n" + "\"Make\": null,\n"
            + "\"Model\": \"Venture \\\"Extended Edition\\\"\",\n"
            + "\"Description\": null,\n" + "\"Price\": 4900.0\n" + "}");
  } catch (JsonProcessingException e) {
    throw new UncheckedIOException(e);
  }
  records.add(DefaultRecord.mapped(
      retainRecordSources ? source1 : null, resource, 1, fields,
      source1.get("Year"), source1.get("Make"), source1.get("Model"),
      source1.get("Description"), source1.get("Price")));
  records.add(DefaultRecord.mapped(
      retainRecordSources ? source2 : null, resource, 2, fields,
      source2.get("Year"), source2.get("Make"), source2.get("Model"),
      source2.get("Description"), source2.get("Price")));
  records.add(DefaultRecord.mapped(
      retainRecordSources ? source3 : null, resource, 3, fields,
      source3.get("Year"), source3.get("Make"), source3.get("Model"),
      source3.get("Description"), source3.get("Price")));
  records.add(DefaultRecord.mapped(
      retainRecordSources ? source4 : null, resource, 4, fields,
      source4.get("Year"), source4.get("Make"), source4.get("Model"),
      source4.get("Description"), source4.get("Price")));
  records.add(DefaultRecord.mapped(
      retainRecordSources ? source5 : null, resource, 5, fields,
      source5.get("Year"), source5.get("Make"), source5.get("Model"),
      source5.get("Description"), source5.get("Price")));
  return records;
}
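The expected records built here are compared against the connector's output by the verifyRecords helper called in should_read_single_file_multi_doc above; that helper is not shown on this page. A minimal sketch of what it plausibly does, assuming it simply pairs actual and expected records, is:

private void verifyRecords(List<Record> actual, boolean retainRecordSources, URI resource) {
  // Sketch only; the real helper in JsonConnectorTest may assert more, or differently.
  List<Record> expected = createRecords(retainRecordSources, resource);
  assertThat(actual).hasSize(expected.size());
  for (int i = 0; i < expected.size(); i++) {
    Record act = actual.get(i);
    Record exp = expected.get(i);
    // Source is the original JsonNode when retainRecordSources is true, null otherwise.
    assertThat(act.getSource()).isEqualTo(exp.getSource());
    assertThat(act.values()).containsExactlyElementsOf(exp.values());
  }
}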
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading.
@Test
void should_honor_ignoreLeadingWhitespaces_and_ignoreTrailingWhitespaces_when_reading()
    throws Exception {
  Path file = Files.createTempFile("test", ".csv");
  Files.write(file, Collections.singleton(" foo "));
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", StringUtils.quoteJson(file),
          "ignoreLeadingWhitespaces", false,
          "ignoreTrailingWhitespaces", false,
          "header", false);
  connector.configure(settings, true, true);
  connector.init();
  List<Record> records = Flux.merge(connector.read()).collectList().block();
  assertThat(records).hasSize(1);
  assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo(" foo ");
  connector.close();
}
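For contrast, a companion sketch (not part of the original test class): flipping both settings to true makes the parser trim the surrounding whitespace, so the same input file would yield "foo" rather than " foo ".

// Hypothetical variant config for the same temp file used above.
Config trimming =
    TestConfigUtils.createTestConfig(
        "dsbulk.connector.csv",
        "url", StringUtils.quoteJson(file),
        "ignoreLeadingWhitespaces", true,   // drop spaces before each value
        "ignoreTrailingWhitespaces", true,  // drop spaces after each value
        "header", false);
// With this config the earlier assertion would be expected to become:
// assertThat(records.get(0).getFieldValue(new DefaultIndexedField(0))).isEqualTo("foo");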
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_read_from_stdin_with_special_newline.
@Test
void should_read_from_stdin_with_special_newline() throws Exception {
  InputStream stdin = System.in;
  try {
    String line = "abc,de\nf,ghk\r\n";
    InputStream is = new ByteArrayInputStream(line.getBytes(UTF_8));
    System.setIn(is);
    CSVConnector connector = new CSVConnector();
    Config settings =
        TestConfigUtils.createTestConfig(
            "dsbulk.connector.csv", "header", false, "url", "-", "newline", "\"\\r\\n\"");
    connector.configure(settings, true, true);
    connector.init();
    assertThat(connector.readConcurrency()).isOne();
    assertThat(ReflectionUtils.invokeMethod("isDataSizeSamplingAvailable", connector, Boolean.TYPE))
        .isFalse();
    List<Record> actual = Flux.merge(connector.read()).collectList().block();
    assertThat(actual).hasSize(1);
    assertThat(actual.get(0).getSource()).isEqualTo(line);
    assertThat(actual.get(0).values()).containsExactly("abc", "de\nf", "ghk");
    connector.close();
  } finally {
    System.setIn(stdin);
  }
}
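Two details are worth noting in this test. The original System.in is captured up front and restored in the finally block, so a failed assertion cannot leak the substituted stream into other tests. And because newline is set to "\r\n", only CRLF terminates a record: the bare "\n" inside "de\nf" stays part of the second field, which is why the whole input parses as a single record with exactly three values.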
Use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.
The class CSVConnectorTest, method should_honor_max_records_and_skip_records2.
@Test
void should_honor_max_records_and_skip_records2() throws Exception {
  CSVConnector connector = new CSVConnector();
  Config settings =
      TestConfigUtils.createTestConfig(
          "dsbulk.connector.csv",
          "url", url("/root/ip-by-country-sample1.csv"),
          "skipRecords", 10,
          "maxRecords", 1);
  connector.configure(settings, true, true);
  connector.init();
  List<Record> records = Flux.merge(connector.read()).collectList().block();
  assertThat(records).hasSize(1);
  assertThat(records.get(0).getSource().toString().trim())
      .isEqualTo(
          "\"212.63.180.20\",\"212.63.180.23\",\"3560944660\",\"3560944663\",\"MZ\",\"Mozambique\"");
  connector.close();
}
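The assertion shows how the two settings compose: skipRecords discards the first ten records of the resource and maxRecords then caps what follows at one, so exactly one record comes back. Its getSource() is the raw text of that CSV line; the trim() in the assertion only strips the trailing line terminator before comparing.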