Search in sources :

Example 66 with DataStaxBulkLoader

use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class JsonEndToEndSimulacronIT, the method full_load_dry_run:

@Test
void full_load_dry_run() {
    // Prime the simulated cluster so the target table and its INSERT are known.
    primeIpByCountryTable(simulacron);
    RequestPrime insertPrime = createSimpleParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY);
    simulacron.prime(new Prime(insertPrime));
    String[] cliArgs = {
        "load",
        "-c", "json",
        "--connector.json.url", StringUtils.quoteJson(JsonUtils.JSON_RECORDS_UNIQUE),
        "-dryRun", "true",
        "--schema.keyspace", "ks1",
        "--schema.query", INSERT_INTO_IP_BY_COUNTRY,
        "--schema.mapping", IP_BY_COUNTRY_MAPPING_NAMED
    };
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    // Dry-run mode: the load must complete OK while issuing zero actual INSERTs.
    validateQueryCount(simulacron, 0, "INSERT INTO ip_by_country", ONE);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) Prime(com.datastax.oss.simulacron.common.stubbing.Prime) RequestPrime(com.datastax.oss.simulacron.common.cluster.RequestPrime) RequestPrime(com.datastax.oss.simulacron.common.cluster.RequestPrime) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 67 with DataStaxBulkLoader

use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class GraphEndToEndCCMIT, the method json_full_load_unload_and_load_again_edges:

@Test
void json_full_load_unload_and_load_again_edges() throws Exception {
    // Step 1: load the customer->order edge data from the JSON fixture.
    List<String> cliArgs =
        com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList(
            "load",
            "-g", FRAUD_GRAPH,
            "-e", PLACES_EDGE_LABEL,
            "-from", CUSTOMER_VERTEX_LABEL,
            "-to", ORDER_VERTEX_LABEL,
            "-url", StringUtils.quoteJson(JSON_CUSTOMER_ORDER_RECORDS),
            "-m", CUSTOMER_ORDER_MAPPINGS,
            "--connector.name", "json");
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateResultSetSize(14, SELECT_ALL_CUSTOMER_ORDERS);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    GraphResultSet edges =
        session.execute(
            ScriptGraphStatement.newInstance("g.E().hasLabel('" + PLACES_EDGE_LABEL + "')"));
    assertThat(edges).hasSize(14);
    FileUtils.deleteDirectory(logDir);
    // Step 2: unload the same edges to unloadDir
    // (maxConcurrentFiles=1 presumably yields a single output file — confirm against connector docs).
    cliArgs =
        com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList(
            "unload",
            "-g", FRAUD_GRAPH,
            "-e", PLACES_EDGE_LABEL,
            "-from", CUSTOMER_VERTEX_LABEL,
            "-to", ORDER_VERTEX_LABEL,
            "--connector.name", "json",
            "-url", quoteJson(unloadDir),
            "-m", CUSTOMER_ORDER_MAPPINGS,
            "--connector.json.maxConcurrentFiles", "1");
    exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateOutputFiles(14, unloadDir);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    // Step 3: wipe the data, then reload from the files produced by the unload (round-trip check).
    truncateTables();
    cliArgs =
        com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList(
            "load",
            "-g", FRAUD_GRAPH,
            "-e", PLACES_EDGE_LABEL,
            "-from", CUSTOMER_VERTEX_LABEL,
            "-to", ORDER_VERTEX_LABEL,
            "-url", quoteJson(unloadDir),
            "--connector.name", "json",
            "-m", CUSTOMER_ORDER_MAPPINGS);
    exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateResultSetSize(14, SELECT_ALL_CUSTOMER_ORDERS);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    edges =
        session.execute(
            ScriptGraphStatement.newInstance("g.E().hasLabel('" + PLACES_EDGE_LABEL + "')"));
    assertThat(edges).hasSize(14);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) GraphResultSet(com.datastax.dse.driver.api.core.graph.GraphResultSet) Test(org.junit.jupiter.api.Test)

Example 68 with DataStaxBulkLoader

use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class GraphEndToEndCCMIT, the method csv_full_load_unload_and_load_again_vertices:

@Test
void csv_full_load_unload_and_load_again_vertices() throws Exception {
    // Step 1: load customer vertices from the pipe-delimited CSV fixture.
    List<String> cliArgs =
        Lists.newArrayList(
            "load",
            "-g", FRAUD_GRAPH,
            "-v", CUSTOMER_VERTEX_LABEL,
            "-url", StringUtils.quoteJson(CSV_CUSTOMER_RECORDS),
            "--connector.csv.delimiter", "|");
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
    GraphResultSet vertices =
        session.execute(
            ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
    assertThat(vertices).hasSize(34);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    FileUtils.deleteDirectory(logDir);
    // Step 2: unload the vertices to unloadDir.
    // NOTE(review): 35 lines expected vs 34 rows — presumably 34 records plus a CSV header; confirm.
    cliArgs =
        Lists.newArrayList(
            "unload",
            "-g", FRAUD_GRAPH,
            "-v", CUSTOMER_VERTEX_LABEL,
            "-url", quoteJson(unloadDir),
            "--connector.csv.delimiter", "|",
            "--connector.csv.maxConcurrentFiles", "1");
    exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateOutputFiles(35, unloadDir);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    // Step 3: wipe the data, then reload from the unloaded files (round-trip check).
    truncateTables();
    cliArgs =
        Lists.newArrayList(
            "load",
            "-g", FRAUD_GRAPH,
            "-v", CUSTOMER_VERTEX_LABEL,
            "-url", quoteJson(unloadDir),
            "--connector.csv.delimiter", "|");
    exitStatus = new DataStaxBulkLoader(addCommonSettings(cliArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    vertices =
        session.execute(
            ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
    assertThat(vertices).hasSize(34);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) GraphResultSet(com.datastax.dse.driver.api.core.graph.GraphResultSet) Test(org.junit.jupiter.api.Test)

Example 69 with DataStaxBulkLoader

use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class GraphEndToEndCCMIT, the method json_full_load_unload_and_load_again_vertices:

@Test
void json_full_load_unload_and_load_again_vertices() throws Exception {
    // Load customer vertices from the JSON fixture.
    List<String> args = com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList("load", "-g", FRAUD_GRAPH, "-v", CUSTOMER_VERTEX_LABEL, "-url", StringUtils.quoteJson(JSON_CUSTOMER_RECORDS), "--connector.name", "json");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    GraphResultSet results = session.execute(ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
    assertThat(results).hasSize(34);
    FileUtils.deleteDirectory(logDir);
    // Unload the customer vertices to unloadDir.
    args = com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList("unload", "-g", FRAUD_GRAPH, "-v", CUSTOMER_VERTEX_LABEL, "-url", quoteJson(unloadDir), "--connector.name", "json", "--connector.json.maxConcurrentFiles", "1");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(34, unloadDir);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    logs.clear();
    stderr.clear();
    // Remove data for reload validation
    truncateTables();
    // Reload customer data from the unloaded files (round-trip check).
    // FIX: this previously reloaded the original JSON_CUSTOMER_RECORDS fixture, so the
    // unloaded output was never validated by the "load again" step. Load from unloadDir
    // instead, consistent with the edge and CSV round-trip tests in this class.
    args = com.datastax.oss.driver.shaded.guava.common.collect.Lists.newArrayList("load", "-g", FRAUD_GRAPH, "-v", CUSTOMER_VERTEX_LABEL, "-url", quoteJson(unloadDir), "--connector.name", "json");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
    TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
    assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
    results = session.execute(ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
    assertThat(results).hasSize(34);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) GraphResultSet(com.datastax.dse.driver.api.core.graph.GraphResultSet) Test(org.junit.jupiter.api.Test)

Example 70 with DataStaxBulkLoader

use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class JsonConnectorEndToEndCCMIT, the method should_not_truncate_nor_round:

/**
 * Test for DAT-224: numeric values must round-trip through load/unload without
 * being truncated or rounded (overflowStrategy=REJECT, roundingStrategy=UNNECESSARY).
 */
@Test
void should_not_truncate_nor_round() throws Exception {
    session.execute("DROP TABLE IF EXISTS numbers");
    session.execute("CREATE TABLE IF NOT EXISTS numbers (key varchar PRIMARY KEY, vdouble double, vdecimal decimal)");
    // Phase 1: load the number fixture; the value that cannot fit in a double must be rejected.
    List<String> cli = new ArrayList<>();
    cli.add("load");
    cli.add("--connector.name");
    cli.add("json");
    cli.add("--connector.json.url");
    cli.add(StringUtils.quoteJson(ClassLoader.getSystemResource("number.json").toExternalForm()));
    cli.add("--connector.json.mode");
    cli.add("SINGLE_DOCUMENT");
    cli.add("--codec.overflowStrategy");
    cli.add("REJECT");
    cli.add("--schema.keyspace");
    cli.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
    cli.add("--schema.table");
    cli.add("numbers");
    cli.add("--schema.mapping");
    cli.add("*=*");
    ExitStatus loadExit = new DataStaxBulkLoader(addCommonSettings(cli)).run();
    // One record overflows a double, so the run completes with errors, not STATUS_OK.
    assertStatus(loadExit, STATUS_COMPLETED_WITH_ERRORS);
    validateExceptionsLog(1, "ArithmeticException: Cannot convert 0.12345678901234567890123456789 from BigDecimal to Double", "mapping-errors.log");
    checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
    FileUtils.deleteDirectory(logDir);
    // Phase 2: unload with UNNECESSARY rounding, which forbids any lossy rounding on output.
    cli = new ArrayList<>();
    cli.add("unload");
    cli.add("--connector.name");
    cli.add("json");
    cli.add("--connector.json.url");
    cli.add(quoteJson(unloadDir));
    cli.add("--connector.json.mode");
    cli.add("MULTI_DOCUMENT");
    cli.add("--connector.json.maxConcurrentFiles");
    cli.add("1");
    cli.add("--codec.roundingStrategy");
    cli.add("UNNECESSARY");
    cli.add("--schema.keyspace");
    cli.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
    cli.add("--schema.query");
    cli.add("SELECT key, vdouble, vdecimal FROM numbers");
    ExitStatus unloadExit = new DataStaxBulkLoader(addCommonSettings(cli)).run();
    assertStatus(unloadExit, STATUS_OK);
    checkNumbersRead(OverflowStrategy.REJECT, unloadDir);
    FileUtils.deleteDirectory(logDir);
    // Phase 3: the unloaded dataset must itself be loadable without any errors.
    cli = new ArrayList<>();
    cli.add("load");
    cli.add("--connector.name");
    cli.add("json");
    cli.add("--connector.json.url");
    cli.add(quoteJson(unloadDir));
    cli.add("--codec.overflowStrategy");
    cli.add("REJECT");
    cli.add("--schema.keyspace");
    cli.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
    cli.add("--schema.table");
    cli.add("numbers");
    cli.add("--schema.mapping");
    cli.add("*=*");
    loadExit = new DataStaxBulkLoader(addCommonSettings(cli)).run();
    assertStatus(loadExit, STATUS_OK);
    checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) CqlIdentifier(com.datastax.oss.driver.api.core.CqlIdentifier) Test(org.junit.jupiter.api.Test)

Aggregations

DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader)165 ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus)165 Test (org.junit.jupiter.api.Test)142 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)135 ArrayList (java.util.ArrayList)75 Row (com.datastax.oss.driver.api.core.cql.Row)30 RequestPrime (com.datastax.oss.simulacron.common.cluster.RequestPrime)30 Prime (com.datastax.oss.simulacron.common.stubbing.Prime)30 CqlIdentifier (com.datastax.oss.driver.api.core.CqlIdentifier)22 ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet)14 MethodSource (org.junit.jupiter.params.provider.MethodSource)10 Column (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column)9 Table (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table)9 URL (java.net.URL)9 EndToEndUtils.primeIpByCountryTable (com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable)8 CsvSource (org.junit.jupiter.params.provider.CsvSource)7 Record (com.datastax.oss.dsbulk.connectors.api.Record)6 SimulacronUtils (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils)5 Keyspace (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace)5 Path (java.nio.file.Path)5