Use of com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper in project dsbulk by datastax.
The class UnloadWorkflow, method manyWriters:
private Flux<Record> manyWriters() {
  // writeConcurrency and readConcurrency are >= 0.5C here
  int actualConcurrency = Math.min(readConcurrency, writeConcurrency);
  int numThreads = Math.min(numCores * 2, actualConcurrency);
  Scheduler scheduler = Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
  schedulers.add(scheduler);
  return Flux.fromIterable(readStatements)
      .flatMap(
          results -> {
            Flux<Record> records =
                Flux.from(executor.readReactive(results))
                    .publishOn(scheduler, 500)
                    .transform(queryWarningsHandler)
                    .transform(totalItemsMonitor)
                    .transform(totalItemsCounter)
                    .transform(failedReadResultsMonitor)
                    .transform(failedReadsHandler)
                    .map(readResultMapper::map)
                    .transform(failedRecordsMonitor)
                    .transform(unmappableRecordsHandler);
            if (actualConcurrency == writeConcurrency) {
              records = records.transform(writer);
            } else {
              // If the actual concurrency is less than the connector's desired write
              // concurrency, we need to give the connector a chance to switch writers
              // frequently so that it can really redirect records to all the final
              // destinations (to that many files on disk, for example). If the connector
              // is correctly implemented, each window will be redirected to a different
              // destination in a round-robin fashion.
              records = records.window(500).flatMap(window -> window.transform(writer), 1, 500);
            }
            return records.transform(failedRecordsMonitor).transform(failedRecordsHandler);
          },
          actualConcurrency,
          500);
}
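
The window(500) + flatMap(..., 1, 500) combination is the key detail: with a flatMap concurrency of 1, windows are drained strictly one after another, so a connector that switches to a new destination on each subscription spreads successive windows across its writers in round-robin order. A minimal standalone sketch of that effect, using plain reactor-core and made-up target names (an illustration, not dsbulk code):

import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import reactor.core.publisher.Flux;

public class RoundRobinWindowsDemo {
  public static void main(String[] args) {
    List<String> targets = List.of("file-0", "file-1", "file-2"); // hypothetical destinations
    AtomicInteger next = new AtomicInteger();
    Flux.range(0, 12)
        .window(4) // emulate the fixed-size windows of records
        .flatMap(
            window -> {
              // Pick the next destination per window, mimicking a connector that
              // switches writers on every new subscription.
              String target = targets.get(next.getAndIncrement() % targets.size());
              return window.map(record -> target + " <- record " + record);
            },
            1, // concurrency 1: windows complete one at a time, preserving order
            4)
        .subscribe(System.out::println);
  }
}

With concurrency 1, records 0-3 go to file-0, 4-7 to file-1, and 8-11 to file-2, which is exactly the redirection behavior the comment above describes.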
Use of com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper in project dsbulk by datastax.
The class SchemaSettingsTest, method should_create_row_mapper_with_inferred_mapping_and_skip_multiple:
@Test
void should_create_row_mapper_with_inferred_mapping_and_skip_multiple() {
  // Infer mapping, but skip C2 and C3.
  Config config =
      TestConfigUtils.createTestConfig(
          "dsbulk.schema", "keyspace", "ks", "table", "t1", "mapping", "\" *=[-\\\"COL 2\\\", -c3] \"");
  SchemaSettings settings = new SchemaSettings(config, READ_AND_MAP);
  settings.init(session, codecFactory, false, true);
  ReadResultMapper mapper =
      settings.createReadResultMapper(session, recordMetadata, codecFactory, true);
  assertThat(mapper).isNotNull();
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(session).prepare(argument.capture());
  assertThat(argument.getValue())
      .isEqualTo("SELECT c1 FROM ks.t1 WHERE token(c1) > :start AND token(c1) <= :end");
  assertMapping(mapper, C1, C1);
}
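
The mapping *=[-"COL 2", -c3] means: infer the mapping from all columns of the table, then exclude COL 2 and c3. That is why the prepared query selects only c1. A toy illustration of that set arithmetic (a hypothetical demo class, not dsbulk's mapping parser):

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class InferredMappingDemo {
  public static void main(String[] args) {
    // All columns of ks.t1, in declaration order.
    Set<String> inferred = new LinkedHashSet<>(List.of("c1", "COL 2", "c3"));
    // Apply the exclusions from *=[-"COL 2", -c3].
    List.of("COL 2", "c3").forEach(inferred::remove);
    System.out.println(inferred); // [c1] -> SELECT c1 FROM ks.t1 ...
  }
}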
Use of com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper in project dsbulk by datastax.
The class SchemaSettingsTest, method should_infer_select_query_without_solr_query_column:
@Test
void should_infer_select_query_without_solr_query_column() {
  ColumnMetadata solrQueryCol = mock(ColumnMetadata.class);
  CqlIdentifier solrQueryColName = CqlIdentifier.fromInternal("solr_query");
  when(solrQueryCol.getName()).thenReturn(solrQueryColName);
  when(solrQueryCol.getType()).thenReturn(DataTypes.TEXT);
  when(table.getColumns())
      .thenReturn(ImmutableMap.of(C1, col1, C2, col2, C3, col3, solrQueryColName, solrQueryCol));
  IndexMetadata idx = mock(IndexMetadata.class);
  CqlIdentifier idxName = CqlIdentifier.fromInternal("idx");
  when(table.getIndexes()).thenReturn(ImmutableMap.of(idxName, idx));
  when(idx.getClassName())
      .thenReturn(Optional.of("com.datastax.bdp.search.solr.Cql3SolrSecondaryIndex"));
  Config config = TestConfigUtils.createTestConfig("dsbulk.schema", "keyspace", "ks", "table", "t1");
  SchemaSettings settings = new SchemaSettings(config, READ_AND_MAP);
  settings.init(session, codecFactory, false, true);
  ReadResultMapper mapper =
      settings.createReadResultMapper(session, recordMetadata, codecFactory, true);
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(session).prepare(argument.capture());
  assertThat(argument.getValue())
      .isEqualTo(
          "SELECT c1, \"COL 2\", c3 FROM ks.t1 WHERE token(c1) > :start AND token(c1) <= :end");
  assertMapping(mapper, C1, C1, C2, C2, C3, C3);
}
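
The mocks simulate a DSE Search table: a text column named solr_query backed by a secondary index whose class is Cql3SolrSecondaryIndex. SchemaSettings leaves that synthetic column out of the inferred SELECT list, so only c1, COL 2 and c3 are queried. A sketch of the kind of check involved (a hypothetical helper, not the actual SchemaSettings code):

import java.util.Collection;
import java.util.Optional;

final class SearchIndexDetector {
  private static final String SOLR_INDEX_CLASS =
      "com.datastax.bdp.search.solr.Cql3SolrSecondaryIndex";

  // True when any index on the table uses DSE Search's Solr index class;
  // in that case the synthetic solr_query column should be skipped.
  static boolean hasSearchIndex(Collection<Optional<String>> indexClassNames) {
    return indexClassNames.stream()
        .anyMatch(name -> name.filter(SOLR_INDEX_CLASS::equals).isPresent());
  }
}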
Use of com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper in project dsbulk by datastax.
The class SchemaSettingsTest, method should_create_row_mapper_when_mapping_is_a_list_and_mapped:
@Test
void should_create_row_mapper_when_mapping_is_a_list_and_mapped() {
  Config config =
      TestConfigUtils.createTestConfig(
          "dsbulk.schema", "mapping", "\"\\\"COL 2\\\", c1\"", "keyspace", "ks", "table", "t1");
  SchemaSettings settings = new SchemaSettings(config, READ_AND_MAP);
  settings.init(session, codecFactory, false, true);
  ReadResultMapper mapper =
      settings.createReadResultMapper(session, recordMetadata, codecFactory, true);
  assertThat(mapper).isNotNull();
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(session).prepare(argument.capture());
  assertThat(argument.getValue())
      .isEqualTo(
          "SELECT \"COL 2\", c1 FROM ks.t1 WHERE token(c1) > :start AND token(c1) <= :end");
  assertMapping(mapper, C1, C1, C2, C2);
}
Use of com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper in project dsbulk by datastax.
The class SchemaSettingsTest, method should_create_row_mapper_when_mapping_keyspace_and_table_provided:
@Test
void should_create_row_mapper_when_mapping_keyspace_and_table_provided() {
  Config config =
      TestConfigUtils.createTestConfig(
          "dsbulk.schema", "mapping", "\" 0 = \\\"COL 2\\\" , 2 = c1 \"", "keyspace", "ks", "table", "t1");
  SchemaSettings settings = new SchemaSettings(config, READ_AND_MAP);
  settings.init(session, codecFactory, true, false);
  ReadResultMapper mapper =
      settings.createReadResultMapper(session, recordMetadata, codecFactory, true);
  assertThat(mapper).isNotNull();
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(session).prepare(argument.capture());
  assertThat(argument.getValue())
      .isEqualTo(
          "SELECT \"COL 2\", c1 FROM ks.t1 WHERE token(c1) > :start AND token(c1) <= :end");
  assertMapping(mapper, "0", C2, "2", C1);
}
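
Here the mapping is indexed: 0 = "COL 2", 2 = c1 ties zero-based record positions to CQL columns, which assertMapping confirms as field "0" <-> COL 2 and field "2" <-> c1. A hypothetical demo (not dsbulk code) of what that means for an unloaded row:

import java.util.Map;
import java.util.TreeMap;

public class IndexedMappingDemo {
  public static void main(String[] args) {
    // A simulated row, keyed by column name.
    Map<String, Object> row = Map.of("COL 2", "foo", "c1", 42);
    // Position -> column, per the mapping "0 = \"COL 2\" , 2 = c1".
    Map<Integer, String> positionToColumn = new TreeMap<>(Map.of(0, "COL 2", 2, "c1"));
    positionToColumn.forEach(
        (position, column) ->
            System.out.printf("field %d <- %s = %s%n", position, column, row.get(column)));
    // Prints: field 0 <- COL 2 = foo, then field 2 <- c1 = 42
    // (position 1 has no mapping and stays unset).
  }
}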