Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in project dsbulk by datastax.
The class StringToByteCodecTest, method setUp.
@BeforeEach
void setUp() {
  ConversionContext context = new TextConversionContext().setNullStrings("NULL");
  ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context);
  codec =
      (StringToByteCodec)
          codecFactory.<String, Byte>createConvertingCodec(
              DataTypes.TINYINT, GenericType.STRING, true);
}
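A hedged follow-up to that setup (assumed JUnit/AssertJ-style assertions, not the quoted test's actual body): the resulting codec parses strings into bytes, and the null string configured above converts to null.
@Test
void shouldConvert() {
  // externalToInternal parses the connector-side string into a driver-side Byte.
  assertThat(codec.externalToInternal("42")).isEqualTo((byte) 42);
  // The null string configured via setNullStrings("NULL") maps to null.
  assertThat(codec.externalToInternal("NULL")).isNull();
}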
Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in project dsbulk by datastax.
The class StringToDoubleCodecTest, method setUp.
@BeforeEach
void setUp() {
  ConversionContext context =
      new TextConversionContext()
          .setNullStrings("NULL")
          .setFormatNumbers(true)
          .setRoundingMode(HALF_EVEN);
  ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context);
  codec =
      (StringToDoubleCodec)
          codecFactory.<String, Double>createConvertingCodec(
              DataTypes.DOUBLE, GenericType.STRING, true);
}
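A hedged follow-up (assumed assertions, not the quoted test's body): with setFormatNumbers(true) the codec formats doubles on the way out, and parsing tolerates grouping separators.
@Test
void shouldConvertFormattedNumbers() {
  // Parsing accepts a grouped input string.
  assertThat(codec.externalToInternal("1,234.56")).isEqualTo(1234.56d);
  // With setFormatNumbers(true), internalToExternal goes through the number format.
  assertThat(codec.internalToExternal(1234.56d)).isEqualTo("1,234.56");
}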
Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in project dsbulk by datastax.
The class LoadWorkflow, method init.
@Override
public void init() throws Exception {
  settingsManager.init("LOAD", true, SchemaGenerationStrategy.MAP_AND_WRITE);
  executionId = settingsManager.getExecutionId();
  LogSettings logSettings = settingsManager.getLogSettings();
  logSettings.init();
  ConnectorSettings connectorSettings = settingsManager.getConnectorSettings();
  connectorSettings.init(logSettings.isSources());
  connector = connectorSettings.getConnector();
  connector.init();
  DriverSettings driverSettings = settingsManager.getDriverSettings();
  SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
  BatchSettings batchSettings = settingsManager.getBatchSettings();
  ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
  CodecSettings codecSettings = settingsManager.getCodecSettings();
  MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
  engineSettings = settingsManager.getEngineSettings();
  driverSettings.init(true);
  logSettings.logEffectiveSettings(
      settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
  monitoringSettings.init();
  codecSettings.init();
  executorSettings.init();
  engineSettings.init();
  ConvertingCodecFactory codecFactory =
      codecSettings.createCodecFactory(
          schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
  session =
      driverSettings.newSession(
          executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
  ClusterInformationUtils.printDebugInfoAboutCluster(session);
  schemaSettings.init(
      session,
      codecFactory,
      connector.supports(CommonConnectorFeature.INDEXED_RECORDS),
      connector.supports(CommonConnectorFeature.MAPPED_RECORDS));
  logManager = logSettings.newLogManager(session, true);
  logManager.init();
  batchSettings.init();
  batchingEnabled = batchSettings.isBatchingEnabled();
  batchBufferSize = batchSettings.getBufferSize();
  RecordMapper recordMapper;
  try {
    recordMapper =
        schemaSettings.createRecordMapper(
            session, connector.getRecordMetadata(), batchingEnabled);
  } catch (NestedBatchException e) {
    LOGGER.warn(e.getMessage());
    batchingEnabled = false;
    recordMapper =
        schemaSettings.createRecordMapper(session, connector.getRecordMetadata(), false);
  }
  mapper = recordMapper::map;
  if (batchingEnabled) {
    batcher = batchSettings.newStatementBatcher(session)::batchByGroupingKey;
  }
  metricsManager =
      monitoringSettings.newMetricsManager(
          true,
          batchingEnabled,
          logManager.getOperationDirectory(),
          logSettings.getVerbosity(),
          session.getContext().getProtocolVersion(),
          session.getContext().getCodecRegistry(),
          schemaSettings.getRowType());
  metricsManager.init();
  executor = executorSettings.newWriteExecutor(session, metricsManager.getExecutionListener());
  dryRun = engineSettings.isDryRun();
  if (dryRun) {
    LOGGER.info("Dry-run mode enabled.");
  }
  closed.set(false);
  totalItemsMonitor = metricsManager.newTotalItemsMonitor();
  failedRecordsMonitor = metricsManager.newFailedItemsMonitor();
  failedStatementsMonitor = metricsManager.newFailedItemsMonitor();
  batcherMonitor = metricsManager.newBatcherMonitor();
  totalItemsCounter = logManager.newTotalItemsCounter();
  failedRecordsHandler = logManager.newFailedRecordsHandler();
  unmappableStatementsHandler = logManager.newUnmappableStatementsHandler();
  queryWarningsHandler = logManager.newQueryWarningsHandler();
  failedWritesHandler = logManager.newFailedWritesHandler();
  resultPositionsHndler = logManager.newResultPositionsHandler();
  terminationHandler = logManager.newTerminationHandler();
  numCores = Runtime.getRuntime().availableProcessors();
  if (connector.readConcurrency() < 1) {
    throw new IllegalArgumentException(
        "Invalid read concurrency: " + connector.readConcurrency());
  }
  readConcurrency = connector.readConcurrency();
  hasManyReaders = readConcurrency >= Math.max(4, numCores / 4);
  LOGGER.debug("Using read concurrency: {}", readConcurrency);
  writeConcurrency =
      engineSettings.getMaxConcurrentQueries().orElseGet(this::determineWriteConcurrency);
  LOGGER.debug(
      "Using write concurrency: {} (user-supplied: {})",
      writeConcurrency,
      engineSettings.getMaxConcurrentQueries().isPresent());
}
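In the load path above, the factory does double duty: its codec registry is handed to the session, and the record mapper later asks it for one converting codec per mapped column. A minimal sketch of that per-column conversion (hypothetical helper: bindField, the "age" column and the "42" value are illustrative, and the usual driver imports for BoundStatementBuilder, DataTypes and GenericType are assumed; this is not dsbulk's actual mapper code):
static void bindField(ConvertingCodecFactory codecFactory, BoundStatementBuilder builder) {
  // Same call shape as the createConvertingCodec calls above.
  ConvertingCodec<String, Integer> codec =
      codecFactory.<String, Integer>createConvertingCodec(DataTypes.INT, GenericType.STRING, true);
  // externalToInternal turns the connector-side string into a driver-side value.
  builder.set("age", codec.externalToInternal("42"), Integer.class);
}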
Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in project dsbulk by datastax.
The class UnloadWorkflow, method init.
@Override
public void init() throws Exception {
  settingsManager.init("UNLOAD", false, SchemaGenerationStrategy.READ_AND_MAP);
  executionId = settingsManager.getExecutionId();
  LogSettings logSettings = settingsManager.getLogSettings();
  DriverSettings driverSettings = settingsManager.getDriverSettings();
  ConnectorSettings connectorSettings = settingsManager.getConnectorSettings();
  SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
  ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
  CodecSettings codecSettings = settingsManager.getCodecSettings();
  MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
  EngineSettings engineSettings = settingsManager.getEngineSettings();
  engineSettings.init();
  // First verify that dry-run is off; that's unsupported for unload.
  if (engineSettings.isDryRun()) {
    throw new IllegalArgumentException("Dry-run is not supported for unload");
  }
  // No logs should be produced until the following statement returns.
  logSettings.init();
  connectorSettings.init(false);
  connector = connectorSettings.getConnector();
  connector.init();
  driverSettings.init(false);
  logSettings.logEffectiveSettings(
      settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
  codecSettings.init();
  monitoringSettings.init();
  executorSettings.init();
  ConvertingCodecFactory codecFactory =
      codecSettings.createCodecFactory(
          schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
  session =
      driverSettings.newSession(
          executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
  ClusterInformationUtils.printDebugInfoAboutCluster(session);
  schemaSettings.init(
      session,
      codecFactory,
      connector.supports(CommonConnectorFeature.INDEXED_RECORDS),
      connector.supports(CommonConnectorFeature.MAPPED_RECORDS));
  logManager = logSettings.newLogManager(session, false);
  logManager.init();
  metricsManager =
      monitoringSettings.newMetricsManager(
          false,
          false,
          logManager.getOperationDirectory(),
          logSettings.getVerbosity(),
          session.getContext().getProtocolVersion(),
          session.getContext().getCodecRegistry(),
          schemaSettings.getRowType());
  metricsManager.init();
  RecordMetadata recordMetadata = connector.getRecordMetadata();
  readResultMapper =
      schemaSettings.createReadResultMapper(
          session, recordMetadata, codecFactory, logSettings.isSources());
  readStatements = schemaSettings.createReadStatements(session);
  executor =
      executorSettings.newReadExecutor(
          session, metricsManager.getExecutionListener(), schemaSettings.isSearchQuery());
  closed.set(false);
  writer = connector.write();
  totalItemsMonitor = metricsManager.newTotalItemsMonitor();
  failedRecordsMonitor = metricsManager.newFailedItemsMonitor();
  failedReadResultsMonitor = metricsManager.newFailedItemsMonitor();
  failedRecordsHandler = logManager.newFailedRecordsHandler();
  totalItemsCounter = logManager.newTotalItemsCounter();
  failedReadsHandler = logManager.newFailedReadsHandler();
  queryWarningsHandler = logManager.newQueryWarningsHandler();
  unmappableRecordsHandler = logManager.newUnmappableRecordsHandler();
  terminationHandler = logManager.newTerminationHandler();
  numCores = Runtime.getRuntime().availableProcessors();
  if (connector.writeConcurrency() < 1) {
    throw new IllegalArgumentException(
        "Invalid write concurrency: " + connector.writeConcurrency());
  }
  writeConcurrency = connector.writeConcurrency();
  LOGGER.debug("Using write concurrency: {}", writeConcurrency);
  // When maxConcurrentQueries is not set, a good readConcurrency is then numCores.
  readConcurrency =
      Math.min(readStatements.size(), engineSettings.getMaxConcurrentQueries().orElse(numCores));
  LOGGER.debug(
      "Using read concurrency: {} (user-supplied: {})",
      readConcurrency,
      engineSettings.getMaxConcurrentQueries().isPresent());
  schedulers = new HashSet<>();
}
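Unload drives the same factory in the opposite direction: the read result mapper created above turns driver-side row values back into connector strings. A hedged sketch of that direction (hypothetical helper formatField and column "age"; not dsbulk's actual mapper code, and the driver's Row, DataTypes and GenericType imports are assumed):
static String formatField(ConvertingCodecFactory codecFactory, Row row) {
  ConvertingCodec<String, Integer> codec =
      codecFactory.<String, Integer>createConvertingCodec(DataTypes.INT, GenericType.STRING, true);
  // internalToExternal renders the driver value as the string the connector will write.
  return codec.internalToExternal(row.getInt("age"));
}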
Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in project dsbulk by datastax.
The class CountWorkflow, method init.
@Override
public void init() throws Exception {
  settingsManager.init("COUNT", false, SchemaGenerationStrategy.READ_AND_COUNT);
  executionId = settingsManager.getExecutionId();
  LogSettings logSettings = settingsManager.getLogSettings();
  DriverSettings driverSettings = settingsManager.getDriverSettings();
  SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
  ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
  CodecSettings codecSettings = settingsManager.getCodecSettings();
  MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
  EngineSettings engineSettings = settingsManager.getEngineSettings();
  StatsSettings statsSettings = settingsManager.getStatsSettings();
  engineSettings.init();
  // First verify that dry-run is off; that's unsupported for count.
  if (engineSettings.isDryRun()) {
    throw new IllegalArgumentException("Dry-run is not supported for count");
  }
  logSettings.init();
  driverSettings.init(false);
  logSettings.logEffectiveSettings(
      settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
  codecSettings.init();
  monitoringSettings.init();
  executorSettings.init();
  statsSettings.init();
  ConvertingCodecFactory codecFactory =
      codecSettings.createCodecFactory(
          schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
  session =
      driverSettings.newSession(
          executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
  ClusterInformationUtils.printDebugInfoAboutCluster(session);
  schemaSettings.init(session, codecFactory, false, false);
  logManager = logSettings.newLogManager(session, false);
  logManager.init();
  metricsManager =
      monitoringSettings.newMetricsManager(
          false,
          false,
          logManager.getOperationDirectory(),
          logSettings.getVerbosity(),
          session.getContext().getProtocolVersion(),
          session.getContext().getCodecRegistry(),
          schemaSettings.getRowType());
  metricsManager.init();
  executor =
      executorSettings.newReadExecutor(session, metricsManager.getExecutionListener(), false);
  EnumSet<StatsSettings.StatisticsMode> modes = statsSettings.getStatisticsModes();
  int numPartitions = statsSettings.getNumPartitions();
  readResultCounter =
      schemaSettings.createReadResultCounter(session, codecFactory, modes, numPartitions);
  readStatements = schemaSettings.createReadStatements(session);
  closed.set(false);
  success = false;
  totalItemsMonitor = metricsManager.newTotalItemsMonitor();
  failedItemsMonitor = metricsManager.newFailedItemsMonitor();
  totalItemsCounter = logManager.newTotalItemsCounter();
  failedReadsHandler = logManager.newFailedReadsHandler();
  queryWarningsHandler = logManager.newQueryWarningsHandler();
  terminationHandler = logManager.newTerminationHandler();
  int numCores = Runtime.getRuntime().availableProcessors();
  readConcurrency =
      Math.min(readStatements.size(), engineSettings.getMaxConcurrentQueries().orElse(numCores));
  LOGGER.debug(
      "Using read concurrency: {} (user-supplied: {})",
      readConcurrency,
      engineSettings.getMaxConcurrentQueries().isPresent());
  int numThreads = Math.min(readConcurrency, numCores);
  scheduler = Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
}
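A worked example of the concurrency math above (all values hypothetical): with 8 cores, 3 read statements and no user-supplied engine.maxConcurrentQueries, the effective settings come out as:
int numCores = 8;                                     // Runtime.getRuntime().availableProcessors()
int readConcurrency = Math.min(3, numCores);          // min(readStatements.size(), numCores) = 3
int numThreads = Math.min(readConcurrency, numCores); // min(3, 8) = 3 scheduler threads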