
Example 16 with ConvertingCodecFactory

Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in the dsbulk project by DataStax.

From the class StringToByteCodecTest, method setUp:

@BeforeEach
void setUp() {
    ConversionContext context = new TextConversionContext().setNullStrings("NULL");
    ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context);
    codec = (StringToByteCodec) codecFactory.<String, Byte>createConvertingCodec(DataTypes.TINYINT, GenericType.STRING, true);
}
Also used: ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) TextConversionContext(com.datastax.oss.dsbulk.codecs.text.TextConversionContext) ConversionContext(com.datastax.oss.dsbulk.codecs.api.ConversionContext) BeforeEach(org.junit.jupiter.api.BeforeEach)
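
A minimal sketch of how the codec built in this fixture could be exercised, assuming the externalToInternal/internalToExternal methods that dsbulk's ConvertingCodec exposes; the class name and printed expectations are illustrative, not part of the original test:

import com.datastax.oss.driver.api.core.type.DataTypes;
import com.datastax.oss.driver.api.core.type.reflect.GenericType;
import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec;
import com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory;
import com.datastax.oss.dsbulk.codecs.text.TextConversionContext;

public class StringToByteCodecSketch {
    public static void main(String[] args) {
        // Same setup as the fixture above.
        ConvertingCodecFactory factory =
                new ConvertingCodecFactory(new TextConversionContext().setNullStrings("NULL"));
        ConvertingCodec<String, Byte> codec =
                factory.createConvertingCodec(DataTypes.TINYINT, GenericType.STRING, true);
        System.out.println(codec.externalToInternal("42"));   // expected: 42
        System.out.println(codec.externalToInternal("NULL")); // expected: null ("NULL" is a registered null string)
    }
}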

Example 17 with ConvertingCodecFactory

Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in the dsbulk project by DataStax.

From the class StringToDoubleCodecTest, method setUp:

@BeforeEach
void setUp() {
    ConversionContext context = new TextConversionContext().setNullStrings("NULL").setFormatNumbers(true).setRoundingMode(HALF_EVEN);
    ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context);
    codec = (StringToDoubleCodec) codecFactory.<String, Double>createConvertingCodec(DataTypes.DOUBLE, GenericType.STRING, true);
}
Also used: ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) TextConversionContext(com.datastax.oss.dsbulk.codecs.text.TextConversionContext) ConversionContext(com.datastax.oss.dsbulk.codecs.api.ConversionContext) BeforeEach(org.junit.jupiter.api.BeforeEach)
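
What this configuration buys is easiest to see round-trip: parsing accepts plain numeric strings, while setFormatNumbers(true) makes internalToExternal format the value on the way out, with HALF_EVEN governing any rounding. A hedged, self-contained sketch; the expected output assumes dsbulk's default US-style number format:

import static java.math.RoundingMode.HALF_EVEN;

import com.datastax.oss.driver.api.core.type.DataTypes;
import com.datastax.oss.driver.api.core.type.reflect.GenericType;
import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec;
import com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory;
import com.datastax.oss.dsbulk.codecs.text.TextConversionContext;

public class StringToDoubleCodecSketch {
    public static void main(String[] args) {
        ConvertingCodecFactory factory =
                new ConvertingCodecFactory(
                        new TextConversionContext()
                                .setNullStrings("NULL")
                                .setFormatNumbers(true)
                                .setRoundingMode(HALF_EVEN));
        ConvertingCodec<String, Double> codec =
                factory.createConvertingCodec(DataTypes.DOUBLE, GenericType.STRING, true);
        System.out.println(codec.externalToInternal("1234.56")); // expected: 1234.56
        // Formatting applies on the way out; "1,234.56" assumes the default US-style pattern.
        System.out.println(codec.internalToExternal(1234.56));
    }
}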

Example 18 with ConvertingCodecFactory

Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in the dsbulk project by DataStax.

From the class LoadWorkflow, method init:

@Override
public void init() throws Exception {
    settingsManager.init("LOAD", true, SchemaGenerationStrategy.MAP_AND_WRITE);
    executionId = settingsManager.getExecutionId();
    LogSettings logSettings = settingsManager.getLogSettings();
    logSettings.init();
    ConnectorSettings connectorSettings = settingsManager.getConnectorSettings();
    connectorSettings.init(logSettings.isSources());
    connector = connectorSettings.getConnector();
    connector.init();
    DriverSettings driverSettings = settingsManager.getDriverSettings();
    SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
    BatchSettings batchSettings = settingsManager.getBatchSettings();
    ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
    CodecSettings codecSettings = settingsManager.getCodecSettings();
    MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
    engineSettings = settingsManager.getEngineSettings();
    driverSettings.init(true);
    logSettings.logEffectiveSettings(settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
    monitoringSettings.init();
    codecSettings.init();
    executorSettings.init();
    engineSettings.init();
    ConvertingCodecFactory codecFactory = codecSettings.createCodecFactory(schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
    session = driverSettings.newSession(executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
    ClusterInformationUtils.printDebugInfoAboutCluster(session);
    schemaSettings.init(session, codecFactory, connector.supports(CommonConnectorFeature.INDEXED_RECORDS), connector.supports(CommonConnectorFeature.MAPPED_RECORDS));
    logManager = logSettings.newLogManager(session, true);
    logManager.init();
    batchSettings.init();
    batchingEnabled = batchSettings.isBatchingEnabled();
    batchBufferSize = batchSettings.getBufferSize();
    RecordMapper recordMapper;
    try {
        recordMapper = schemaSettings.createRecordMapper(session, connector.getRecordMetadata(), batchingEnabled);
    } catch (NestedBatchException e) {
        LOGGER.warn(e.getMessage());
        batchingEnabled = false;
        recordMapper = schemaSettings.createRecordMapper(session, connector.getRecordMetadata(), false);
    }
    mapper = recordMapper::map;
    if (batchingEnabled) {
        batcher = batchSettings.newStatementBatcher(session)::batchByGroupingKey;
    }
    metricsManager = monitoringSettings.newMetricsManager(true, batchingEnabled, logManager.getOperationDirectory(), logSettings.getVerbosity(), session.getContext().getProtocolVersion(), session.getContext().getCodecRegistry(), schemaSettings.getRowType());
    metricsManager.init();
    executor = executorSettings.newWriteExecutor(session, metricsManager.getExecutionListener());
    dryRun = engineSettings.isDryRun();
    if (dryRun) {
        LOGGER.info("Dry-run mode enabled.");
    }
    closed.set(false);
    totalItemsMonitor = metricsManager.newTotalItemsMonitor();
    failedRecordsMonitor = metricsManager.newFailedItemsMonitor();
    failedStatementsMonitor = metricsManager.newFailedItemsMonitor();
    batcherMonitor = metricsManager.newBatcherMonitor();
    totalItemsCounter = logManager.newTotalItemsCounter();
    failedRecordsHandler = logManager.newFailedRecordsHandler();
    unmappableStatementsHandler = logManager.newUnmappableStatementsHandler();
    queryWarningsHandler = logManager.newQueryWarningsHandler();
    failedWritesHandler = logManager.newFailedWritesHandler();
    resultPositionsHndler = logManager.newResultPositionsHandler();
    terminationHandler = logManager.newTerminationHandler();
    numCores = Runtime.getRuntime().availableProcessors();
    if (connector.readConcurrency() < 1) {
        throw new IllegalArgumentException("Invalid read concurrency: " + connector.readConcurrency());
    }
    readConcurrency = connector.readConcurrency();
    hasManyReaders = readConcurrency >= Math.max(4, numCores / 4);
    LOGGER.debug("Using read concurrency: {}", readConcurrency);
    writeConcurrency = engineSettings.getMaxConcurrentQueries().orElseGet(this::determineWriteConcurrency);
    LOGGER.debug("Using write concurrency: {} (user-supplied: {})", writeConcurrency, engineSettings.getMaxConcurrentQueries().isPresent());
}
Also used: ExecutorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) LogSettings(com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings) RecordMapper(com.datastax.oss.dsbulk.workflow.commons.schema.RecordMapper) NestedBatchException(com.datastax.oss.dsbulk.workflow.commons.schema.NestedBatchException) MonitoringSettings(com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings) ConnectorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ConnectorSettings) SchemaSettings(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings) CodecSettings(com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings) DriverSettings(com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings) BatchSettings(com.datastax.oss.dsbulk.workflow.commons.settings.BatchSettings)
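
Two details near the end of init() are worth isolating: read concurrency must be at least 1, and the workflow flags the connector as having "many readers" once its concurrency reaches max(4, numCores / 4). A standalone sketch of that arithmetic, with a hypothetical connector value:

public class ReadConcurrencySketch {
    public static void main(String[] args) {
        int numCores = Runtime.getRuntime().availableProcessors();
        int readConcurrency = 8; // hypothetical value reported by a connector
        if (readConcurrency < 1) {
            throw new IllegalArgumentException("Invalid read concurrency: " + readConcurrency);
        }
        // On a 16-core machine the threshold is max(4, 16 / 4) = 4, so 8 readers count as "many".
        boolean hasManyReaders = readConcurrency >= Math.max(4, numCores / 4);
        System.out.println("hasManyReaders = " + hasManyReaders);
    }
}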

Example 19 with ConvertingCodecFactory

Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in the dsbulk project by DataStax.

From the class UnloadWorkflow, method init:

@Override
public void init() throws Exception {
    settingsManager.init("UNLOAD", false, SchemaGenerationStrategy.READ_AND_MAP);
    executionId = settingsManager.getExecutionId();
    LogSettings logSettings = settingsManager.getLogSettings();
    DriverSettings driverSettings = settingsManager.getDriverSettings();
    ConnectorSettings connectorSettings = settingsManager.getConnectorSettings();
    SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
    ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
    CodecSettings codecSettings = settingsManager.getCodecSettings();
    MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
    EngineSettings engineSettings = settingsManager.getEngineSettings();
    engineSettings.init();
    // First verify that dry-run is off; that's unsupported for unload.
    if (engineSettings.isDryRun()) {
        throw new IllegalArgumentException("Dry-run is not supported for unload");
    }
    // No logs should be produced until the following statement returns
    logSettings.init();
    connectorSettings.init(false);
    connector = connectorSettings.getConnector();
    connector.init();
    driverSettings.init(false);
    logSettings.logEffectiveSettings(settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
    codecSettings.init();
    monitoringSettings.init();
    executorSettings.init();
    ConvertingCodecFactory codecFactory = codecSettings.createCodecFactory(schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
    session = driverSettings.newSession(executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
    ClusterInformationUtils.printDebugInfoAboutCluster(session);
    schemaSettings.init(session, codecFactory, connector.supports(CommonConnectorFeature.INDEXED_RECORDS), connector.supports(CommonConnectorFeature.MAPPED_RECORDS));
    logManager = logSettings.newLogManager(session, false);
    logManager.init();
    metricsManager = monitoringSettings.newMetricsManager(false, false, logManager.getOperationDirectory(), logSettings.getVerbosity(), session.getContext().getProtocolVersion(), session.getContext().getCodecRegistry(), schemaSettings.getRowType());
    metricsManager.init();
    RecordMetadata recordMetadata = connector.getRecordMetadata();
    readResultMapper = schemaSettings.createReadResultMapper(session, recordMetadata, codecFactory, logSettings.isSources());
    readStatements = schemaSettings.createReadStatements(session);
    executor = executorSettings.newReadExecutor(session, metricsManager.getExecutionListener(), schemaSettings.isSearchQuery());
    closed.set(false);
    writer = connector.write();
    totalItemsMonitor = metricsManager.newTotalItemsMonitor();
    failedRecordsMonitor = metricsManager.newFailedItemsMonitor();
    failedReadResultsMonitor = metricsManager.newFailedItemsMonitor();
    failedRecordsHandler = logManager.newFailedRecordsHandler();
    totalItemsCounter = logManager.newTotalItemsCounter();
    failedReadsHandler = logManager.newFailedReadsHandler();
    queryWarningsHandler = logManager.newQueryWarningsHandler();
    unmappableRecordsHandler = logManager.newUnmappableRecordsHandler();
    terminationHandler = logManager.newTerminationHandler();
    numCores = Runtime.getRuntime().availableProcessors();
    if (connector.writeConcurrency() < 1) {
        throw new IllegalArgumentException("Invalid write concurrency: " + connector.writeConcurrency());
    }
    writeConcurrency = connector.writeConcurrency();
    LOGGER.debug("Using write concurrency: {}", writeConcurrency);
    // Cap read concurrency at the number of read statements; when the user did not
    // set engine.maxConcurrentQueries, numCores is a good default.
    readConcurrency = Math.min(readStatements.size(), engineSettings.getMaxConcurrentQueries().orElse(numCores));
    LOGGER.debug("Using read concurrency: {} (user-supplied: {})", readConcurrency, engineSettings.getMaxConcurrentQueries().isPresent());
    schedulers = new HashSet<>();
}
Also used: ExecutorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) DriverSettings(com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings) EngineSettings(com.datastax.oss.dsbulk.workflow.commons.settings.EngineSettings) LogSettings(com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings) MonitoringSettings(com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings) ConnectorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ConnectorSettings) SchemaSettings(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings) CodecSettings(com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings)
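
The read-concurrency line above leans on Optional: a user-supplied engine.maxConcurrentQueries wins, numCores is the fallback, and the number of read statements caps both. A hedged rendering of that decision with hypothetical inputs:

import java.util.Optional;

public class UnloadConcurrencySketch {
    // Hypothetical stand-ins for engineSettings.getMaxConcurrentQueries()
    // and readStatements.size().
    static int readConcurrency(Optional<Integer> maxConcurrentQueries, int statementCount) {
        int numCores = Runtime.getRuntime().availableProcessors();
        return Math.min(statementCount, maxConcurrentQueries.orElse(numCores));
    }

    public static void main(String[] args) {
        System.out.println(readConcurrency(Optional.empty(), 100)); // no user value: capped at numCores
        System.out.println(readConcurrency(Optional.of(16), 100));  // user value wins: 16
        System.out.println(readConcurrency(Optional.of(16), 8));    // statement count caps it: 8
    }
}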

Example 20 with ConvertingCodecFactory

Use of com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory in the dsbulk project by DataStax.

From the class CountWorkflow, method init:

@Override
public void init() throws Exception {
    settingsManager.init("COUNT", false, SchemaGenerationStrategy.READ_AND_COUNT);
    executionId = settingsManager.getExecutionId();
    LogSettings logSettings = settingsManager.getLogSettings();
    DriverSettings driverSettings = settingsManager.getDriverSettings();
    SchemaSettings schemaSettings = settingsManager.getSchemaSettings();
    ExecutorSettings executorSettings = settingsManager.getExecutorSettings();
    CodecSettings codecSettings = settingsManager.getCodecSettings();
    MonitoringSettings monitoringSettings = settingsManager.getMonitoringSettings();
    EngineSettings engineSettings = settingsManager.getEngineSettings();
    StatsSettings statsSettings = settingsManager.getStatsSettings();
    engineSettings.init();
    // First verify that dry-run is off; that's unsupported for count.
    if (engineSettings.isDryRun()) {
        throw new IllegalArgumentException("Dry-run is not supported for count");
    }
    logSettings.init();
    driverSettings.init(false);
    logSettings.logEffectiveSettings(settingsManager.getEffectiveBulkLoaderConfig(), driverSettings.getDriverConfig());
    codecSettings.init();
    monitoringSettings.init();
    executorSettings.init();
    statsSettings.init();
    ConvertingCodecFactory codecFactory = codecSettings.createCodecFactory(schemaSettings.isAllowExtraFields(), schemaSettings.isAllowMissingFields());
    session = driverSettings.newSession(executionId, codecFactory.getCodecRegistry(), monitoringSettings.getRegistry());
    ClusterInformationUtils.printDebugInfoAboutCluster(session);
    schemaSettings.init(session, codecFactory, false, false);
    logManager = logSettings.newLogManager(session, false);
    logManager.init();
    metricsManager = monitoringSettings.newMetricsManager(false, false, logManager.getOperationDirectory(), logSettings.getVerbosity(), session.getContext().getProtocolVersion(), session.getContext().getCodecRegistry(), schemaSettings.getRowType());
    metricsManager.init();
    executor = executorSettings.newReadExecutor(session, metricsManager.getExecutionListener(), false);
    EnumSet<StatsSettings.StatisticsMode> modes = statsSettings.getStatisticsModes();
    int numPartitions = statsSettings.getNumPartitions();
    readResultCounter = schemaSettings.createReadResultCounter(session, codecFactory, modes, numPartitions);
    readStatements = schemaSettings.createReadStatements(session);
    closed.set(false);
    success = false;
    totalItemsMonitor = metricsManager.newTotalItemsMonitor();
    failedItemsMonitor = metricsManager.newFailedItemsMonitor();
    totalItemsCounter = logManager.newTotalItemsCounter();
    failedReadsHandler = logManager.newFailedReadsHandler();
    queryWarningsHandler = logManager.newQueryWarningsHandler();
    terminationHandler = logManager.newTerminationHandler();
    int numCores = Runtime.getRuntime().availableProcessors();
    readConcurrency = Math.min(readStatements.size(), engineSettings.getMaxConcurrentQueries().orElse(numCores));
    LOGGER.debug("Using read concurrency: {} (user-supplied: {})", readConcurrency, engineSettings.getMaxConcurrentQueries().isPresent());
    int numThreads = Math.min(readConcurrency, numCores);
    scheduler = Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
}
Also used: ExecutorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) EngineSettings(com.datastax.oss.dsbulk.workflow.commons.settings.EngineSettings) LogSettings(com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings) StatsSettings(com.datastax.oss.dsbulk.workflow.commons.settings.StatsSettings) MonitoringSettings(com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings) SchemaSettings(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings) CodecSettings(com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings) DefaultThreadFactory(io.netty.util.concurrent.DefaultThreadFactory) DriverSettings(com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings)
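
The last two lines of init() size a Reactor scheduler to min(readConcurrency, numCores) worker threads, named via Netty's DefaultThreadFactory. A minimal sketch of that setup outside the workflow; the sample concurrency and the Flux pipeline are illustrative only:

import io.netty.util.concurrent.DefaultThreadFactory;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;

public class WorkflowSchedulerSketch {
    public static void main(String[] args) {
        int numCores = Runtime.getRuntime().availableProcessors();
        int readConcurrency = 16; // hypothetical; CountWorkflow derives this from the read statements
        int numThreads = Math.min(readConcurrency, numCores);
        // Netty's DefaultThreadFactory gives every worker thread the "workflow" prefix.
        Scheduler scheduler = Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
        Flux.range(1, 4)
                .parallel(numThreads)
                .runOn(scheduler)
                .doOnNext(i -> System.out.println(Thread.currentThread().getName() + " -> " + i))
                .sequential()
                .blockLast();
        scheduler.dispose();
    }
}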

Aggregations

ConvertingCodecFactory (com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory): 37 usages
TextConversionContext (com.datastax.oss.dsbulk.codecs.text.TextConversionContext): 29 usages
BeforeEach (org.junit.jupiter.api.BeforeEach): 29 usages
ConversionContext (com.datastax.oss.dsbulk.codecs.api.ConversionContext): 28 usages
JsonNode (com.fasterxml.jackson.databind.JsonNode): 14 usages
Config (com.typesafe.config.Config): 5 usages
Test (org.junit.jupiter.api.Test): 5 usages
List (java.util.List): 4 usages
CodecSettings (com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings): 3 usages
DriverSettings (com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings): 3 usages
ExecutorSettings (com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings): 3 usages
LogSettings (com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings): 3 usages
MonitoringSettings (com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings): 3 usages
SchemaSettings (com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings): 3 usages
Instant (java.time.Instant): 3 usages
TupleValue (com.datastax.oss.driver.api.core.data.TupleValue): 2 usages
UdtValue (com.datastax.oss.driver.api.core.data.UdtValue): 2 usages
ImmutableMap (com.datastax.oss.driver.shaded.guava.common.collect.ImmutableMap): 2 usages
ConnectorSettings (com.datastax.oss.dsbulk.workflow.commons.settings.ConnectorSettings): 2 usages
EngineSettings (com.datastax.oss.dsbulk.workflow.commons.settings.EngineSettings): 2 usages