Search in sources :

Example 1 with StatisticsMode

Use of com.datastax.oss.dsbulk.workflow.commons.settings.StatsSettings.StatisticsMode in the dsbulk project by DataStax.

From the class SchemaSettings, method prepareStatementAndCreateMapping.

/**
 * Produces the CQL query for the current operation, prepares it on the given session, and
 * builds the mapping between fields and CQL variables.
 *
 * <p>The method follows one of two paths, depending on whether the configuration has a
 * {@code QUERY} path:
 *
 * <ul>
 *   <li><b>No user-provided query:</b> the fields-to-variables map is created first, from the
 *       table's columns (plus {@code WRITETIME}/{@code TTL} function calls when requested and
 *       supported); a query is then inferred from it according to the schema generation
 *       strategy, and only afterwards prepared.
 *   <li><b>User-provided query:</b> {@code query} and {@code queryInspector} are expected to be
 *       already set (this is asserted). The query may be rewritten before preparation (token
 *       range restriction for parallelizable reads, single selector when counting); the
 *       fields-to-variables map is created after preparation, from what {@code getVariables()}
 *       returns.
 * </ul>
 *
 * @param session the session used to run {@code USE} (when the query has no keyspace name) and
 *     to prepare the statement(s)
 * @param batchingEnabled when {@code true} and the query is a batch, the statements are obtained
 *     from {@code unwrapAndPrepareBatchChildStatements}; otherwise the query is prepared as is
 * @param modes the statistics modes; handed to count-query inference, and validated when
 *     counting with a user-provided query
 * @return a new {@link DefaultMapping}
 * @throws IllegalStateException if no query can be inferred for the schema generation strategy
 * @throws IllegalArgumentException if counting with a user-provided query while {@code modes}
 *     contains {@code partitions}, {@code ranges} or {@code hosts}
 */
@NonNull
private Mapping prepareStatementAndCreateMapping(CqlSession session, boolean batchingEnabled, EnumSet<StatisticsMode> modes) {
    ImmutableMultimap<MappingField, CQLFragment> fieldMapping = null;

    if (!config.hasPath(QUERY)) {
        // No user-provided query: the mapping must exist *before* the query can be generated
        // and prepared, because the query is inferred from it.
        List<CQLFragment> selectors = new ArrayList<>();
        for (ColumnMetadata column : table.getColumns().values()) {
            if (isDSESearchPseudoColumn(column)) {
                continue;
            }
            CQLWord name = CQLWord.fromCqlIdentifier(column.getName());
            selectors.add(name);
            if (schemaGenerationStrategy.isMapping()) {
                // The function-call selectors directly follow the column they apply to.
                if (preserveTimestamp && checkWritetimeTtlSupported(column, WRITETIME)) {
                    selectors.add(new FunctionCall(null, WRITETIME, name));
                }
                if (preserveTtl && checkWritetimeTtlSupported(column, TTL)) {
                    selectors.add(new FunctionCall(null, TTL, name));
                }
            }
        }
        fieldMapping = createFieldsToVariablesMap(selectors);

        // Infer the query that matches the current strategy.
        if (schemaGenerationStrategy.isWriting()) {
            if (isCounterTable()) {
                query = inferUpdateCounterQuery(fieldMapping);
            } else if (requiresBatchInsertQuery(fieldMapping)) {
                query = inferBatchInsertQuery(fieldMapping);
            } else {
                query = inferInsertQuery(fieldMapping);
            }
        } else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isMapping()) {
            query = inferReadQuery(fieldMapping);
        } else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isCounting()) {
            query = inferCountQuery(modes);
        } else {
            throw new IllegalStateException("Unsupported schema generation strategy: " + schemaGenerationStrategy);
        }
        LOGGER.debug("Inferred query: {}", query);
        queryInspector = new QueryInspector(query);

        // Generated write queries must still cover the primary key.
        if (schemaGenerationStrategy.isWriting()) {
            validatePrimaryKeyPresent(fieldMapping);
        }
    }

    assert query != null;
    assert queryInspector != null;

    // The query does not name its keyspace: switch the session to the configured one.
    if (!queryInspector.getKeyspaceName().isPresent()) {
        session.execute("USE " + keyspaceName);
    }

    // User-provided queries may need to be rewritten before they are prepared.
    if (config.hasPath(QUERY)) {
        if (schemaGenerationStrategy.isReading() && queryInspector.isParallelizable()) {
            // Splice the token range restriction in right after the FROM clause.
            int splitAt = queryInspector.getFromClauseEndIndex() + 1;
            StringBuilder restricted = new StringBuilder(query.substring(0, splitAt));
            appendTokenRangeRestriction(restricted);
            restricted.append(query.substring(splitAt));
            query = restricted.toString();
        }
        if (schemaGenerationStrategy.isCounting()) {
            boolean onlyAllowedModes =
                !modes.contains(StatisticsMode.partitions)
                    && !modes.contains(StatisticsMode.ranges)
                    && !modes.contains(StatisticsMode.hosts);
            if (!onlyAllowedModes) {
                throw new IllegalArgumentException(
                    String.format(
                        "Cannot count with stats.modes = %s when schema.query is provided; "
                            + "only stats.modes = [global] is allowed",
                        modes));
            }
            // Reduce row size by selecting a single selector; everything from the FROM clause
            // onwards is kept as written.
            query =
                new StringBuilder("SELECT ")
                    .append(getGlobalCountSelector())
                    .append(' ')
                    .append(query.substring(queryInspector.getFromClauseStartIndex()))
                    .toString();
        }
        // Re-inspect, since the query text may have changed above.
        queryInspector = new QueryInspector(query);
    }

    if (!batchingEnabled || !queryInspector.isBatch()) {
        preparedStatements = Collections.singletonList(session.prepare(query));
    } else {
        preparedStatements = unwrapAndPrepareBatchChildStatements(session);
    }

    if (config.hasPath(QUERY)) {
        // User-provided query: the mapping can only be created *after* preparation.
        Stream<ColumnDefinitions> definitions = getVariables();
        List<CQLFragment> variableNames =
            definitions
                .flatMap(defs -> StreamSupport.stream(defs.spliterator(), false))
                .map(def -> def.getName().asInternal())
                .map(CQLWord::fromInternal)
                .collect(Collectors.toList());
        fieldMapping = createFieldsToVariablesMap(variableNames);

        // Validate the user-provided query.
        if (schemaGenerationStrategy.isWriting()) {
            if (mutatesOnlyStaticColumns()) {
                // DAT-414: mutations that only touch static columns may omit the clustering
                // columns; only the partition key has to be present.
                validatePartitionKeyPresent(fieldMapping);
            } else {
                validatePrimaryKeyPresent(fieldMapping);
            }
        }
    }

    assert fieldMapping != null;
    return new DefaultMapping(
        transformFieldsToVariables(fieldMapping),
        codecFactory,
        transformWriteTimeVariables(queryInspector.getWriteTimeVariables()));
}
Also used : MAPPED_OR_INDEXED(com.datastax.oss.dsbulk.mapping.MappingPreference.MAPPED_OR_INDEXED) CqlIdentifier(com.datastax.oss.driver.api.core.CqlIdentifier) MAPPED_ONLY(com.datastax.oss.dsbulk.mapping.MappingPreference.MAPPED_ONLY) DefaultMapping(com.datastax.oss.dsbulk.mapping.DefaultMapping) GenericType(com.datastax.oss.driver.api.core.type.reflect.GenericType) TokenRangeReadStatementGenerator(com.datastax.oss.dsbulk.partitioner.TokenRangeReadStatementGenerator) ConfigUtils(com.datastax.oss.dsbulk.config.ConfigUtils) ALIASED_SELECTOR(com.datastax.oss.dsbulk.mapping.CQLRenderMode.ALIASED_SELECTOR) BatchType(com.datastax.oss.driver.api.core.cql.BatchType) Map(java.util.Map) VisibleForTesting(com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting) ProtocolVersion(com.datastax.oss.driver.api.core.ProtocolVersion) EnumSet(java.util.EnumSet) CQLRenderMode(com.datastax.oss.dsbulk.mapping.CQLRenderMode) VARIABLE(com.datastax.oss.dsbulk.mapping.CQLRenderMode.VARIABLE) MapType(com.datastax.oss.driver.api.core.type.MapType) GraphUtils(com.datastax.oss.dsbulk.workflow.commons.utils.GraphUtils) Set(java.util.Set) ImmutableList(com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList) Field(com.datastax.oss.dsbulk.connectors.api.Field) Stream(java.util.stream.Stream) ConfigException(com.typesafe.config.ConfigException) CQLWord(com.datastax.oss.dsbulk.mapping.CQLWord) INDEXED_ONLY(com.datastax.oss.dsbulk.mapping.MappingPreference.INDEXED_ONLY) MICROSECONDS(java.util.concurrent.TimeUnit.MICROSECONDS) CQLLiteral(com.datastax.oss.dsbulk.mapping.CQLLiteral) ImmutableSet(com.datastax.oss.driver.shaded.guava.common.collect.ImmutableSet) Preconditions(com.datastax.oss.driver.shaded.guava.common.base.Preconditions) STAR(com.datastax.oss.dsbulk.mapping.MappingInspector.STAR) ViewMetadata(com.datastax.oss.driver.api.core.metadata.schema.ViewMetadata) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) 
CqlSession(com.datastax.oss.driver.api.core.CqlSession) DefaultReadResultMapper(com.datastax.oss.dsbulk.workflow.commons.schema.DefaultReadResultMapper) FunctionCall(com.datastax.oss.dsbulk.mapping.FunctionCall) StreamSupport(java.util.stream.StreamSupport) Metadata(com.datastax.oss.driver.api.core.metadata.Metadata) LinkedHashSet(java.util.LinkedHashSet) DseEdgeMetadata(com.datastax.dse.driver.api.core.metadata.schema.DseEdgeMetadata) TableMetadata(com.datastax.oss.driver.api.core.metadata.schema.TableMetadata) RelationMetadata(com.datastax.oss.driver.api.core.metadata.schema.RelationMetadata) MappingPreference(com.datastax.oss.dsbulk.mapping.MappingPreference) Config(com.typesafe.config.Config) NestedBatchException(com.datastax.oss.dsbulk.workflow.commons.schema.NestedBatchException) DataType(com.datastax.oss.driver.api.core.type.DataType) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) DefaultRecordMapper(com.datastax.oss.dsbulk.workflow.commons.schema.DefaultRecordMapper) RecordMapper(com.datastax.oss.dsbulk.workflow.commons.schema.RecordMapper) DseTableMetadata(com.datastax.dse.driver.api.core.metadata.schema.DseTableMetadata) CQLFragment(com.datastax.oss.dsbulk.mapping.CQLFragment) WRITETIME(com.datastax.oss.dsbulk.mapping.MappingInspector.WRITETIME) ReadResultMapper(com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper) DseVertexMetadata(com.datastax.dse.driver.api.core.metadata.schema.DseVertexMetadata) Nullable(edu.umd.cs.findbugs.annotations.Nullable) StatisticsMode(com.datastax.oss.dsbulk.workflow.commons.settings.StatsSettings.StatisticsMode) DefaultReadResultCounter(com.datastax.oss.dsbulk.workflow.commons.schema.DefaultReadResultCounter) CodecUtils.instantToNumber(com.datastax.oss.dsbulk.codecs.api.util.CodecUtils.instantToNumber) IndexedMappingField(com.datastax.oss.dsbulk.mapping.IndexedMappingField) LoggerFactory(org.slf4j.LoggerFactory) MappingField(com.datastax.oss.dsbulk.mapping.MappingField) 
QueryInspector(com.datastax.oss.dsbulk.workflow.commons.schema.QueryInspector) Mapping(com.datastax.oss.dsbulk.mapping.Mapping) ReadResultCounter(com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultCounter) Lists(com.datastax.oss.driver.shaded.guava.common.collect.Lists) ConvertingCodec(com.datastax.oss.dsbulk.codecs.api.ConvertingCodec) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Predicates(com.datastax.oss.driver.shaded.guava.common.base.Predicates) URI(java.net.URI) IndexMetadata(com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata) Record(com.datastax.oss.dsbulk.connectors.api.Record) ImmutableMultimap(com.datastax.oss.driver.shaded.guava.common.collect.ImmutableMultimap) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) NAMED_ASSIGNMENT(com.datastax.oss.dsbulk.mapping.CQLRenderMode.NAMED_ASSIGNMENT) Objects(java.util.Objects) KeyspaceMetadata(com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata) DataTypes(com.datastax.oss.driver.api.core.type.DataTypes) List(java.util.List) Entry(java.util.Map.Entry) Optional(java.util.Optional) SetType(com.datastax.oss.driver.api.core.type.SetType) TIMESTAMP_PATTERN(com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIMESTAMP_PATTERN) MappingInspector(com.datastax.oss.dsbulk.mapping.MappingInspector) DseGraphKeyspaceMetadata(com.datastax.dse.driver.api.core.metadata.schema.DseGraphKeyspaceMetadata) ListType(com.datastax.oss.driver.api.core.type.ListType) HashSet(java.util.HashSet) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) ColumnDefinitions(com.datastax.oss.driver.api.core.cql.ColumnDefinitions) MappedMappingField(com.datastax.oss.dsbulk.mapping.MappedMappingField) ImmutableSetMultimap(com.datastax.oss.driver.shaded.guava.common.collect.ImmutableSetMultimap) INTERNAL(com.datastax.oss.dsbulk.mapping.CQLRenderMode.INTERNAL) Logger(org.slf4j.Logger) 
Iterator(java.util.Iterator) TypedCQLLiteral(com.datastax.oss.dsbulk.mapping.TypedCQLLiteral) PreparedStatement(com.datastax.oss.driver.api.core.cql.PreparedStatement) DseGraphTableMetadata(com.datastax.dse.driver.api.core.metadata.schema.DseGraphTableMetadata) ColumnMetadata(com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata) DefaultProtocolVersion(com.datastax.oss.driver.api.core.DefaultProtocolVersion) UserDefinedType(com.datastax.oss.driver.api.core.type.UserDefinedType) TTL(com.datastax.oss.dsbulk.mapping.MappingInspector.TTL) Multimap(com.datastax.oss.driver.shaded.guava.common.collect.Multimap) EPOCH(java.time.Instant.EPOCH) Collections(java.util.Collections) Statement(com.datastax.oss.driver.api.core.cql.Statement) ColumnDefinitions(com.datastax.oss.driver.api.core.cql.ColumnDefinitions) DefaultMapping(com.datastax.oss.dsbulk.mapping.DefaultMapping) CQLFragment(com.datastax.oss.dsbulk.mapping.CQLFragment) IndexedMappingField(com.datastax.oss.dsbulk.mapping.IndexedMappingField) MappingField(com.datastax.oss.dsbulk.mapping.MappingField) MappedMappingField(com.datastax.oss.dsbulk.mapping.MappedMappingField) QueryInspector(com.datastax.oss.dsbulk.workflow.commons.schema.QueryInspector) CQLWord(com.datastax.oss.dsbulk.mapping.CQLWord) ImmutableList(com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) FunctionCall(com.datastax.oss.dsbulk.mapping.FunctionCall) NonNull(edu.umd.cs.findbugs.annotations.NonNull)

Aggregations

DseEdgeMetadata (com.datastax.dse.driver.api.core.metadata.schema.DseEdgeMetadata)1 DseGraphKeyspaceMetadata (com.datastax.dse.driver.api.core.metadata.schema.DseGraphKeyspaceMetadata)1 DseGraphTableMetadata (com.datastax.dse.driver.api.core.metadata.schema.DseGraphTableMetadata)1 DseTableMetadata (com.datastax.dse.driver.api.core.metadata.schema.DseTableMetadata)1 DseVertexMetadata (com.datastax.dse.driver.api.core.metadata.schema.DseVertexMetadata)1 CqlIdentifier (com.datastax.oss.driver.api.core.CqlIdentifier)1 CqlSession (com.datastax.oss.driver.api.core.CqlSession)1 DefaultProtocolVersion (com.datastax.oss.driver.api.core.DefaultProtocolVersion)1 ProtocolVersion (com.datastax.oss.driver.api.core.ProtocolVersion)1 BatchType (com.datastax.oss.driver.api.core.cql.BatchType)1 ColumnDefinitions (com.datastax.oss.driver.api.core.cql.ColumnDefinitions)1 PreparedStatement (com.datastax.oss.driver.api.core.cql.PreparedStatement)1 Statement (com.datastax.oss.driver.api.core.cql.Statement)1 Metadata (com.datastax.oss.driver.api.core.metadata.Metadata)1 ColumnMetadata (com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata)1 IndexMetadata (com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata)1 KeyspaceMetadata (com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata)1 RelationMetadata (com.datastax.oss.driver.api.core.metadata.schema.RelationMetadata)1 TableMetadata (com.datastax.oss.driver.api.core.metadata.schema.TableMetadata)1 ViewMetadata (com.datastax.oss.driver.api.core.metadata.schema.ViewMetadata)1