Usage example of com.datastax.oss.dsbulk.mapping.Mapping in the dsbulk project by DataStax:
the setUp method of the DefaultReadResultMapperTest class.
@BeforeEach
void setUp() {
  // Metadata for three record fields: F0 -> Integer, F1/F2 -> String.
  recordMetadata =
      new TestRecordMetadata(
          ImmutableMap.of(
              F0, GenericType.of(Integer.class),
              F1, GenericType.of(String.class),
              F2, GenericType.of(String.class)));
  // A mocked row exposing three columns: C1 (int), C2 (text), C3 (text).
  row = mock(Row.class);
  when(row.codecRegistry()).thenReturn(new DefaultCodecRegistry("test"));
  ColumnDefinition intColumn = mockColumnDefinition(C1.asIdentifier(), DataTypes.INT);
  ColumnDefinition textColumn1 = mockColumnDefinition(C2.asIdentifier(), DataTypes.TEXT);
  ColumnDefinition textColumn2 = mockColumnDefinition(C3.asIdentifier(), DataTypes.TEXT);
  ColumnDefinitions rowVariables = mockColumnDefinitions(intColumn, textColumn1, textColumn2);
  when(row.getColumnDefinitions()).thenReturn(rowVariables);
  // The read result under test wraps the mocked row.
  result = mock(ReadResult.class);
  when(result.getRow()).thenReturn(Optional.of(row));
  // Bidirectional field <-> variable mapping: F0<->C1, F1<->C2, F2<->C3.
  mapping = mock(Mapping.class);
  when(mapping.fields()).thenReturn(newLinkedHashSet(F0, F1, F2));
  when(mapping.fieldToVariables(F0)).thenReturn(singleton(C1));
  when(mapping.fieldToVariables(F1)).thenReturn(singleton(C2));
  when(mapping.fieldToVariables(F2)).thenReturn(singleton(C3));
  when(mapping.variableToFields(C1)).thenReturn(singleton(F0));
  when(mapping.variableToFields(C2)).thenReturn(singleton(F1));
  when(mapping.variableToFields(C3)).thenReturn(singleton(F2));
  // Codecs and stubbed column values: C1 = 42, C2 = "foo", C3 = "bar".
  codec1 = TypeCodecs.INT;
  TypeCodec<String> textCodec = TypeCodecs.TEXT;
  when(mapping.codec(C1, DataTypes.INT, GenericType.of(Integer.class))).thenReturn(codec1);
  when(mapping.codec(C2, DataTypes.TEXT, GenericType.of(String.class))).thenReturn(textCodec);
  when(mapping.codec(C3, DataTypes.TEXT, GenericType.of(String.class))).thenReturn(textCodec);
  when(row.get(C1.asIdentifier(), codec1)).thenReturn(42);
  when(row.get(C2.asIdentifier(), textCodec)).thenReturn("foo");
  when(row.get(C3.asIdentifier(), textCodec)).thenReturn("bar");
  when(row.getObject(C1.asIdentifier())).thenReturn(42);
  when(row.getObject(C2.asIdentifier())).thenReturn("foo");
  when(row.getObject(C3.asIdentifier())).thenReturn("bar");
  // to generate locations
  BoundStatement stmt = mock(BoundStatement.class);
  PreparedStatement prepared = mock(PreparedStatement.class);
  // NOTE(review): then(...) rather than thenReturn(...) — presumably to satisfy the
  // generic (wildcard) return type of getStatement(); confirm before changing.
  when(result.getStatement()).then(invocation -> stmt);
  when(stmt.getPreparedStatement()).thenReturn(prepared);
  when(prepared.getQuery()).thenReturn("irrelevant");
  ColumnDefinition rangeStart = mockColumnDefinition("start", DataTypes.BIGINT);
  ColumnDefinition rangeEnd = mockColumnDefinition("end", DataTypes.BIGINT);
  when(prepared.getVariableDefinitions()).thenReturn(mockColumnDefinitions(rangeStart, rangeEnd));
  when(stmt.getObject(CqlIdentifier.fromInternal("start"))).thenReturn(1234L);
  when(stmt.getObject(CqlIdentifier.fromInternal("end"))).thenReturn(5678L);
}
Usage example of com.datastax.oss.dsbulk.mapping.Mapping in the dsbulk project by DataStax:
the inferBatchInsertQuery method of the SchemaSettings class.
/**
 * Infers a {@code BEGIN UNLOGGED BATCH ... APPLY BATCH} insert statement for mappings that
 * contain per-column {@code writetime(...)} and/or {@code ttl(...)} function calls. Columns
 * without a specific writetime/TTL are grouped into one regular INSERT child statement; each
 * column with a specific writetime/TTL gets its own child statement. If only one child statement
 * would be generated, the BATCH wrapper is omitted.
 *
 * @param fieldsToVariables the field-to-variable mapping to generate the query from.
 * @return the generated CQL query string.
 * @throws IllegalStateException if the mapping is invalid: writetime/ttl targets a primary key
 *     column, the star form conflicts with preserveTimestamp/preserveTtl, a function references a
 *     column absent from the mapping, or a star function has no target column.
 */
private String inferBatchInsertQuery(
    ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables) {
  List<CQLWord> pks = primaryKeyColumns();
  // Variables (columns and their function calls) that belong to a dedicated child statement.
  Set<CQLFragment> allSpecificVariables = new LinkedHashSet<>();
  // Per-column writetime/TTL info, keyed by target column.
  Map<CQLWord, WriteTimeAndTTL> specificWriteTimesAndTTLs = new LinkedHashMap<>();
  boolean hasGlobalWritetime = false;
  boolean hasGlobalTTL = false;
  for (CQLFragment variable : fieldsToVariables.values()) {
    if (variable instanceof FunctionCall) {
      FunctionCall functionCall = (FunctionCall) variable;
      if (functionCall.getFunctionName().equals(WRITETIME)) {
        for (CQLFragment arg : functionCall.getArgs()) {
          if (arg.equals(STAR)) {
            // writetime(*) applies to the whole row; incompatible with preserveTimestamp.
            if (preserveTimestamp) {
              throw new IllegalStateException(
                  "Invalid mapping: writetime(*) is not allowed when schema.preserveTimestamp is true.");
            }
            hasGlobalWritetime = true;
          } else {
            CQLWord col = (CQLWord) arg;
            if (pks.contains(col)) {
              throw new IllegalStateException(
                  "Invalid mapping: writetime() function arg must be either '*' or a non-primary key column name.");
            }
            if (fieldsToVariables.containsValue(col)) {
              allSpecificVariables.add(col);
              allSpecificVariables.add(functionCall);
              specificWriteTimesAndTTLs.compute(
                  col,
                  (k, v) -> {
                    if (v == null) {
                      v = new WriteTimeAndTTL();
                      // The inserted value is the mapped field if it is a CQL fragment
                      // (e.g. a literal), otherwise the column itself.
                      MappingField colField =
                          fieldsToVariables.inverse().get(col).iterator().next();
                      v.value = colField instanceof CQLFragment ? (CQLFragment) colField : col;
                    }
                    MappingField writetimeField =
                        fieldsToVariables.inverse().get(functionCall).iterator().next();
                    v.writetime =
                        writetimeField instanceof CQLLiteral
                            ? (CQLLiteral) writetimeField
                            : CQLWord.fromInternal(functionCall.render(INTERNAL));
                    return v;
                  });
            } else {
              throw new IllegalStateException(
                  String.format(
                      "Invalid mapping: target column %s must be present if %s is also present.",
                      col.render(VARIABLE), functionCall.render(INTERNAL)));
            }
          }
        }
      } else if (functionCall.getFunctionName().equals(TTL)) {
        for (CQLFragment arg : functionCall.getArgs()) {
          if (arg.equals(STAR)) {
            // ttl(*) applies to the whole row; incompatible with preserveTtl.
            if (preserveTtl) {
              throw new IllegalStateException(
                  "Invalid mapping: ttl(*) is not allowed when schema.preserveTtl is true.");
            }
            hasGlobalTTL = true;
          } else {
            CQLWord col = (CQLWord) arg;
            if (pks.contains(col)) {
              throw new IllegalStateException(
                  "Invalid mapping: ttl() function arg must be either '*' or a non-primary key column name.");
            }
            if (fieldsToVariables.containsValue(col)) {
              allSpecificVariables.add(col);
              allSpecificVariables.add(functionCall);
              // Fix: key on col (== (CQLWord) arg) for consistency with the writetime branch
              // above; the redundant cast was confusing.
              specificWriteTimesAndTTLs.compute(
                  col,
                  (k, v) -> {
                    if (v == null) {
                      v = new WriteTimeAndTTL();
                      MappingField colField =
                          fieldsToVariables.inverse().get(col).iterator().next();
                      v.value = colField instanceof CQLFragment ? (CQLFragment) colField : col;
                    }
                    MappingField ttlField =
                        fieldsToVariables.inverse().get(functionCall).iterator().next();
                    v.ttl =
                        ttlField instanceof CQLLiteral
                            ? (CQLLiteral) ttlField
                            : CQLWord.fromInternal(functionCall.render(INTERNAL));
                    return v;
                  });
            } else {
              throw new IllegalStateException(
                  String.format(
                      "Invalid mapping: target column %s must be present if %s is also present.",
                      col.render(VARIABLE), functionCall.render(INTERNAL)));
            }
          }
        }
      }
    }
  }
  // Entries not tied to a specific writetime/TTL go into the default child statement.
  ImmutableMultimap.Builder<MappingField, CQLFragment> defaultFieldsToVariablesBuilder =
      ImmutableMultimap.builder();
  for (Entry<MappingField, CQLFragment> entry : fieldsToVariables.entries()) {
    CQLFragment variable = entry.getValue();
    if (!allSpecificVariables.contains(variable)) {
      defaultFieldsToVariablesBuilder.put(entry);
    }
  }
  ImmutableMultimap<MappingField, CQLFragment> defaultFieldsToVariables =
      defaultFieldsToVariablesBuilder.build();
  boolean hasRegularColumnsWithoutSpecificWritetimeAndTTL =
      defaultFieldsToVariables.values().stream()
          .filter(CQLWord.class::isInstance)
          .map(CQLWord.class::cast)
          .anyMatch(variable -> !pks.contains(variable));
  if (!hasRegularColumnsWithoutSpecificWritetimeAndTTL) {
    // Star functions require at least one regular column in the default statement.
    if (hasGlobalWritetime) {
      throw new IllegalStateException(
          "Invalid mapping: writetime(*) function has no target column.");
    }
    if (hasGlobalTTL) {
      throw new IllegalStateException("Invalid mapping: ttl(*) function has no target column.");
    }
  }
  StringBuilder sb = new StringBuilder();
  if (!hasRegularColumnsWithoutSpecificWritetimeAndTTL && specificWriteTimesAndTTLs.size() == 1) {
    // edge case: there is only one regular column in the table,
    // and it has specific writetime or ttl: no need for a BATCH as there is only one child
    // statement.
    Entry<CQLWord, WriteTimeAndTTL> entry =
        specificWriteTimesAndTTLs.entrySet().iterator().next();
    appendBatchChildQuery(
        sb,
        entry.getKey(),
        entry.getValue().value,
        entry.getValue().writetime,
        entry.getValue().ttl,
        pks);
  } else {
    sb.append("BEGIN UNLOGGED BATCH ");
    // generate a first INSERT INTO child query similar to the ones generated for simple INSERTs.
    if (hasRegularColumnsWithoutSpecificWritetimeAndTTL) {
      sb.append(inferInsertQuery(defaultFieldsToVariables)).append("; ");
    }
    // generate a specific INSERT INTO query for that variable only + its TTL and/or writetime.
    for (Entry<CQLWord, WriteTimeAndTTL> entry : specificWriteTimesAndTTLs.entrySet()) {
      appendBatchChildQuery(
          sb,
          entry.getKey(),
          entry.getValue().value,
          entry.getValue().writetime,
          entry.getValue().ttl,
          pks);
      sb.append("; ");
    }
    sb.append("APPLY BATCH");
  }
  return sb.toString();
}
Usage example of com.datastax.oss.dsbulk.mapping.Mapping in the dsbulk project by DataStax:
the createRecordMapper method of the SchemaSettings class.
/**
 * Creates a {@link RecordMapper} from the current settings, preparing the write statement(s) on
 * the given session.
 *
 * @param session the session to prepare statements against.
 * @param recordMetadata metadata describing the incoming records' fields.
 * @param batchingEnabled whether batch statements may be unwrapped into child statements.
 * @return a configured {@link DefaultRecordMapper}.
 * @throws IllegalStateException if the schema generation strategy is not a writing + mapping
 *     strategy.
 */
public RecordMapper createRecordMapper(
    CqlSession session, RecordMetadata recordMetadata, boolean batchingEnabled)
    throws IllegalArgumentException {
  if (!schemaGenerationStrategy.isWriting() || !schemaGenerationStrategy.isMapping()) {
    throw new IllegalStateException(
        "Cannot create record mapper when schema generation strategy is "
            + schemaGenerationStrategy);
  }
  Mapping mapping =
      prepareStatementAndCreateMapping(
          session, batchingEnabled, EnumSet.noneOf(StatisticsMode.class));
  ProtocolVersion protocolVersion = session.getContext().getProtocolVersion();
  // Unset bound variables require native protocol v4+; fall back to binding nulls otherwise.
  if (protocolVersion.getCode() < DefaultProtocolVersion.V4.getCode() && nullToUnset) {
    // SLF4J parameterized logging instead of eager String.format.
    LOGGER.warn(
        "Protocol version in use ({}) does not support unset bound variables; "
            + "forcing schema.nullToUnset to false",
        protocolVersion);
    nullToUnset = false;
  }
  return new DefaultRecordMapper(
      preparedStatements,
      partitionKeyVariables(),
      // DAT-414: statements mutating only static columns need not bind clustering columns.
      mutatesOnlyStaticColumns() ? Collections.emptySet() : clusteringColumnVariables(),
      protocolVersion,
      mapping,
      recordMetadata,
      nullToUnset,
      allowExtraFields,
      allowMissingFields);
}
Usage example of com.datastax.oss.dsbulk.mapping.Mapping in the dsbulk project by DataStax:
the prepareStatementAndCreateMapping method of the SchemaSettings class.
/**
 * Prepares the statement(s) to execute and builds the {@link Mapping} between record fields and
 * bound variables.
 *
 * <p>Two paths: when no user query is configured, the mapping is created first from the table's
 * columns and a query is generated from it; when a user query is configured, the query is
 * (possibly) rewritten and prepared first, and the mapping is derived from the prepared
 * statement's variables afterwards.
 */
@NonNull
private Mapping prepareStatementAndCreateMapping(CqlSession session, boolean batchingEnabled, EnumSet<StatisticsMode> modes) {
ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables = null;
if (!config.hasPath(QUERY)) {
// in the absence of user-provided queries, create the mapping *before* query generation and
// preparation
// Collect mappable columns; when preserving timestamps/TTLs, each eligible column also
// contributes a writetime()/ttl() function call variable.
List<CQLFragment> columns = table.getColumns().values().stream().filter(col -> !isDSESearchPseudoColumn(col)).flatMap(column -> {
CQLWord colName = CQLWord.fromCqlIdentifier(column.getName());
List<CQLFragment> cols = Lists.newArrayList(colName);
if (schemaGenerationStrategy.isMapping()) {
if (preserveTimestamp && checkWritetimeTtlSupported(column, WRITETIME)) {
cols.add(new FunctionCall(null, WRITETIME, colName));
}
if (preserveTtl && checkWritetimeTtlSupported(column, TTL)) {
cols.add(new FunctionCall(null, TTL, colName));
}
}
return cols.stream();
}).collect(Collectors.toList());
fieldsToVariables = createFieldsToVariablesMap(columns);
// query generation
// Pick the query shape based on the workflow: counter update, batch insert (when the
// mapping carries per-column writetime/TTL), plain insert, read, or count.
if (schemaGenerationStrategy.isWriting()) {
if (isCounterTable()) {
query = inferUpdateCounterQuery(fieldsToVariables);
} else if (requiresBatchInsertQuery(fieldsToVariables)) {
query = inferBatchInsertQuery(fieldsToVariables);
} else {
query = inferInsertQuery(fieldsToVariables);
}
} else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isMapping()) {
query = inferReadQuery(fieldsToVariables);
} else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isCounting()) {
query = inferCountQuery(modes);
} else {
throw new IllegalStateException("Unsupported schema generation strategy: " + schemaGenerationStrategy);
}
LOGGER.debug("Inferred query: {}", query);
queryInspector = new QueryInspector(query);
// validate generated query
if (schemaGenerationStrategy.isWriting()) {
validatePrimaryKeyPresent(fieldsToVariables);
}
}
assert query != null;
assert queryInspector != null;
// If the query does not qualify its keyspace, switch the session to the configured one.
if (!queryInspector.getKeyspaceName().isPresent()) {
session.execute("USE " + keyspaceName);
}
// Transform user-provided queries before preparation
if (config.hasPath(QUERY)) {
// Reads that can be parallelized get a token-range restriction spliced in right after the
// FROM clause.
if (schemaGenerationStrategy.isReading() && queryInspector.isParallelizable()) {
int whereClauseIndex = queryInspector.getFromClauseEndIndex() + 1;
StringBuilder sb = new StringBuilder(query.substring(0, whereClauseIndex));
appendTokenRangeRestriction(sb);
query = sb.append(query.substring(whereClauseIndex)).toString();
}
if (schemaGenerationStrategy.isCounting()) {
// Only global counting is compatible with a user-provided query.
if (modes.contains(StatisticsMode.partitions) || modes.contains(StatisticsMode.ranges) || modes.contains(StatisticsMode.hosts)) {
throw new IllegalArgumentException(String.format("Cannot count with stats.modes = %s when schema.query is provided; " + "only stats.modes = [global] is allowed", modes));
}
// reduce row size by only selecting one column
StringBuilder sb = new StringBuilder("SELECT ");
sb.append(getGlobalCountSelector());
query = sb.append(' ').append(query.substring(queryInspector.getFromClauseStartIndex())).toString();
}
// Re-inspect: the query text may have changed above.
queryInspector = new QueryInspector(query);
}
// BATCH queries are unwrapped into one prepared statement per child when batching is enabled.
if (batchingEnabled && queryInspector.isBatch()) {
preparedStatements = unwrapAndPrepareBatchChildStatements(session);
} else {
preparedStatements = Collections.singletonList(session.prepare(query));
}
if (config.hasPath(QUERY)) {
// in the presence of user-provided queries, create the mapping *after* query preparation
// The variables are taken from the prepared statement(s) themselves.
Stream<ColumnDefinitions> variables = getVariables();
fieldsToVariables = createFieldsToVariablesMap(variables.flatMap(defs -> StreamSupport.stream(defs.spliterator(), false)).map(def -> def.getName().asInternal()).map(CQLWord::fromInternal).collect(Collectors.toList()));
// validate user-provided query
if (schemaGenerationStrategy.isWriting()) {
if (mutatesOnlyStaticColumns()) {
// DAT-414: mutations that only affect static columns are allowed
// to skip the clustering columns, only the partition key should be present.
validatePartitionKeyPresent(fieldsToVariables);
} else {
validatePrimaryKeyPresent(fieldsToVariables);
}
}
}
assert fieldsToVariables != null;
return new DefaultMapping(transformFieldsToVariables(fieldsToVariables), codecFactory, transformWriteTimeVariables(queryInspector.getWriteTimeVariables()));
}
Aggregations