use of com.datastax.oss.dsbulk.mapping.CQLFragment in project dsbulk by datastax.
the class SchemaSettings method inferInsertQuery.
/**
 * Generates the INSERT statement for the target table from the given mapping.
 *
 * <p>Entries whose variable is a {@code FunctionCall} named {@code WRITETIME} or {@code TTL} are
 * not rendered as columns; the fragment derived from them is handed to
 * {@code appendWriteTimeAndTTL}. Function calls with any other name are left out of the generated
 * statement. Every remaining entry contributes one column name and one named bound value.
 *
 * @param fieldsToVariables the mapping from fields to variables (or function calls).
 * @return the text of the generated INSERT query.
 */
private String inferInsertQuery(ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables) {
  ImmutableMultimap.Builder<MappingField, CQLFragment> plainEntries = ImmutableMultimap.builder();
  CQLFragment writetime = null;
  CQLFragment ttl = null;
  for (Entry<MappingField, CQLFragment> mappingEntry : fieldsToVariables.entries()) {
    CQLFragment target = mappingEntry.getValue();
    if (!(target instanceof FunctionCall)) {
      // regular column: keep it for the column list and the VALUES clause
      plainEntries.put(mappingEntry);
      continue;
    }
    FunctionCall call = (FunctionCall) target;
    MappingField source = mappingEntry.getKey();
    if (call.getFunctionName().equals(WRITETIME)) {
      assert writetime == null;
      // a literal on the field side is used verbatim; otherwise the rendered call names the variable
      writetime = source instanceof CQLLiteral
          ? (CQLLiteral) source
          : CQLWord.fromInternal(call.render(INTERNAL));
    } else if (call.getFunctionName().equals(TTL)) {
      assert ttl == null;
      ttl = source instanceof CQLLiteral
          ? (CQLLiteral) source
          : CQLWord.fromInternal(call.render(INTERNAL));
    }
  }
  ImmutableMultimap<MappingField, CQLFragment> regularFieldsToVariables = plainEntries.build();
  StringBuilder insert = new StringBuilder("INSERT INTO ")
      .append(keyspaceName.render(VARIABLE))
      .append('.')
      .append(tableName.render(VARIABLE))
      .append(" (");
  appendColumnNames(regularFieldsToVariables, insert, VARIABLE);
  insert.append(") VALUES (");
  String separator = "";
  for (CQLFragment column : maybeSortCols(regularFieldsToVariables)) {
    insert.append(separator);
    // for insert queries there can be only one field mapped to a given column
    MappingField mappedField = fieldsToVariables.inverse().get(column).iterator().next();
    // a field that is itself a CQL fragment is rendered in place of the column variable
    CQLFragment rendered = mappedField instanceof CQLFragment ? (CQLFragment) mappedField : column;
    insert.append(rendered.render(NAMED_ASSIGNMENT));
    separator = ", ";
  }
  insert.append(')');
  appendWriteTimeAndTTL(insert, writetime, ttl);
  return insert.toString();
}
use of com.datastax.oss.dsbulk.mapping.CQLFragment in project dsbulk by datastax.
the class SchemaSettings method init.
/**
 * Validates the schema settings and resolves the keyspace and table they designate.
 *
 * <p>The method proceeds in this order: checks for mutually exclusive or missing settings;
 * keyspace/graph and table/vertex/edge lookup; query timestamp and TTL; inspection of a
 * user-provided {@code schema.query}, if any; selection of the mapping preference; validation of
 * {@code schema.mapping}; miscellaneous settings; and finally graph compatibility checks.
 *
 * @param session the session whose metadata is used to locate the keyspace and table.
 * @param codecFactory the codec factory, used here to parse {@code schema.queryTimestamp}.
 * @param indexedMappingSupported whether indexed mappings are supported by the connector.
 * @param mappedMappingSupported whether mapped mappings are supported by the connector.
 * @throws IllegalArgumentException if a setting is invalid or conflicts with another setting; a
 *     {@code ConfigException} is also converted through {@code ConfigUtils.convertConfigException}.
 * @throws IllegalStateException if the mapping or graph options cannot be honored.
 */
public void init(CqlSession session, ConvertingCodecFactory codecFactory, boolean indexedMappingSupported, boolean mappedMappingSupported) {
  this.codecFactory = codecFactory;
  try {
    // Mutually exclusive or incomplete combinations of settings
    if (config.hasPath(KEYSPACE) && config.hasPath(GRAPH)) {
      throw new IllegalArgumentException("Settings schema.keyspace and schema.graph are mutually exclusive");
    }
    if (config.hasPath(TABLE) && config.hasPath(VERTEX)) {
      throw new IllegalArgumentException("Settings schema.table and schema.vertex are mutually exclusive");
    }
    if (config.hasPath(TABLE) && config.hasPath(EDGE)) {
      throw new IllegalArgumentException("Settings schema.table and schema.edge are mutually exclusive");
    }
    if (config.hasPath(VERTEX) && config.hasPath(EDGE)) {
      throw new IllegalArgumentException("Settings schema.vertex and schema.edge are mutually exclusive");
    }
    if (config.hasPath(EDGE)) {
      if (!config.hasPath(FROM)) {
        throw new IllegalArgumentException("Setting schema.from is required when schema.edge is specified");
      }
      if (!config.hasPath(TO)) {
        throw new IllegalArgumentException("Setting schema.to is required when schema.edge is specified");
      }
    }
    if (config.hasPath(QUERY) && (config.hasPath(TABLE) || config.hasPath(VERTEX) || config.hasPath(EDGE))) {
      throw new IllegalArgumentException("Setting schema.query must not be defined if schema.table, schema.vertex or schema.edge are defined");
    }
    if ((!config.hasPath(KEYSPACE) && !config.hasPath(GRAPH)) && (config.hasPath(TABLE) || config.hasPath(VERTEX) || config.hasPath(EDGE))) {
      throw new IllegalArgumentException("Settings schema.keyspace or schema.graph must be defined if schema.table, schema.vertex or schema.edge are defined");
    }
    // Keyspace and table lookup; both may remain unset here when only schema.query is given
    if (config.hasPath(KEYSPACE)) {
      keyspace = locateKeyspace(session.getMetadata(), config.getString(KEYSPACE));
    } else if (config.hasPath(GRAPH)) {
      keyspace = locateKeyspace(session.getMetadata(), config.getString(GRAPH));
    }
    if (keyspace != null) {
      if (config.hasPath(TABLE)) {
        table = locateTable(keyspace, config.getString(TABLE));
      } else if (config.hasPath(VERTEX)) {
        table = locateVertexTable(keyspace, config.getString(VERTEX));
      } else if (config.hasPath(EDGE)) {
        table = locateEdgeTable(keyspace, config.getString(EDGE), config.getString(FROM), config.getString(TO));
      }
    }
    // Timestamp and TTL
    ttlSeconds = config.getInt(QUERY_TTL);
    if (config.hasPath(QUERY_TIMESTAMP)) {
      String timestampStr = config.getString(QUERY_TIMESTAMP);
      try {
        ConvertingCodec<String, Instant> codec = codecFactory.createConvertingCodec(DataTypes.TIMESTAMP, GenericType.STRING, true);
        Instant instant = codec.externalToInternal(timestampStr);
        this.timestampMicros = instantToNumber(instant, MICROSECONDS, EPOCH);
      } catch (Exception e) {
        Object format = codecFactory.getContext().getAttribute(TIMESTAMP_PATTERN);
        // keep the original failure as the cause so that the parsing error is not lost
        throw new IllegalArgumentException(String.format("Expecting schema.queryTimestamp to be in %s format but got '%s'", format, timestampStr), e);
      }
    } else {
      // -1 marks the timestamp as not set
      this.timestampMicros = -1L;
    }
    preserveTimestamp = config.getBoolean(PRESERVE_TIMESTAMP);
    preserveTtl = config.getBoolean(PRESERVE_TTL);
    if (config.hasPath(QUERY)) {
      // User-provided query: the keyspace and table are taken from the statement itself
      query = config.getString(QUERY);
      queryInspector = new QueryInspector(query);
      if (queryInspector.getKeyspaceName().isPresent()) {
        if (keyspace != null) {
          throw new IllegalArgumentException("Setting schema.keyspace must not be provided when schema.query contains a keyspace-qualified statement");
        }
        CQLWord keyspaceName = queryInspector.getKeyspaceName().get();
        keyspace = session.getMetadata().getKeyspace(keyspaceName.asIdentifier()).orElse(null);
        if (keyspace == null) {
          throw new IllegalArgumentException(String.format("Value for schema.query references a non-existent keyspace: %s", keyspaceName.render(VARIABLE)));
        }
      } else if (keyspace == null) {
        throw new IllegalArgumentException("Setting schema.keyspace must be provided when schema.query does not contain a keyspace-qualified statement");
      }
      CQLWord tableName = queryInspector.getTableName();
      table = keyspace.getTable(tableName.asIdentifier()).orElse(null);
      if (table == null) {
        // not a table: fall back to a materialized view with the same name
        table = keyspace.getView(tableName.asIdentifier()).orElse(null);
        if (table == null) {
          throw new IllegalArgumentException(String.format("Value for schema.query references a non-existent table or materialized view: %s", tableName.render(VARIABLE)));
        }
      }
      // If a query is provided, ttl and timestamp must not be.
      if (timestampMicros != -1 || ttlSeconds != -1) {
        throw new IllegalArgumentException("Setting schema.query must not be defined if schema.queryTtl or schema.queryTimestamp is defined");
      }
      if (preserveTimestamp || preserveTtl) {
        throw new IllegalArgumentException("Setting schema.query must not be defined if schema.preserveTimestamp or schema.preserveTtl is defined");
      }
    } else {
      if (keyspace == null || table == null) {
        // Either the keyspace and table must be present, or the query must be present.
        throw new IllegalArgumentException("When schema.query is not defined, " + "then either schema.keyspace or schema.graph must be defined, " + "and either schema.table, schema.vertex or schema.edge must be defined");
      }
    }
    assert keyspace != null;
    assert table != null;
    keyspaceName = CQLWord.fromCqlIdentifier(keyspace.getName());
    tableName = CQLWord.fromCqlIdentifier(table.getName());
    // Mapping preference; it is left untouched when neither kind is supported and no mapping is
    // required by the schema generation strategy
    if (indexedMappingSupported && mappedMappingSupported) {
      mappingPreference = MAPPED_OR_INDEXED;
    } else if (indexedMappingSupported) {
      mappingPreference = INDEXED_ONLY;
    } else if (mappedMappingSupported) {
      mappingPreference = MAPPED_ONLY;
    } else if (schemaGenerationStrategy.isMapping()) {
      throw new IllegalArgumentException("Connector must support at least one of indexed or mapped mappings");
    }
    if (config.hasPath(MAPPING)) {
      if (!schemaGenerationStrategy.isMapping()) {
        throw new IllegalArgumentException("Setting schema.mapping must not be defined when counting rows in a table");
      }
      Supplier<CQLWord> usingTimestampVariable = null;
      Supplier<CQLWord> usingTTLVariable = null;
      if (queryInspector != null) {
        usingTimestampVariable = queryInspector.getUsingTimestampVariable()::get;
        usingTTLVariable = queryInspector.getUsingTTLVariable()::get;
      }
      // TODO remove support for providing external variable names for the deprecated
      // __ttl and __timestamp mapping tokens.
      @SuppressWarnings("deprecation") MappingInspector mapping = new MappingInspector(config.getString(MAPPING), schemaGenerationStrategy.isWriting(), mappingPreference, usingTimestampVariable, usingTTLVariable);
      this.mapping = mapping;
      // "fields" are the left side of the mapping entries, "variables" the right side
      Set<MappingField> fields = mapping.getExplicitMappings().keySet();
      Collection<CQLFragment> variables = mapping.getExplicitMappings().values();
      if (schemaGenerationStrategy.isWriting()) {
        // now() = c1 only allowed if schema.query not present
        if (containsFunctionCalls(variables, WRITETIME_OR_TTL.negate())) {
          throw new IllegalArgumentException("Misplaced function call detected on the right side of a mapping entry; " + "please review your schema.mapping setting");
        }
        if (query != null && containsFunctionCalls(variables, WRITETIME_OR_TTL)) {
          throw new IllegalArgumentException("Setting schema.query must not be defined when loading if schema.mapping " + "contains a writetime or ttl function on the right side of a mapping entry");
        }
        if (query != null && containsFunctionCalls(fields)) {
          throw new IllegalArgumentException("Setting schema.query must not be defined when loading if schema.mapping " + "contains a function on the left side of a mapping entry");
        }
        if (containsWritetimeOrTTLFunctionCalls(mapping.getExplicitMappings())) {
          throw new IllegalArgumentException("Misplaced function call detected on the left side of a writetime or TTL mapping entry; " + "please review your schema.mapping setting");
        }
        // (text)'abc' = c1 only allowed if schema.query not present
        if (containsConstantExpressions(variables)) {
          throw new IllegalArgumentException("Misplaced constant expression detected on the right side of a mapping entry; " + "please review your schema.mapping setting");
        }
        if (query != null && containsConstantExpressions(fields)) {
          throw new IllegalArgumentException("Setting schema.query must not be defined when loading if schema.mapping " + "contains a constant expression on the left side of a mapping entry");
        }
      }
      if (schemaGenerationStrategy.isReading()) {
        // f1 = now() only allowed if schema.query not present
        if (containsFunctionCalls(fields)) {
          throw new IllegalArgumentException("Misplaced function call detected on the left side of a mapping entry; " + "please review your schema.mapping setting");
        }
        if (query != null && containsFunctionCalls(variables)) {
          throw new IllegalArgumentException("Setting schema.query must not be defined when unloading if schema.mapping " + "contains a function on the right side of a mapping entry");
        }
        // f1 = (text)'abc' only allowed if schema.query not present and if literal selectors
        // are supported
        if (containsConstantExpressions(fields)) {
          throw new IllegalArgumentException("Misplaced constant expression detected on the left side of a mapping entry; " + "please review your schema.mapping setting");
        }
        if (containsConstantExpressions(variables)) {
          if (query != null) {
            throw new IllegalArgumentException("Setting schema.query must not be defined when unloading if schema.mapping " + "contains a constant expression on the right side of a mapping entry");
          }
          if (!checkLiteralSelectorsSupported(session)) {
            throw new IllegalStateException("At least one constant expression appears on the right side of a mapping entry, " + "but the cluster does not support CQL literals in the SELECT clause; " + " please review your schema.mapping setting");
          }
        }
      }
      if ((preserveTimestamp || preserveTtl) && !mapping.isInferring()) {
        throw new IllegalStateException("Setting schema.mapping must contain an inferring entry (e.g. '*=*') " + "when schema.preserveTimestamp or schema.preserveTtl is enabled");
      }
    } else {
      // no explicit mapping: fall back to the "*=*" mapping
      mapping = new MappingInspector("*=*", schemaGenerationStrategy.isWriting(), mappingPreference);
    }
    // Misc
    nullToUnset = config.getBoolean(NULL_TO_UNSET);
    allowExtraFields = config.getBoolean(ALLOW_EXTRA_FIELDS);
    allowMissingFields = config.getBoolean(ALLOW_MISSING_FIELDS);
    splits = ConfigUtils.getThreads(config, SPLITS);
    // Graph checks: fail when graph options target a non-graph or unsupported graph keyspace;
    // only warn when regular options target a graph keyspace
    if (hasGraphOptions(config)) {
      GraphUtils.checkGraphCompatibility(session);
      if (!isGraph(keyspace)) {
        throw new IllegalStateException("Graph operations requested but provided keyspace is not a graph: " + keyspaceName);
      }
      if (!isSupportedGraph(keyspace)) {
        assert ((DseGraphKeyspaceMetadata) keyspace).getGraphEngine().isPresent();
        throw new IllegalStateException(String.format("Graph operations requested but provided graph %s was created with an unsupported graph engine: %s", keyspaceName, ((DseGraphKeyspaceMetadata) keyspace).getGraphEngine().get()));
      }
    } else if (isGraph(keyspace)) {
      if (isSupportedGraph(keyspace)) {
        if (config.hasPath(KEYSPACE) || config.hasPath(TABLE)) {
          LOGGER.warn("Provided keyspace is a graph; " + "instead of schema.keyspace and schema.table, please use graph-specific options " + "such as schema.graph, schema.vertex, schema.edge, schema.from and schema.to.");
        }
      } else {
        if (schemaGenerationStrategy == SchemaGenerationStrategy.MAP_AND_WRITE) {
          LOGGER.warn("Provided keyspace is a graph created with a legacy graph engine: " + ((DseGraphKeyspaceMetadata) keyspace).getGraphEngine().get() + "; attempting to load data into such a keyspace is not supported and " + "may put the graph in an inconsistent state.");
        }
      }
    }
  } catch (ConfigException e) {
    throw ConfigUtils.convertConfigException(e, "dsbulk.schema");
  }
}
use of com.datastax.oss.dsbulk.mapping.CQLFragment in project dsbulk by datastax.
the class SchemaSettings method prepareStatementAndCreateMapping.
/**
 * Generates (or adapts) the query, prepares it, and builds the resulting mapping.
 *
 * <p>Without a user-provided {@code schema.query}, the fields-to-variables map is computed from
 * the table columns first and the query is inferred from it. With a user-provided query, the
 * query is adapted and prepared first, and the map is then computed from the variables returned
 * by {@code getVariables()}.
 *
 * @param session the session used to execute {@code USE} and to prepare the statement(s).
 * @param batchingEnabled whether batching is enabled; when it is and the query is a batch, the
 *     child statements are prepared individually.
 * @param modes the statistics modes, only consulted when counting.
 * @return the mapping created for the prepared statement(s).
 */
@NonNull
private Mapping prepareStatementAndCreateMapping(CqlSession session, boolean batchingEnabled, EnumSet<StatisticsMode> modes) {
  final boolean userProvidedQuery = config.hasPath(QUERY);
  ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables = null;
  if (!userProvidedQuery) {
    // in the absence of user-provided queries, create the mapping *before* query generation and
    // preparation
    List<CQLFragment> columns = Lists.newArrayList();
    for (ColumnMetadata column : table.getColumns().values()) {
      if (isDSESearchPseudoColumn(column)) {
        continue;
      }
      CQLWord columnName = CQLWord.fromCqlIdentifier(column.getName());
      columns.add(columnName);
      if (schemaGenerationStrategy.isMapping()) {
        // each column may be followed by its writetime and/or ttl function call
        if (preserveTimestamp && checkWritetimeTtlSupported(column, WRITETIME)) {
          columns.add(new FunctionCall(null, WRITETIME, columnName));
        }
        if (preserveTtl && checkWritetimeTtlSupported(column, TTL)) {
          columns.add(new FunctionCall(null, TTL, columnName));
        }
      }
    }
    fieldsToVariables = createFieldsToVariablesMap(columns);
    // query generation
    if (schemaGenerationStrategy.isWriting()) {
      if (isCounterTable()) {
        query = inferUpdateCounterQuery(fieldsToVariables);
      } else if (requiresBatchInsertQuery(fieldsToVariables)) {
        query = inferBatchInsertQuery(fieldsToVariables);
      } else {
        query = inferInsertQuery(fieldsToVariables);
      }
    } else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isMapping()) {
      query = inferReadQuery(fieldsToVariables);
    } else if (schemaGenerationStrategy.isReading() && schemaGenerationStrategy.isCounting()) {
      query = inferCountQuery(modes);
    } else {
      throw new IllegalStateException("Unsupported schema generation strategy: " + schemaGenerationStrategy);
    }
    LOGGER.debug("Inferred query: {}", query);
    queryInspector = new QueryInspector(query);
    // validate generated query
    if (schemaGenerationStrategy.isWriting()) {
      validatePrimaryKeyPresent(fieldsToVariables);
    }
  }
  assert query != null;
  assert queryInspector != null;
  // the query is not keyspace-qualified: switch the session to the target keyspace
  if (!queryInspector.getKeyspaceName().isPresent()) {
    session.execute("USE " + keyspaceName);
  }
  // Transform user-provided queries before preparation
  if (userProvidedQuery) {
    if (schemaGenerationStrategy.isReading() && queryInspector.isParallelizable()) {
      // splice the token range restriction right after the FROM clause
      int splitIndex = queryInspector.getFromClauseEndIndex() + 1;
      StringBuilder restricted = new StringBuilder(query.substring(0, splitIndex));
      appendTokenRangeRestriction(restricted);
      restricted.append(query.substring(splitIndex));
      query = restricted.toString();
    }
    if (schemaGenerationStrategy.isCounting()) {
      boolean nonGlobalModeRequested = modes.contains(StatisticsMode.partitions) || modes.contains(StatisticsMode.ranges) || modes.contains(StatisticsMode.hosts);
      if (nonGlobalModeRequested) {
        throw new IllegalArgumentException(String.format("Cannot count with stats.modes = %s when schema.query is provided; " + "only stats.modes = [global] is allowed", modes));
      }
      // reduce row size by only selecting one column
      query = new StringBuilder("SELECT ")
          .append(getGlobalCountSelector())
          .append(' ')
          .append(query.substring(queryInspector.getFromClauseStartIndex()))
          .toString();
    }
    // the query text may have changed: inspect it again
    queryInspector = new QueryInspector(query);
  }
  if (batchingEnabled && queryInspector.isBatch()) {
    preparedStatements = unwrapAndPrepareBatchChildStatements(session);
  } else {
    preparedStatements = Collections.singletonList(session.prepare(query));
  }
  if (userProvidedQuery) {
    // in the presence of user-provided queries, create the mapping *after* query preparation
    List<CQLFragment> boundVariables = getVariables()
        .flatMap(defs -> StreamSupport.stream(defs.spliterator(), false))
        .map(def -> def.getName().asInternal())
        .map(CQLWord::fromInternal)
        .collect(Collectors.toList());
    fieldsToVariables = createFieldsToVariablesMap(boundVariables);
    // validate user-provided query
    if (schemaGenerationStrategy.isWriting()) {
      if (mutatesOnlyStaticColumns()) {
        // DAT-414: mutations that only affect static columns are allowed
        // to skip the clustering columns, only the partition key should be present.
        validatePartitionKeyPresent(fieldsToVariables);
      } else {
        validatePrimaryKeyPresent(fieldsToVariables);
      }
    }
  }
  assert fieldsToVariables != null;
  return new DefaultMapping(transformFieldsToVariables(fieldsToVariables), codecFactory, transformWriteTimeVariables(queryInspector.getWriteTimeVariables()));
}
use of com.datastax.oss.dsbulk.mapping.CQLFragment in project dsbulk by datastax.
the class SchemaSettings method appendColumnNames.
/**
 * Appends to {@code sb} the comma-separated names of the columns targeted by the given mapping.
 *
 * <p>The columns come from {@code maybeSortCols}, which returns a set, so a variable mapped from
 * several fields (e.g. both an indexed and a mapped entry) is listed only once.
 *
 * @param fieldsToVariables the mapping from fields to variables.
 * @param sb the builder receiving the column names.
 * @param mode the render mode applied to every column.
 */
private void appendColumnNames(ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables, StringBuilder sb, CQLRenderMode mode) {
  String separator = "";
  for (CQLFragment column : maybeSortCols(fieldsToVariables)) {
    // this assumes that the variable name found in the mapping
    // corresponds to a CQL column having the exact same name.
    sb.append(separator).append(column.render(mode));
    separator = ", ";
  }
}
use of com.datastax.oss.dsbulk.mapping.CQLFragment in project dsbulk by datastax.
the class SchemaSettings method validateKeyPresent.
/**
 * Checks that each of the given key columns is assigned in the query and, when assigned to a
 * plain variable, that this variable is present in the mapping.
 *
 * @param fieldsToVariables the mapping from fields to variables.
 * @param columns the key columns that must be present.
 * @throws IllegalArgumentException if a key column is missing from the query or the mapping.
 */
private void validateKeyPresent(ImmutableMultimap<MappingField, CQLFragment> fieldsToVariables, List<ColumnMetadata> columns) {
  Collection<CQLFragment> mappedVariables = fieldsToVariables.values();
  Map<CQLWord, CQLFragment> assignments = queryInspector.getAssignments();
  for (ColumnMetadata keyColumn : columns) {
    CQLWord keyName = CQLWord.fromCqlIdentifier(keyColumn.getName());
    CQLFragment assigned = assignments.get(keyName);
    // the provided query did not contain such column
    if (assigned == null) {
      throw new IllegalArgumentException("Missing required primary key column " + keyName.render(VARIABLE) + " from schema.mapping or schema.query");
    }
    // only check the mapping when the key is bound to a plain variable; the check is skipped
    // if the PK is mapped to a function or a literal in the query (DAT-326)
    boolean boundToVariable = assigned instanceof CQLWord;
    if (boundToVariable && !mappedVariables.contains(assigned)) {
      // the mapping did not contain such column
      throw new IllegalArgumentException("Missing required primary key column " + keyName.render(VARIABLE) + " from schema.mapping");
    }
  }
}
Aggregations