use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.
the class DatasetDataSource method buildDatasourceScanRuntime.
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
switch(dataset.getDatasetType()) {
case EXTERNAL:
Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset();
String itemTypeName = externalDataset.getItemTypeName();
IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
ExternalDatasetDetails edd = (ExternalDatasetDetails) externalDataset.getDatasetDetails();
IAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset, edd.getAdapter(), edd.getProperties(), (ARecordType) itemType, null);
return metadataProvider.buildExternalDatasetDataScannerRuntime(jobSpec, itemType, adapterFactory, NonTaggedDataFormat.INSTANCE);
case INTERNAL:
DataSourceId id = getId();
String dataverseName = id.getDataverseName();
String datasetName = id.getDatasourceName();
Index primaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, datasetName);
int[] minFilterFieldIndexes = createFilterIndexes(minFilterVars, opSchema);
int[] maxFilterFieldIndexes = createFilterIndexes(maxFilterVars, opSchema);
return metadataProvider.buildBtreeRuntime(jobSpec, opSchema, typeEnv, context, true, false, ((DatasetDataSource) dataSource).getDataset(), primaryIndex.getIndexName(), null, null, true, true, minFilterFieldIndexes, maxFilterFieldIndexes);
default:
throw new AlgebricksException("Unknown datasource type");
}
}
use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.
the class IntroduceSecondaryIndexInsertDeleteRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
if (op0.getOperatorTag() != LogicalOperatorTag.DELEGATE_OPERATOR && op0.getOperatorTag() != LogicalOperatorTag.SINK) {
return false;
}
if (op0.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
DelegateOperator eOp = (DelegateOperator) op0;
if (!(eOp.getDelegate() instanceof CommitOperator)) {
return false;
}
}
AbstractLogicalOperator op1 = (AbstractLogicalOperator) op0.getInputs().get(0).getValue();
if (op1.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
return false;
}
/** find the record variable */
InsertDeleteUpsertOperator primaryIndexModificationOp = (InsertDeleteUpsertOperator) op0.getInputs().get(0).getValue();
boolean isBulkload = primaryIndexModificationOp.isBulkload();
ILogicalExpression newRecordExpr = primaryIndexModificationOp.getPayloadExpression().getValue();
List<Mutable<ILogicalExpression>> newMetaExprs = primaryIndexModificationOp.getAdditionalNonFilteringExpressions();
LogicalVariable newRecordVar;
LogicalVariable newMetaVar = null;
/**
* inputOp is the assign operator which extracts primary keys from the input
* variables (record or meta)
*/
AbstractLogicalOperator inputOp = (AbstractLogicalOperator) primaryIndexModificationOp.getInputs().get(0).getValue();
newRecordVar = getRecordVar(context, inputOp, newRecordExpr, 0);
if (newMetaExprs != null && !newMetaExprs.isEmpty()) {
if (newMetaExprs.size() > 1) {
throw new AlgebricksException("Number of meta records can't be more than 1. Number of meta records found = " + newMetaExprs.size());
}
newMetaVar = getRecordVar(context, inputOp, newMetaExprs.get(0).getValue(), 1);
}
/*
* At this point, we have the record variable and the insert/delete/upsert operator
* Note: We have two operators:
* 1. An InsertDeleteOperator (primary)
* 2. An IndexInsertDeleteOperator (secondary)
* The current primaryIndexModificationOp is of the first type
*/
DataSource datasetSource = (DataSource) primaryIndexModificationOp.getDataSource();
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
String dataverseName = datasetSource.getId().getDataverseName();
String datasetName = datasetSource.getId().getDatasourceName();
Dataset dataset = mp.findDataset(dataverseName, datasetName);
if (dataset == null) {
throw new AlgebricksException("Unknown dataset " + datasetName + " in dataverse " + dataverseName);
}
if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
return false;
}
// Create operators for secondary index insert / delete.
String itemTypeName = dataset.getItemTypeName();
IAType itemType = mp.findType(dataset.getItemTypeDataverseName(), itemTypeName);
if (itemType.getTypeTag() != ATypeTag.OBJECT) {
throw new AlgebricksException("Only record types can be indexed.");
}
ARecordType recType = (ARecordType) itemType;
// meta type
ARecordType metaType = null;
if (dataset.hasMetaPart()) {
metaType = (ARecordType) mp.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
}
List<Index> indexes = mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName());
// Set the top operator pointer to the primary IndexInsertDeleteOperator
ILogicalOperator currentTop = primaryIndexModificationOp;
boolean hasSecondaryIndex = false;
// Put an n-gram or a keyword index in the later stage of index-update,
// since TokenizeOperator needs to be involved.
Collections.sort(indexes, (o1, o2) -> o1.getIndexType().ordinal() - o2.getIndexType().ordinal());
// At this point, we have the data type info, and the indexes info as well
int secondaryIndexTotalCnt = indexes.size() - 1;
if (secondaryIndexTotalCnt > 0) {
op0.getInputs().clear();
} else {
return false;
}
// Initialize inputs to the SINK operator Op0 (The SINK) is now without input
// Prepare filtering field information (This is the filter created using the "filter with" key word in the
// create dataset ddl)
List<String> filteringFields = ((InternalDatasetDetails) dataset.getDatasetDetails()).getFilterField();
List<LogicalVariable> filteringVars;
List<Mutable<ILogicalExpression>> filteringExpressions = null;
if (filteringFields != null) {
// The filter field var already exists. we can simply get it from the insert op
filteringVars = new ArrayList<>();
filteringExpressions = new ArrayList<>();
for (Mutable<ILogicalExpression> filteringExpression : primaryIndexModificationOp.getAdditionalFilteringExpressions()) {
filteringExpression.getValue().getUsedVariables(filteringVars);
for (LogicalVariable var : filteringVars) {
filteringExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(var)));
}
}
}
// Replicate Operator is applied only when doing the bulk-load.
ReplicateOperator replicateOp = null;
if (secondaryIndexTotalCnt > 1 && primaryIndexModificationOp.isBulkload()) {
// Split the logical plan into "each secondary index update branch"
// to replicate each <PK,OBJECT> pair.
replicateOp = new ReplicateOperator(secondaryIndexTotalCnt);
replicateOp.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
replicateOp.setExecutionMode(ExecutionMode.PARTITIONED);
context.computeAndSetTypeEnvironmentForOperator(replicateOp);
currentTop = replicateOp;
}
/*
* The two maps are used to store variables to which [casted] field access is assigned.
* One for the beforeOp record and the other for the new record.
* There are two uses for these maps:
* 1. used for shared fields in indexes with overlapping keys.
* 2. used for setting variables of secondary keys for each secondary index operator.
*/
Map<IndexFieldId, LogicalVariable> fieldVarsForBeforeOperation = new HashMap<>();
Map<IndexFieldId, LogicalVariable> fieldVarsForNewRecord = new HashMap<>();
/*
* if the index is enforcing field types (For open indexes), We add a cast
* operator to ensure type safety
*/
try {
if (primaryIndexModificationOp.getOperation() == Kind.INSERT || primaryIndexModificationOp.getOperation() == Kind.UPSERT || /* Actually, delete should not be here but it is now until issue
* https://issues.apache.org/jira/browse/ASTERIXDB-1507
* is solved
*/
primaryIndexModificationOp.getOperation() == Kind.DELETE) {
injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForNewRecord, recType, metaType, newRecordVar, newMetaVar, primaryIndexModificationOp, false);
if (replicateOp != null) {
context.computeAndSetTypeEnvironmentForOperator(replicateOp);
}
}
if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) /* Actually, delete should be here but it is not until issue
* https://issues.apache.org/jira/browse/ASTERIXDB-1507
* is solved
*/
{
List<LogicalVariable> beforeOpMetaVars = primaryIndexModificationOp.getBeforeOpAdditionalNonFilteringVars();
LogicalVariable beforeOpMetaVar = beforeOpMetaVars == null ? null : beforeOpMetaVars.get(0);
currentTop = injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForBeforeOperation, recType, metaType, primaryIndexModificationOp.getBeforeOpRecordVar(), beforeOpMetaVar, currentTop, true);
}
} catch (AsterixException e) {
throw new AlgebricksException(e);
}
// At first, op1 is the index insert op insertOp
for (Index index : indexes) {
if (!index.isSecondaryIndex()) {
continue;
}
hasSecondaryIndex = true;
// Get the secondary fields names and types
List<List<String>> secondaryKeyFields = index.getKeyFieldNames();
List<LogicalVariable> secondaryKeyVars = new ArrayList<>();
List<Mutable<ILogicalExpression>> secondaryExpressions = new ArrayList<>();
List<Mutable<ILogicalExpression>> beforeOpSecondaryExpressions = new ArrayList<>();
ILogicalOperator replicateOutput;
for (int i = 0; i < secondaryKeyFields.size(); i++) {
IndexFieldId indexFieldId = new IndexFieldId(index.getKeyFieldSourceIndicators().get(i), secondaryKeyFields.get(i));
LogicalVariable skVar = fieldVarsForNewRecord.get(indexFieldId);
secondaryKeyVars.add(skVar);
secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(skVar)));
if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(fieldVarsForBeforeOperation.get(indexFieldId))));
}
}
IndexInsertDeleteUpsertOperator indexUpdate;
if (index.getIndexType() != IndexType.RTREE) {
// Create an expression per key
Mutable<ILogicalExpression> filterExpression = (primaryIndexModificationOp.getOperation() == Kind.UPSERT) ? null : createFilterExpression(secondaryKeyVars, context.getOutputTypeEnvironment(currentTop), index.isEnforcingKeyFileds());
DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
// and index type is keyword or n-gram.
if (index.getIndexType() != IndexType.BTREE && primaryIndexModificationOp.isBulkload()) {
// Note: Bulk load case, we don't need to take care of it for upsert operation
// Check whether the index is length-partitioned or not.
// If partitioned, [input variables to TokenizeOperator,
// token, number of token] pairs will be generated and
// fed into the IndexInsertDeleteOperator.
// If not, [input variables, token] pairs will be generated
// and fed into the IndexInsertDeleteOperator.
// Input variables are passed since TokenizeOperator is not an
// filtering operator.
boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
// Create a new logical variable - token
List<LogicalVariable> tokenizeKeyVars = new ArrayList<>();
List<Mutable<ILogicalExpression>> tokenizeKeyExprs = new ArrayList<>();
LogicalVariable tokenVar = context.newVar();
tokenizeKeyVars.add(tokenVar);
tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(tokenVar)));
// Check the field type of the secondary key.
IAType secondaryKeyType;
Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
secondaryKeyType = keyPairType.first;
List<Object> varTypes = new ArrayList<>();
varTypes.add(NonTaggedFormatUtil.getTokenType(secondaryKeyType));
// The type is short, and this does not contain type info.
if (isPartitioned) {
LogicalVariable lengthVar = context.newVar();
tokenizeKeyVars.add(lengthVar);
tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(lengthVar)));
varTypes.add(BuiltinType.SHORTWITHOUTTYPEINFO);
}
// TokenizeOperator to tokenize [SK, PK] pairs
TokenizeOperator tokenUpdate = new TokenizeOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, tokenizeKeyVars, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), isPartitioned, varTypes);
tokenUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
context.computeAndSetTypeEnvironmentForOperator(tokenUpdate);
replicateOutput = tokenUpdate;
indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), tokenizeKeyExprs, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(tokenUpdate));
} else {
// When TokenizeOperator is not needed
indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
replicateOutput = indexUpdate;
// We add the necessary expressions for upsert
if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
if (filteringFields != null) {
indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
}
}
indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
}
} else {
// Get type, dimensions and number of keys
Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
IAType spatialType = keyPairType.first;
boolean isPointMBR = spatialType.getTypeTag() == ATypeTag.POINT || spatialType.getTypeTag() == ATypeTag.POINT3D;
int dimension = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
int numKeys = (isPointMBR && isBulkload) ? dimension : dimension * 2;
// Get variables and expressions
List<LogicalVariable> keyVarList = new ArrayList<>();
List<Mutable<ILogicalExpression>> keyExprList = new ArrayList<>();
for (int i = 0; i < numKeys; i++) {
LogicalVariable keyVar = context.newVar();
keyVarList.add(keyVar);
AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVars.get(0))));
createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
keyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
}
secondaryExpressions.clear();
for (LogicalVariable secondaryKeyVar : keyVarList) {
secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
}
if (isPointMBR && isBulkload) {
//createFieldPermutationForBulkLoadOp(int) for more details.
for (LogicalVariable secondaryKeyVar : keyVarList) {
secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
}
}
AssignOperator assignCoordinates = new AssignOperator(keyVarList, keyExprList);
assignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
context.computeAndSetTypeEnvironmentForOperator(assignCoordinates);
replicateOutput = assignCoordinates;
Mutable<ILogicalExpression> filterExpression = null;
AssignOperator originalAssignCoordinates = null;
// We do something similar for beforeOp key if the operation is an upsert
if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
List<LogicalVariable> originalKeyVarList = new ArrayList<>();
List<Mutable<ILogicalExpression>> originalKeyExprList = new ArrayList<>();
// we don't do any filtering since nulls are expected here and there
for (int i = 0; i < numKeys; i++) {
LogicalVariable keyVar = context.newVar();
originalKeyVarList.add(keyVar);
AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
createMBR.getArguments().add(beforeOpSecondaryExpressions.get(0));
createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
originalKeyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
}
beforeOpSecondaryExpressions.clear();
for (LogicalVariable secondaryKeyVar : originalKeyVarList) {
beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
}
originalAssignCoordinates = new AssignOperator(originalKeyVarList, originalKeyExprList);
originalAssignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
context.computeAndSetTypeEnvironmentForOperator(originalAssignCoordinates);
} else {
// We must enforce the filter if the originating spatial type is
// nullable.
boolean forceFilter = keyPairType.second;
filterExpression = createFilterExpression(keyVarList, context.getOutputTypeEnvironment(assignCoordinates), forceFilter);
}
DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
// set before op secondary key expressions
if (filteringFields != null) {
indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
}
// set filtering expressions
indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
// assign --> assign beforeOp values --> secondary index upsert
indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(originalAssignCoordinates));
} else {
indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
}
}
context.computeAndSetTypeEnvironmentForOperator(indexUpdate);
if (!primaryIndexModificationOp.isBulkload() || secondaryIndexTotalCnt == 1) {
currentTop = indexUpdate;
} else {
replicateOp.getOutputs().add(new MutableObject<>(replicateOutput));
}
if (primaryIndexModificationOp.isBulkload()) {
// For bulk load, we connect all fanned out insert operator to a single SINK operator
op0.getInputs().add(new MutableObject<ILogicalOperator>(indexUpdate));
}
}
if (!hasSecondaryIndex) {
return false;
}
if (!primaryIndexModificationOp.isBulkload()) {
// If this is an upsert, we need to
// Remove the current input to the SINK operator (It is actually already removed above)
op0.getInputs().clear();
// Connect the last index update to the SINK
op0.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
}
return true;
}
use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.
the class IntroduceSecondaryIndexInsertDeleteRule method injectFieldAccessesForIndexes.
private ILogicalOperator injectFieldAccessesForIndexes(IOptimizationContext context, Dataset dataset, List<Index> indexes, Map<IndexFieldId, LogicalVariable> fieldAccessVars, ARecordType recType, ARecordType metaType, LogicalVariable recordVar, LogicalVariable metaVar, ILogicalOperator currentTop, boolean afterOp) throws AlgebricksException {
List<LogicalVariable> vars = new ArrayList<>();
List<Mutable<ILogicalExpression>> exprs = new ArrayList<>();
for (Index index : indexes) {
if (index.isPrimaryIndex()) {
continue;
}
List<IAType> skTypes = index.getKeyFieldTypes();
List<List<String>> skNames = index.getKeyFieldNames();
List<Integer> indicators = index.getKeyFieldSourceIndicators();
for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
IndexFieldId indexFieldId = new IndexFieldId(indicators.get(i), skNames.get(i));
if (fieldAccessVars.containsKey(indexFieldId)) {
// already handled in a different index
continue;
}
ARecordType sourceType = dataset.hasMetaPart() ? indicators.get(i).intValue() == Index.RECORD_INDICATOR ? recType : metaType : recType;
LogicalVariable sourceVar = dataset.hasMetaPart() ? indicators.get(i).intValue() == Index.RECORD_INDICATOR ? recordVar : metaVar : recordVar;
LogicalVariable fieldVar = context.newVar();
// create record variable ref
Mutable<ILogicalExpression> varRef = new MutableObject<>(new VariableReferenceExpression(sourceVar));
IAType fieldType = sourceType.getSubFieldType(indexFieldId.fieldName);
AbstractFunctionCallExpression theFieldAccessFunc;
if (fieldType == null) {
// Open field. must prevent inlining to maintain the cast before the primaryOp and
// make handling of records with incorrect value type for this field easier and cleaner
context.addNotToBeInlinedVar(fieldVar);
// create field access
AbstractFunctionCallExpression fieldAccessFunc = getOpenOrNestedFieldAccessFunction(varRef, indexFieldId.fieldName);
// create cast
theFieldAccessFunc = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CAST_TYPE));
// The first argument is the field
theFieldAccessFunc.getArguments().add(new MutableObject<ILogicalExpression>(fieldAccessFunc));
TypeCastUtils.setRequiredAndInputTypes(theFieldAccessFunc, skTypes.get(i), BuiltinType.ANY);
} else {
// Get the desired field position
int pos = indexFieldId.fieldName.size() > 1 ? -1 : sourceType.getFieldIndex(indexFieldId.fieldName.get(0));
// Field not found --> This is either an open field or a nested field. it can't be accessed by index
theFieldAccessFunc = (pos == -1) ? getOpenOrNestedFieldAccessFunction(varRef, indexFieldId.fieldName) : getClosedFieldAccessFunction(varRef, pos);
}
vars.add(fieldVar);
exprs.add(new MutableObject<ILogicalExpression>(theFieldAccessFunc));
fieldAccessVars.put(indexFieldId, fieldVar);
}
}
// AssignOperator assigns secondary keys to their vars
AssignOperator castedFieldAssignOperator = new AssignOperator(vars, exprs);
return introduceNewOp(context, currentTop, castedFieldAssignOperator, afterOp);
}
use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method pruneIndexCandidates.
/**
* Removes irrelevant access methods candidates, based on whether the
* expressions in the query match those in the index. For example, some
* index may require all its expressions to be matched, and some indexes may
* only require a match on a prefix of fields to be applicable. This methods
* removes all index candidates indexExprs that are definitely not
* applicable according to the expressions involved.
*
* @throws AlgebricksException
*/
public void pruneIndexCandidates(IAccessMethod accessMethod, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, IVariableTypeEnvironment typeEnvironment) throws AlgebricksException {
Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexExprAndVarIt = analysisCtx.getIteratorForIndexExprsAndVars();
// Used to keep track of matched expressions (added for prefix search)
int numMatchedKeys = 0;
ArrayList<Integer> matchedExpressions = new ArrayList<>();
while (indexExprAndVarIt.hasNext()) {
Map.Entry<Index, List<Pair<Integer, Integer>>> indexExprAndVarEntry = indexExprAndVarIt.next();
Index index = indexExprAndVarEntry.getKey();
boolean allUsed = true;
int lastFieldMatched = -1;
boolean foundKeyField = false;
matchedExpressions.clear();
numMatchedKeys = 0;
for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
List<String> keyField = index.getKeyFieldNames().get(i);
final IAType keyType = index.getKeyFieldTypes().get(i);
Iterator<Pair<Integer, Integer>> exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
while (exprsAndVarIter.hasNext()) {
final Pair<Integer, Integer> exprAndVarIdx = exprsAndVarIter.next();
final IOptimizableFuncExpr optFuncExpr = analysisCtx.getMatchedFuncExpr(exprAndVarIdx.first);
// expr and continue.
if (!accessMethod.exprIsOptimizable(index, optFuncExpr)) {
exprsAndVarIter.remove();
continue;
}
boolean typeMatch = true;
//Prune indexes based on field types
List<IAType> matchedTypes = new ArrayList<>();
//retrieve types of expressions joined/selected with an indexed field
for (int j = 0; j < optFuncExpr.getNumLogicalVars(); j++) {
if (j != exprAndVarIdx.second) {
matchedTypes.add(optFuncExpr.getFieldType(j));
}
}
if (matchedTypes.size() < 2 && optFuncExpr.getNumLogicalVars() == 1) {
matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getConstantExpr(0), context.getMetadataProvider(), typeEnvironment));
}
//infer type of logicalExpr based on index keyType
matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getLogicalExpr(exprAndVarIdx.second), null, new IVariableTypeEnvironment() {
@Override
public Object getVarType(LogicalVariable var) throws AlgebricksException {
if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
return keyType;
}
throw new IllegalArgumentException();
}
@Override
public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariables, List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
return keyType;
}
throw new IllegalArgumentException();
}
@Override
public void setVarType(LogicalVariable var, Object type) {
throw new IllegalArgumentException();
}
@Override
public Object getType(ILogicalExpression expr) throws AlgebricksException {
return ExpressionTypeComputer.INSTANCE.getType(expr, null, this);
}
@Override
public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException {
throw new IllegalArgumentException();
}
}));
//for the case when jaccard similarity is measured between ordered & unordered lists
boolean jaccardSimilarity = optFuncExpr.getFuncExpr().getFunctionIdentifier().getName().startsWith("similarity-jaccard-check");
// Full-text search consideration: an (un)ordered list of string type can be compatible with string
// type. i.e. an (un)ordered list can be provided as arguments to a string type field index.
List<IAType> elementTypes = matchedTypes;
if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS || optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS_WO_OPTION) {
for (int j = 0; j < matchedTypes.size(); j++) {
if (matchedTypes.get(j).getTypeTag() == ATypeTag.ARRAY || matchedTypes.get(j).getTypeTag() == ATypeTag.MULTISET) {
elementTypes.set(j, ((AbstractCollectionType) matchedTypes.get(j)).getItemType());
}
}
}
for (int j = 0; j < matchedTypes.size(); j++) {
for (int k = j + 1; k < matchedTypes.size(); k++) {
typeMatch &= isMatched(elementTypes.get(j), elementTypes.get(k), jaccardSimilarity);
}
}
// Check if any field name in the optFuncExpr matches.
if (optFuncExpr.findFieldName(keyField) != -1) {
foundKeyField = typeMatch && optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).hasDataSourceScan();
if (foundKeyField) {
matchedExpressions.add(exprAndVarIdx.first);
numMatchedKeys++;
if (lastFieldMatched == i - 1) {
lastFieldMatched = i;
}
break;
}
}
}
if (!foundKeyField) {
allUsed = false;
// if any expression was matched, remove the non-matched expressions, otherwise the index is unusable
if (lastFieldMatched >= 0) {
exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
while (exprsAndVarIter.hasNext()) {
if (!matchedExpressions.contains(exprsAndVarIter.next().first)) {
exprsAndVarIter.remove();
}
}
}
break;
}
}
// are not, remove this candidate.
if (!allUsed && accessMethod.matchAllIndexExprs()) {
indexExprAndVarIt.remove();
continue;
}
// A prefix of the index exprs may have been matched.
if (accessMethod.matchPrefixIndexExprs()) {
if (lastFieldMatched < 0) {
indexExprAndVarIt.remove();
continue;
}
}
analysisCtx.putNumberOfMatchedKeys(index, Integer.valueOf(numMatchedKeys));
}
}
use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.
the class QueryTranslator method handleCreateIndexStatement.
protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
ProgressState progress = ProgressState.NO_PROGRESS;
CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
String datasetName = stmtCreateIndex.getDatasetName().getValue();
List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
boolean bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
String indexName = null;
JobSpecification spec = null;
Dataset ds = null;
// For external datasets
List<ExternalFile> externalFilesSnapshot = null;
boolean firstExternalDatasetIndex = false;
boolean filesIndexReplicated = false;
Index filesIndex = null;
boolean datasetLocked = false;
Index index = null;
try {
ds = metadataProvider.findDataset(dataverseName, datasetName);
if (ds == null) {
throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
}
indexName = stmtCreateIndex.getIndexName().getValue();
index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
ARecordType aRecordType = (ARecordType) dt.getDatatype();
ARecordType metaRecordType = null;
if (ds.hasMetaPart()) {
Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
metaRecordType = (ARecordType) metaDt.getDatatype();
}
List<List<String>> indexFields = new ArrayList<>();
List<IAType> indexFieldTypes = new ArrayList<>();
int keyIndex = 0;
for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
IAType fieldType = null;
ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
boolean isOpen = subType.isOpen();
int i = 0;
if (fieldExpr.first.size() > 1 && !isOpen) {
while (i < fieldExpr.first.size() - 1 && !isOpen) {
subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
i++;
isOpen = subType.isOpen();
}
}
if (fieldExpr.second == null) {
fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
} else {
if (!stmtCreateIndex.isEnforced()) {
throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
}
if (!isOpen) {
throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
}
if (stmtCreateIndex.hasMetaField()) {
throw new AlgebricksException("Typed open index can only be created on the record part");
}
Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
fieldType = typeMap.get(typeSignature);
}
if (fieldType == null) {
throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
}
indexFields.add(fieldExpr.first);
indexFieldTypes.add(fieldType);
++keyIndex;
}
ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
if (index != null) {
if (stmtCreateIndex.getIfNotExists()) {
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
return;
} else {
throw new AlgebricksException("An index with this name " + indexName + " already exists.");
}
}
// error message and stop.
if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
List<List<String>> partitioningKeys = ds.getPrimaryKeys();
for (List<String> partitioningKey : partitioningKeys) {
IAType keyType = aRecordType.getSubFieldType(partitioningKey);
ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
// If it is not a fixed length
if (typeTrait.getFixedLength() < 0) {
throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
}
}
}
if (ds.getDatasetType() == DatasetType.INTERNAL) {
validateIfResourceIsActiveInFeed(ds);
} else {
// Check if the dataset is indexible
if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
}
// Check if the name of the index is valid
if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
throw new AlgebricksException("external dataset index name is invalid");
}
// Check if the files index exist
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
firstExternalDatasetIndex = filesIndex == null;
// Lock external dataset
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
datasetLocked = true;
if (firstExternalDatasetIndex) {
// Verify that no one has created an index before we acquire the lock
filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
if (filesIndex != null) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
firstExternalDatasetIndex = false;
ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
}
}
if (firstExternalDatasetIndex) {
// Get snapshot from External File System
externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
// Add an entry for the files index
filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// Add files to the external files index
for (ExternalFile file : externalFilesSnapshot) {
MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
}
// This is the first index for the external dataset, replicate the files index
spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
}
filesIndexReplicated = true;
JobUtils.runJob(hcc, spec, true);
}
}
// check whether there exists another enforced index on the same field
if (stmtCreateIndex.isEnforced()) {
List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
for (Index existingIndex : indexes) {
if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
}
}
}
// #. add a new index with PendingAddOp
index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// #. prepare to create the index artifact in NC.
spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
if (spec == null) {
throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
// #. create the index artifact in NC.
JobUtils.runJob(hcc, spec, true);
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. load data into the index in NC.
spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, spec, true);
// #. begin new metadataTxn
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
// #. add another new index with PendingNoOp after deleting the index with PendingAddOp
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
index.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
// PendingAddOp
if (firstExternalDatasetIndex) {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
// update transaction timestamp
((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e) {
if (bActiveTxn) {
abort(e, e, mdTxnCtx);
}
// If files index was replicated for external dataset, it should be cleaned up on NC side
if (filesIndexReplicated) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
try {
JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
}
if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
// #. execute compensation operations
// remove the index in NC
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
bActiveTxn = false;
JobUtils.runJob(hcc, jobSpec, true);
} catch (Exception e2) {
e.addSuppressed(e2);
if (bActiveTxn) {
abort(e, e2, mdTxnCtx);
}
}
if (firstExternalDatasetIndex) {
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop External Files from metadata
MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
}
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
// Drop the files index from metadata
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
}
}
// remove the record from the metadata.
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
metadataProvider.setMetadataTxnContext(mdTxnCtx);
try {
MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
} catch (Exception e2) {
e.addSuppressed(e2);
abort(e, e2, mdTxnCtx);
throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
}
}
throw e;
} finally {
metadataProvider.getLocks().unlock();
if (datasetLocked) {
ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
}
}
}
Aggregations