use of org.apache.asterix.om.types.IAType in project asterixdb by apache.
the class UnnestToDataScanRule method handleFunction.
/**
 * Replaces an unnest over a {@code dataset} or {@code feed-collect} function call with a
 * {@link DataSourceScanOperator} that takes over the inputs of the unnest.
 *
 * @param opRef reference whose value is replaced by the new scan operator on success
 * @param context optimization context used to create variables and to recompute the type environment
 * @param unnest the unnest operator being rewritten
 * @param f the function call the unnest iterates over
 * @return {@code true} if {@code opRef} now holds a data-source scan; {@code false} if {@code f} is
 *         neither of the two functions, or if the dataset argument is not a constant string
 * @throws AlgebricksException if the unnest carries a positional variable, the dataset cannot be
 *         found, or the configured feed policy is unknown
 */
protected boolean handleFunction(Mutable<ILogicalOperator> opRef, IOptimizationContext context,
        UnnestOperator unnest, AbstractFunctionCallExpression f) throws AlgebricksException {
    final FunctionIdentifier functionId = f.getFunctionIdentifier();
    final boolean overDataset = functionId.equals(BuiltinFunctions.DATASET);
    if (!overDataset && !functionId.equals(BuiltinFunctions.FEED_COLLECT)) {
        return false;
    }

    if (overDataset) {
        if (unnest.getPositionalVariable() != null) {
            // TODO remove this after enabling the support of positional variables in data scan
            throw new AlgebricksException("No positional variables are allowed over datasets.");
        }

        // The single argument must be a constant Asterix string naming the dataset.
        ILogicalExpression firstArg = f.getArguments().get(0).getValue();
        if (firstArg.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
            return false;
        }
        IAlgebricksConstantValue constantValue = ((ConstantExpression) firstArg).getValue();
        if (!(constantValue instanceof AsterixConstantValue)) {
            return false;
        }
        AsterixConstantValue asterixConstant = (AsterixConstantValue) constantValue;
        if (asterixConstant.getObject().getType().getTypeTag() != ATypeTag.STRING) {
            return false;
        }

        String qualifiedDatasetName = ((AString) asterixConstant.getObject()).getStringValue();
        MetadataProvider mdProvider = (MetadataProvider) context.getMetadataProvider();
        Pair<String, String> reference = parseDatasetReference(mdProvider, qualifiedDatasetName);
        String dataverseName = reference.first;
        String datasetName = reference.second;
        Dataset dataset = mdProvider.findDataset(dataverseName, datasetName);
        if (dataset == null) {
            throw new AlgebricksException(
                    "Could not find dataset " + datasetName + " in dataverse " + dataverseName);
        }
        DataSourceId sourceId = new DataSourceId(dataverseName, datasetName);

        // Scan output order: one fresh variable per primary key (internal datasets only),
        // then the record variable, then an optional meta variable.
        List<LogicalVariable> scanVars = new ArrayList<>();
        if (dataset.getDatasetType() == DatasetType.INTERNAL) {
            for (int remaining = dataset.getPrimaryKeys().size(); remaining > 0; remaining--) {
                scanVars.add(context.newVar());
            }
        }
        scanVars.add(unnest.getVariable());
        DataSource dataSource = mdProvider.findDataSource(sourceId);
        final boolean hasMeta = dataSource.hasMeta();
        if (hasMeta) {
            scanVars.add(context.newVar());
        }

        DataSourceScanOperator scanOp = new DataSourceScanOperator(scanVars, dataSource);
        scanOp.getInputs().addAll(unnest.getInputs());
        opRef.setValue(scanOp);
        addPrimaryKey(scanVars, dataSource, context);
        context.computeAndSetTypeEnvironmentForOperator(scanOp);

        // Adds equivalence classes --- one equivalent class between a primary key
        // variable and a record field-access expression.
        IAType[] schemaTypes = dataSource.getSchemaTypes();
        final int lastIdx = schemaTypes.length - 1;
        ARecordType recordType;
        ARecordType metaRecordType;
        if (hasMeta) {
            // With meta, the record type is second to last and the meta type is last.
            recordType = (ARecordType) schemaTypes[lastIdx - 1];
            metaRecordType = (ARecordType) schemaTypes[lastIdx];
        } else {
            recordType = (ARecordType) schemaTypes[lastIdx];
            metaRecordType = null;
        }
        EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(scanOp, scanVars, recordType,
                metaRecordType, dataset, context);
        return true;
    }

    // feed-collect
    if (unnest.getPositionalVariable() != null) {
        throw new AlgebricksException("No positional variables are allowed over feeds.");
    }
    String dataverse = ConstantExpressionUtil.getStringArgument(f, 0);
    String sourceFeedName = ConstantExpressionUtil.getStringArgument(f, 1);
    String targetFeedName = ConstantExpressionUtil.getStringArgument(f, 2);
    String subscriptionLocation = ConstantExpressionUtil.getStringArgument(f, 3);
    String targetDataset = ConstantExpressionUtil.getStringArgument(f, 4);
    String outputType = ConstantExpressionUtil.getStringArgument(f, 5);
    MetadataProvider mdProvider = (MetadataProvider) context.getMetadataProvider();
    DataSourceId sourceId = new DataSourceId(dataverse, targetFeedName);

    // Look the policy up in the metadata first, then fall back to the built-in policies.
    String policyName = mdProvider.getConfig().get(FeedActivityDetails.FEED_POLICY_NAME);
    FeedPolicyEntity policy = mdProvider.findFeedPolicy(dataverse, policyName);
    if (policy == null) {
        policy = BuiltinFeedPolicies.getFeedPolicy(policyName);
    }
    if (policy == null) {
        throw new AlgebricksException("Unknown feed policy:" + policyName);
    }

    List<LogicalVariable> feedScanVars = new ArrayList<>();
    String collectLocations = mdProvider.getConfig().get(FeedActivityDetails.COLLECT_LOCATIONS);
    List<LogicalVariable> pkVars = new ArrayList<>();
    FeedDataSource feedSource = createFeedDataSource(sourceId, targetDataset, sourceFeedName,
            subscriptionLocation, mdProvider, policy, outputType, collectLocations, unnest.getVariable(),
            context, pkVars);

    // The order for feeds is <Record-Meta-PK>
    feedScanVars.add(unnest.getVariable());
    if (feedSource.hasMeta()) {
        feedScanVars.add(context.newVar());
    }
    if (feedSource.isChange()) {
        feedScanVars.addAll(pkVars);
    }

    DataSourceScanOperator scanOp = new DataSourceScanOperator(feedScanVars, feedSource);
    scanOp.getInputs().addAll(unnest.getInputs());
    opRef.setValue(scanOp);
    context.computeAndSetTypeEnvironmentForOperator(scanOp);
    return true;
}
use of org.apache.asterix.om.types.IAType in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method matchVarsFromOptFuncExprToDataSourceScan.
/**
 * Matches the variables produced by a data-source scan against the variables used by an optimizable
 * function expression. For every match it records the field name (set only when the variable is
 * recognized as a primary-key variable), the subtree, the source variable and a variable-reference
 * expression on {@code optFuncExpr}; when the subtree has a data-source scan it also registers
 * candidate index expressions through {@code fillIndexExprs}.
 *
 * @param fromAdditionalDataSource {@code true} when {@code dsVarList} comes from the additional
 *        datasets of an index-join outer branch rather than from the subtree's own dataset
 */
private void matchVarsFromOptFuncExprToDataSourceScan(IOptimizableFuncExpr optFuncExpr, int optFuncExprIndex,
        List<Index> datasetIndexes, List<LogicalVariable> dsVarList, OptimizableOperatorSubTree subTree,
        AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean fromAdditionalDataSource)
        throws AlgebricksException {
    for (int varIndex = 0; varIndex < dsVarList.size(); varIndex++) {
        final LogicalVariable scanVar = dsVarList.get(varIndex);
        final int matchedArgIdx = optFuncExpr.findLogicalVar(scanVar);
        if (matchedArgIdx == -1) {
            // The function expression does not use this variable.
            continue;
        }

        // Both stay null unless the variable turns out to be one of the partitioning fields.
        List<String> pkFieldName = null;
        IAType pkFieldType = null;
        if (fromAdditionalDataSource) {
            // Need to check additional dataset one by one
            for (int dsIdx = 0; dsIdx < subTree.getIxJoinOuterAdditionalDatasets().size(); dsIdx++) {
                if (subTree.getIxJoinOuterAdditionalDatasets().get(dsIdx) == null) {
                    continue;
                }
                List<List<String>> primaryKeys =
                        subTree.getIxJoinOuterAdditionalDatasets().get(dsIdx).getPrimaryKeys();
                // NOTE(review): contains() is asked for a LogicalVariable inside a list of field-name
                // lists, so this test looks like it can never succeed -- confirm the intended check.
                if (primaryKeys.contains(scanVar) && varIndex < primaryKeys.size()) {
                    pkFieldName = primaryKeys.get(varIndex);
                    pkFieldType = (IAType) context
                            .getOutputTypeEnvironment(
                                    subTree.getIxJoinOuterAdditionalDataSourceRefs().get(dsIdx).getValue())
                            .getVarType(scanVar);
                    break;
                }
            }
        } else {
            List<List<String>> primaryKeys = subTree.getDataset().getPrimaryKeys();
            // A position below the primary-key count means this is a PK variable, not a record variable.
            if (varIndex < primaryKeys.size()) {
                pkFieldName = primaryKeys.get(varIndex);
                pkFieldType = (IAType) context.getOutputTypeEnvironment(subTree.getDataSourceRef().getValue())
                        .getVarType(scanVar);
            }
        }

        // Record the match on the function expression and remember the matching subtree.
        optFuncExpr.setFieldName(matchedArgIdx, pkFieldName);
        optFuncExpr.setOptimizableSubTree(matchedArgIdx, subTree);
        optFuncExpr.setSourceVar(matchedArgIdx, scanVar);
        optFuncExpr.setLogicalExpr(matchedArgIdx, new VariableReferenceExpression(scanVar));
        setTypeTag(context, subTree, optFuncExpr, matchedArgIdx);
        if (subTree.hasDataSourceScan()) {
            fillIndexExprs(datasetIndexes, pkFieldName, pkFieldType, optFuncExpr, optFuncExprIndex,
                    matchedArgIdx, subTree, analysisCtx);
        }
    }
}
use of org.apache.asterix.om.types.IAType in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method analyzeUnnestOp.
/**
 * Checks whether the variable produced by {@code unnestOp} is used by {@code optFuncExpr} and, if so,
 * records the matching subtree, field name, field type and type tag on the function expression.
 * When the subtree has a data source, candidate index expressions are registered through
 * {@code fillIndexExprs}.
 *
 * <p>For a collection scan no field name is resolved (it stays {@code null}) and the unnest variable
 * itself becomes the logical expression. Otherwise the field name is resolved from the subtree, and
 * the method returns early when that name is empty.
 */
private void analyzeUnnestOp(UnnestOperator unnestOp, IOptimizableFuncExpr optFuncExpr,
        OptimizableOperatorSubTree subTree, int assignOrUnnestIndex, LogicalVariable datasetRecordVar,
        LogicalVariable datasetMetaVar, IOptimizationContext context, List<Index> datasetIndexes,
        int optFuncExprIndex, AccessMethodAnalysisContext analysisCtx) throws AlgebricksException {
    final LogicalVariable unnestVar = unnestOp.getVariable();
    final int matchedArgIdx = optFuncExpr.findLogicalVar(unnestVar);
    if (matchedArgIdx == -1) {
        // The function expression does not use the unnest variable.
        return;
    }

    // The optimizable function expression at optFuncExprIndex is now matched to the unnest
    // variable; remember the subtree it was matched in.
    optFuncExpr.setOptimizableSubTree(matchedArgIdx, subTree);

    List<String> resolvedFieldName = null;
    if (subTree.getDataSourceType() != DataSourceType.COLLECTION_SCAN) {
        resolvedFieldName = getFieldNameFromSubTree(optFuncExpr, subTree, assignOrUnnestIndex, 0,
                subTree.getRecordType(), matchedArgIdx,
                optFuncExpr.getFuncExpr().getArguments().get(matchedArgIdx).getValue(), datasetRecordVar,
                subTree.getMetaRecordType(), datasetMetaVar);
        if (resolvedFieldName.isEmpty()) {
            return;
        }
    } else {
        optFuncExpr.setLogicalExpr(matchedArgIdx, new VariableReferenceExpression(unnestVar));
    }

    IAType resolvedFieldType = (IAType) context.getOutputTypeEnvironment(unnestOp)
            .getType(optFuncExpr.getLogicalExpr(matchedArgIdx));

    // Store the field name and type on the matched function expression.
    optFuncExpr.setFieldName(matchedArgIdx, resolvedFieldName);
    optFuncExpr.setFieldType(matchedArgIdx, resolvedFieldType);
    setTypeTag(context, subTree, optFuncExpr, matchedArgIdx);
    if (subTree.hasDataSource()) {
        fillIndexExprs(datasetIndexes, resolvedFieldName, resolvedFieldType, optFuncExpr, optFuncExprIndex,
                matchedArgIdx, subTree, analysisCtx);
    }
}
use of org.apache.asterix.om.types.IAType in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method pruneIndexCandidates.
/**
 * Removes irrelevant access method candidates, based on whether the
 * expressions in the query match those in the index. For example, some
 * indexes may require all their expressions to be matched, and some indexes may
 * only require a match on a prefix of fields to be applicable. This method
 * removes all index candidates (indexExprs) that are definitely not
 * applicable according to the expressions involved, and records the number of
 * matched keys for every index that is kept.
 *
 * @param accessMethod the access method whose matching rules (all keys vs. key prefix) apply
 * @param analysisCtx analysis context holding the candidate indexes and their matched expressions;
 *        entries are removed from it in place
 * @param context optimization context, used to obtain the metadata provider for type computation
 * @param typeEnvironment type environment used to type the constant side of an expression
 * @throws AlgebricksException if computing an expression type fails
 */
public void pruneIndexCandidates(IAccessMethod accessMethod, AccessMethodAnalysisContext analysisCtx,
        IOptimizationContext context, IVariableTypeEnvironment typeEnvironment) throws AlgebricksException {
    Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexExprAndVarIt =
            analysisCtx.getIteratorForIndexExprsAndVars();
    // Used to keep track of matched expressions (added for prefix search)
    List<Integer> matchedExpressions = new ArrayList<>();
    while (indexExprAndVarIt.hasNext()) {
        Map.Entry<Index, List<Pair<Integer, Integer>>> indexExprAndVarEntry = indexExprAndVarIt.next();
        Index index = indexExprAndVarEntry.getKey();
        boolean allUsed = true;
        int lastFieldMatched = -1;
        int numMatchedKeys = 0;
        matchedExpressions.clear();

        for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
            List<String> keyField = index.getKeyFieldNames().get(i);
            final IAType keyType = index.getKeyFieldTypes().get(i);
            // Must be reset for every key field: otherwise a match on an earlier key would make a
            // later key with no matching expression look matched as well.
            boolean foundKeyField = false;
            Iterator<Pair<Integer, Integer>> exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
            while (exprsAndVarIter.hasNext()) {
                final Pair<Integer, Integer> exprAndVarIdx = exprsAndVarIter.next();
                final IOptimizableFuncExpr optFuncExpr = analysisCtx.getMatchedFuncExpr(exprAndVarIdx.first);
                // If the expr is not optimizable by this access method on this index, remove the
                // expr and continue.
                if (!accessMethod.exprIsOptimizable(index, optFuncExpr)) {
                    exprsAndVarIter.remove();
                    continue;
                }
                boolean typeMatch = true;
                // Prune indexes based on field types.
                List<IAType> matchedTypes = new ArrayList<>();
                // Retrieve types of expressions joined/selected with an indexed field.
                for (int j = 0; j < optFuncExpr.getNumLogicalVars(); j++) {
                    if (j != exprAndVarIdx.second) {
                        matchedTypes.add(optFuncExpr.getFieldType(j));
                    }
                }
                if (matchedTypes.size() < 2 && optFuncExpr.getNumLogicalVars() == 1) {
                    matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(
                            optFuncExpr.getConstantExpr(0), context.getMetadataProvider(), typeEnvironment));
                }
                // Infer the type of the logical expr based on the index key type: the environment
                // below answers only for the expression's source variable and reports keyType for it.
                matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(
                        optFuncExpr.getLogicalExpr(exprAndVarIdx.second), null, new IVariableTypeEnvironment() {
                            @Override
                            public Object getVarType(LogicalVariable var) throws AlgebricksException {
                                if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
                                    return keyType;
                                }
                                throw new IllegalArgumentException();
                            }

                            @Override
                            public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariables,
                                    List<List<LogicalVariable>> correlatedNullableVariableLists)
                                    throws AlgebricksException {
                                if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
                                    return keyType;
                                }
                                throw new IllegalArgumentException();
                            }

                            @Override
                            public void setVarType(LogicalVariable var, Object type) {
                                throw new IllegalArgumentException();
                            }

                            @Override
                            public Object getType(ILogicalExpression expr) throws AlgebricksException {
                                return ExpressionTypeComputer.INSTANCE.getType(expr, null, this);
                            }

                            @Override
                            public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2)
                                    throws AlgebricksException {
                                throw new IllegalArgumentException();
                            }
                        }));

                // For the case when jaccard similarity is measured between ordered & unordered lists.
                boolean jaccardSimilarity = optFuncExpr.getFuncExpr().getFunctionIdentifier().getName()
                        .startsWith("similarity-jaccard-check");

                // Full-text search consideration: an (un)ordered list of string type can be compatible
                // with string type, i.e. an (un)ordered list can be provided as an argument to a string
                // type field index. Collection types are therefore replaced by their item type.
                // Identifiers are compared with equals(), not by reference.
                if (BuiltinFunctions.FULLTEXT_CONTAINS.equals(optFuncExpr.getFuncExpr().getFunctionIdentifier())
                        || BuiltinFunctions.FULLTEXT_CONTAINS_WO_OPTION
                                .equals(optFuncExpr.getFuncExpr().getFunctionIdentifier())) {
                    for (int j = 0; j < matchedTypes.size(); j++) {
                        ATypeTag tag = matchedTypes.get(j).getTypeTag();
                        if (tag == ATypeTag.ARRAY || tag == ATypeTag.MULTISET) {
                            matchedTypes.set(j, ((AbstractCollectionType) matchedTypes.get(j)).getItemType());
                        }
                    }
                }

                // Every pair of collected types has to be compatible.
                for (int j = 0; j < matchedTypes.size(); j++) {
                    for (int k = j + 1; k < matchedTypes.size(); k++) {
                        typeMatch &= isMatched(matchedTypes.get(j), matchedTypes.get(k), jaccardSimilarity);
                    }
                }

                // Check if any field name in the optFuncExpr matches.
                if (optFuncExpr.findFieldName(keyField) != -1) {
                    foundKeyField = typeMatch
                            && optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).hasDataSourceScan();
                    if (foundKeyField) {
                        matchedExpressions.add(exprAndVarIdx.first);
                        numMatchedKeys++;
                        // Only extend the matched prefix when the previous key was matched too.
                        if (lastFieldMatched == i - 1) {
                            lastFieldMatched = i;
                        }
                        break;
                    }
                }
            }
            if (!foundKeyField) {
                allUsed = false;
                // If any expression was matched, remove the non-matched expressions; otherwise the
                // index is unusable.
                if (lastFieldMatched >= 0) {
                    exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
                    while (exprsAndVarIter.hasNext()) {
                        if (!matchedExpressions.contains(exprsAndVarIter.next().first)) {
                            exprsAndVarIter.remove();
                        }
                    }
                }
                break;
            }
        }
        // If the access method requires all exprs to be matched but they
        // are not, remove this candidate.
        if (!allUsed && accessMethod.matchAllIndexExprs()) {
            indexExprAndVarIt.remove();
            continue;
        }
        // A prefix of the index exprs may have been matched.
        if (accessMethod.matchPrefixIndexExprs() && lastFieldMatched < 0) {
            indexExprAndVarIt.remove();
            continue;
        }
        analysisCtx.putNumberOfMatchedKeys(index, Integer.valueOf(numMatchedKeys));
    }
}
use of org.apache.asterix.om.types.IAType in project asterixdb by apache.
the class TweetParser method writeRecord.
/**
 * Serializes a JSON object as a record of type {@code curRecType} and writes it to {@code out}.
 *
 * <p>For a closed record type, the declared fields are written in declaration order. For an open
 * record type (or when {@code curRecType} is {@code null}), the attributes of {@code obj} are
 * iterated: attributes that map to a declared field are added by index, all others are added as
 * open fields by name, and attributes that are JSON null are skipped.
 *
 * @param obj the JSON object to convert
 * @param out destination of the serialized record
 * @param curRecType the expected record type, or {@code null} if no type is known
 * @throws IOException if a declared non-optional field is missing or null, or if writing fails
 */
public void writeRecord(JsonNode obj, DataOutput out, ARecordType curRecType) throws IOException {
    IAType[] curTypes = null;
    String[] curFNames = null;
    // Number of declared fields that are not optional, i.e. whose type is not a union.
    int expectedFieldsCount = 0;
    ArrayBackedValueStorage fieldValueBuffer = getTempBuffer();
    ArrayBackedValueStorage fieldNameBuffer = getTempBuffer();
    IARecordBuilder recBuilder = getRecordBuilder();
    if (curRecType != null) {
        curTypes = curRecType.getFieldTypes();
        curFNames = curRecType.getFieldNames();
        for (IAType curType : curTypes) {
            if (!(curType instanceof AUnionType)) {
                expectedFieldsCount++;
            }
        }
    }
    recBuilder.reset(curRecType);
    recBuilder.init();
    if (curRecType != null && !curRecType.isOpen()) {
        // closed record type
        for (int fieldIdx = 0; fieldIdx < curFNames.length; fieldIdx++) {
            fieldValueBuffer.reset();
            DataOutput fieldOutput = fieldValueBuffer.getDataOutput();
            String fieldName = curFNames[fieldIdx];
            // JsonNode.get returns Java null for an attribute that is absent from the object, so a
            // missing attribute has to be handled here as well as an explicit JSON null.
            JsonNode fieldNode = obj.get(fieldName);
            boolean missingOrNull = fieldNode == null || fieldNode.isNull();
            boolean optional = curTypes[fieldIdx] instanceof AUnionType;
            if (missingOrNull && !optional) {
                if (curRecType.isClosedField(fieldName)) {
                    throw new RuntimeDataException(ErrorCode.PARSER_TWEET_PARSER_CLOSED_FIELD_NULL, fieldName);
                }
                continue;
            }
            if (fieldNode == null) {
                // Optional field that is absent from the JSON object: nothing to write. This mirrors
                // the open-record branch, which never adds absent attributes either.
                continue;
            }
            if (writeField(fieldNode, curTypes[fieldIdx], fieldOutput)) {
                recBuilder.addField(fieldIdx, fieldValueBuffer);
            }
        }
    } else {
        // open record type
        int requiredFieldsWritten = 0;
        IAType curFieldType = null;
        Iterator<String> attrNames = obj.fieldNames();
        while (attrNames.hasNext()) {
            String attrName = attrNames.next();
            JsonNode attrNode = obj.get(attrName);
            if (attrNode == null || attrNode.isNull()) {
                continue;
            }
            // assumes checkAttrNameIdx yields the attribute's position in curFNames, or -1 when it
            // is not a declared field -- the result is used as a declared-field index below.
            int attrIdx = checkAttrNameIdx(curFNames, attrName);
            if (curRecType != null) {
                curFieldType = curRecType.getFieldType(attrName);
            }
            fieldValueBuffer.reset();
            fieldNameBuffer.reset();
            DataOutput fieldOutput = fieldValueBuffer.getDataOutput();
            if (writeField(attrNode, curFieldType, fieldOutput)) {
                if (attrIdx == -1) {
                    // Not a declared field: add it as an open field under its serialized name.
                    aString.setValue(attrName);
                    stringSerde.serialize(aString, fieldNameBuffer.getDataOutput());
                    recBuilder.addField(fieldNameBuffer, fieldValueBuffer);
                } else {
                    recBuilder.addField(attrIdx, fieldValueBuffer);
                    // Count only non-optional fields, because expectedFieldsCount counts only
                    // those; counting optional ones would let them mask a missing required field.
                    if (curTypes != null && !(curTypes[attrIdx] instanceof AUnionType)) {
                        requiredFieldsWritten++;
                    }
                }
            }
        }
        if (curRecType != null && requiredFieldsWritten < expectedFieldsCount) {
            throw new HyracksDataException("Non-null field is null");
        }
    }
    recBuilder.write(out, true);
}
Aggregations