use of org.apache.asterix.common.config.DatasetConfig.IndexType in project asterixdb by apache.
the class IndexTupleTranslator method getMetadataEntityFromTuple.
/**
 * Rebuilds an {@link Index} metadata entity from the serialized record stored in the payload field
 * of the given tuple.
 * <p>
 * Several fields are optional and looked up by name; each falls back to a default when the record
 * does not carry it: search-key types (derived from the dataset's types instead), the enforced
 * flag ({@code false}), the gram length ({@code -1}) and the key source indicators ({@code 0} for
 * every search key).
 *
 * @param frameTuple tuple whose payload field holds the serialized index record
 * @return the reconstructed index
 * @throws MetadataException if the key types cannot be derived from the dataset's record/meta type
 * @throws HyracksDataException if a metadata node lookup raises a {@code RemoteException}
 */
@Override
public Index getMetadataEntityFromTuple(ITupleReference frameTuple) throws MetadataException, HyracksDataException {
    byte[] serRecord = frameTuple.getFieldData(INDEX_PAYLOAD_TUPLE_FIELD_INDEX);
    int recordStartOffset = frameTuple.getFieldStart(INDEX_PAYLOAD_TUPLE_FIELD_INDEX);
    int recordLength = frameTuple.getFieldLength(INDEX_PAYLOAD_TUPLE_FIELD_INDEX);
    ByteArrayInputStream stream = new ByteArrayInputStream(serRecord, recordStartOffset, recordLength);
    DataInput in = new DataInputStream(stream);
    ARecord rec = recordSerde.deserialize(in);

    // Mandatory fields, addressed by their fixed positions in the index record type.
    String dvName = ((AString) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_DATAVERSENAME_FIELD_INDEX)).getStringValue();
    String dsName = ((AString) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_DATASETNAME_FIELD_INDEX)).getStringValue();
    String indexName = ((AString) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_INDEXNAME_FIELD_INDEX)).getStringValue();
    IndexType indexStructure = IndexType.valueOf(((AString) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_INDEXSTRUCTURE_FIELD_INDEX)).getStringValue());

    // The search key is a list of (possibly nested) field names, i.e. a list of lists of strings.
    IACursor fieldNameCursor = ((AOrderedList) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_SEARCHKEY_FIELD_INDEX)).getCursor();
    List<List<String>> searchKey = new ArrayList<>();
    while (fieldNameCursor.next()) {
        AOrderedList fieldNameList = (AOrderedList) fieldNameCursor.get();
        IACursor nestedFieldNameCursor = fieldNameList.getCursor();
        List<String> nestedFieldName = new ArrayList<>();
        while (nestedFieldNameCursor.next()) {
            nestedFieldName.add(((AString) nestedFieldNameCursor.get()).getStringValue());
        }
        searchKey.add(nestedFieldName);
    }

    // Optional: explicitly persisted search-key types. A negative index means the field is absent,
    // matching the checks used for the gram-length and source-indicator fields below.
    int indexKeyTypeFieldPos = rec.getType().getFieldIndex(INDEX_SEARCHKEY_TYPE_FIELD_NAME);
    IACursor fieldTypeCursor = new ACollectionCursor();
    if (indexKeyTypeFieldPos >= 0) {
        fieldTypeCursor = ((AOrderedList) rec.getValueByPos(indexKeyTypeFieldPos)).getCursor();
    }
    List<IAType> searchKeyType = new ArrayList<>(searchKey.size());
    while (fieldTypeCursor.next()) {
        String typeName = ((AString) fieldTypeCursor.get()).getStringValue();
        IAType fieldType = BuiltinTypeMap.getTypeFromTypeName(metadataNode, jobId, dvName, typeName, false);
        searchKeyType.add(fieldType);
    }

    // Optional: enforced flag, defaulting to false when not present.
    int isEnforcedFieldPos = rec.getType().getFieldIndex(INDEX_ISENFORCED_FIELD_NAME);
    boolean isEnforcingKeys = false;
    if (isEnforcedFieldPos >= 0) {
        isEnforcingKeys = ((ABoolean) rec.getValueByPos(isEnforcedFieldPos)).getBoolean();
    }
    boolean isPrimaryIndex = ((ABoolean) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_ISPRIMARY_FIELD_INDEX)).getBoolean();
    int pendingOp = ((AInt32) rec.getValueByPos(MetadataRecordTypes.INDEX_ARECORD_PENDINGOP_FIELD_INDEX)).getIntegerValue();

    // Check if there is a gram length as well.
    int gramLength = -1;
    int gramLenPos = rec.getType().getFieldIndex(GRAM_LENGTH_FIELD_NAME);
    if (gramLenPos >= 0) {
        gramLength = ((AInt32) rec.getValueByPos(gramLenPos)).getIntegerValue();
    }

    // Read a field-source-indicator field; when absent, every key gets indicator 0.
    List<Integer> keyFieldSourceIndicator = new ArrayList<>();
    int keyFieldSourceIndicatorIndex = rec.getType().getFieldIndex(INDEX_SEARCHKEY_SOURCE_INDICATOR_FIELD_NAME);
    if (keyFieldSourceIndicatorIndex >= 0) {
        IACursor cursor = ((AOrderedList) rec.getValueByPos(keyFieldSourceIndicatorIndex)).getCursor();
        while (cursor.next()) {
            keyFieldSourceIndicator.add((int) ((AInt8) cursor.get()).getByteValue());
        }
    } else {
        for (int index = 0; index < searchKey.size(); ++index) {
            keyFieldSourceIndicator.add(0);
        }
    }

    // index key type information is not persisted, thus we extract type information from the record metadata
    if (searchKeyType.isEmpty()) {
        try {
            Dataset dSet = metadataNode.getDataset(jobId, dvName, dsName);
            String datatypeName = dSet.getItemTypeName();
            String datatypeDataverseName = dSet.getItemTypeDataverseName();
            ARecordType recordDt = (ARecordType) metadataNode.getDatatype(jobId, datatypeDataverseName, datatypeName).getDatatype();
            String metatypeName = dSet.getMetaItemTypeName();
            String metatypeDataverseName = dSet.getMetaItemTypeDataverseName();
            ARecordType metaDt = null;
            if (metatypeName != null && metatypeDataverseName != null) {
                metaDt = (ARecordType) metadataNode.getDatatype(jobId, metatypeDataverseName, metatypeName).getDatatype();
            }
            // Kept as a nested try on purpose: only failures of the key-type derivation itself are
            // re-wrapped as MetadataException; exceptions from the lookups above pass through as-is.
            try {
                searchKeyType = KeyFieldTypeUtil.getKeyTypes(recordDt, metaDt, searchKey, keyFieldSourceIndicator);
            } catch (AlgebricksException e) {
                throw new MetadataException(e);
            }
        } catch (RemoteException re) {
            throw HyracksDataException.create(re);
        }
    }
    return new Index(dvName, dsName, indexName, indexStructure, searchKey, keyFieldSourceIndicator, searchKeyType, gramLength, isEnforcingKeys, isPrimaryIndex, pendingOp);
}
use of org.apache.asterix.common.config.DatasetConfig.IndexType in project asterixdb by apache.
the class InvertedIndexResourceFactoryProvider method getTokenizerFactory.
/**
 * Creates the binary tokenizer factory for the secondary key of the given index.
 * <p>
 * The index must be defined on a dataset with at most one primary key and must itself have at most
 * one key field; otherwise a {@link CompilationException} is thrown. The key's type is resolved
 * against {@code recordType} when the first key source indicator is missing or {@code 0}, and
 * against {@code metaType} otherwise.
 *
 * @param dataset    dataset the index belongs to
 * @param index      index whose key is to be tokenized
 * @param recordType record type of the dataset
 * @param metaType   meta record type of the dataset
 * @return the tokenizer factory for the key's type tag, index type and gram length
 * @throws AlgebricksException if a sanity check fails or the key type cannot be resolved
 */
private static IBinaryTokenizerFactory getTokenizerFactory(Dataset dataset, Index index, ARecordType recordType, ARecordType metaType) throws AlgebricksException {
    final int primaryKeyCount = dataset.getPrimaryKeys().size();
    final int secondaryKeyCount = index.getKeyFieldNames().size();
    final IndexType kind = index.getIndexType();

    // Sanity checks: composite primary keys and multi-field secondary keys are rejected.
    if (primaryKeyCount > 1) {
        String qualifiedDatasetName = RecordUtil.toFullyQualifiedName(dataset.getDataverseName(), dataset.getDatasetName());
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_FOR_DATASET_WITH_COMPOSITE_PRIMARY_INDEX, kind, qualifiedDatasetName);
    }
    if (secondaryKeyCount > 1) {
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_NUM_OF_FIELD, secondaryKeyCount, kind, 1);
    }

    // Pick the type that owns the key field: the record type unless the indicator says otherwise.
    final List<Integer> sourceIndicators = index.getKeyFieldSourceIndicators();
    final boolean keyLivesInRecord = sourceIndicators == null || sourceIndicators.get(0) == 0;
    final ARecordType keyOwnerType = keyLivesInRecord ? recordType : metaType;

    final IAType keyType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), index.getKeyFieldNames().get(0), keyOwnerType).first;
    return NonTaggedFormatUtil.getBinaryTokenizerFactory(keyType.getTypeTag(), kind, index.getGramLength());
}
use of org.apache.asterix.common.config.DatasetConfig.IndexType in project asterixdb by apache.
the class InvertedIndexResourceFactoryProvider method getTokenTypeTraits.
/**
 * Computes the type traits of the token fields of the given inverted index.
 * <p>
 * The result always starts with the token trait of the secondary key's type. For the
 * length-partitioned index types one extra field is appended, holding
 * {@code ShortPointable.TYPE_TRAITS}.
 * <p>
 * The index must be defined on a dataset with at most one primary key and must itself have at most
 * one key field; otherwise a {@link CompilationException} is thrown.
 *
 * @param dataset    dataset the index belongs to
 * @param index      index whose token fields are described
 * @param recordType record type of the dataset
 * @param metaType   meta record type of the dataset
 * @return the type traits of the token fields
 * @throws AlgebricksException if a sanity check fails or the key type cannot be resolved
 */
private static ITypeTraits[] getTokenTypeTraits(Dataset dataset, Index index, ARecordType recordType, ARecordType metaType) throws AlgebricksException {
    final int primaryKeyCount = dataset.getPrimaryKeys().size();
    final int secondaryKeyCount = index.getKeyFieldNames().size();
    final IndexType kind = index.getIndexType();

    // Sanity checks: composite primary keys and multi-field secondary keys are rejected.
    if (primaryKeyCount > 1) {
        String qualifiedDatasetName = RecordUtil.toFullyQualifiedName(dataset.getDataverseName(), dataset.getDatasetName());
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_FOR_DATASET_WITH_COMPOSITE_PRIMARY_INDEX, kind, qualifiedDatasetName);
    }
    if (secondaryKeyCount > 1) {
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_NUM_OF_FIELD, secondaryKeyCount, kind, 1);
    }

    final boolean lengthPartitioned = kind == IndexType.LENGTH_PARTITIONED_WORD_INVIX || kind == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;

    // Pick the type that owns the key field: the record type unless the indicator says otherwise.
    final List<Integer> sourceIndicators = index.getKeyFieldSourceIndicators();
    final boolean keyLivesInRecord = sourceIndicators == null || sourceIndicators.get(0) == 0;
    final ARecordType keyOwnerType = keyLivesInRecord ? recordType : metaType;

    final IAType keyType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), index.getKeyFieldNames().get(0), keyOwnerType).first;

    // Partitioned indexes carry one additional token field after the key's own token.
    final int tokenFieldCount = lengthPartitioned ? secondaryKeyCount + 1 : secondaryKeyCount;
    final ITypeTraits[] traits = new ITypeTraits[tokenFieldCount];
    traits[0] = NonTaggedFormatUtil.getTokenTypeTrait(keyType);
    if (lengthPartitioned) {
        // The partitioning field is hardcoded to be a short *without* an Asterix type tag.
        traits[1] = ShortPointable.TYPE_TRAITS;
    }
    return traits;
}
use of org.apache.asterix.common.config.DatasetConfig.IndexType in project asterixdb by apache.
the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.
/**
 * Assigns a physical operator to {@code op} for the cases handled here, then recurses into the
 * nested plans of {@code op} (via {@code setPhysicalOperators}) and into each of its inputs.
 * <p>
 * Cases handled for {@code op} itself:
 * <ul>
 * <li>GROUP with exactly one nested plan that has exactly one root: an AGGREGATE root annotated with
 * USE_HASH_GROUP_BY or USE_EXTERNAL_GROUP_BY gets an external group-by when possible and a
 * pre-clustered group-by otherwise; a RUNNINGAGGREGATE root gets a pre-clustered group-by; any
 * other root operator is rejected.</li>
 * <li>INNERJOIN / LEFTOUTERJOIN: delegated to {@code JoinUtils}.</li>
 * <li>UNNEST_MAP / LEFT_OUTER_UNNEST_MAP whose expression is a function call: an index-search
 * operator chosen by the index type.</li>
 * </ul>
 * Operators not matching any case are left without a physical operator by this method.
 *
 * @param op      the logical operator to process (its subtree is processed recursively)
 * @param context the optimization context providing configuration and the metadata provider
 * @throws AlgebricksException if a group-by contains an unsupported nested root operator or the
 *         index referenced by an index search cannot be found
 */
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
GroupByOperator gby = (GroupByOperator) op;
// Only group-bys with a single nested plan that has a single root are handled here.
if (gby.getNestedPlans().size() == 1) {
ILogicalPlan p0 = gby.getNestedPlans().get(0);
if (p0.getRoots().size() == 1) {
Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
AggregateOperator aggOp = (AggregateOperator) r0.getValue();
// The aggregate is "serializable" only if every one of its functions is.
boolean serializable = true;
for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
serializable = false;
break;
}
}
// NOTE(review): the annotation values are compared by reference against Boolean.TRUE; this
// presumably relies on the annotations being stored as the canonical Boolean instances - confirm.
// When neither annotation is set, this branch assigns no physical operator to the group-by.
if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
boolean setToExternalGby = false;
if (serializable) {
// if serializable, use external group-by
// now check whether the serialized version aggregation function has corresponding intermediate agg
boolean hasIntermediateAgg = true;
IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
List<LogicalVariable> originalVariables = aggOp.getVariables();
List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
int aggNum = aggExprs.size();
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
// A null merge aggregation means there is no intermediate aggregate for this function.
if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
hasIntermediateAgg = false;
break;
}
}
// Check whether there are multiple aggregates in the sub plan.
// Currently, we don't support multiple aggregates in one external group-by.
boolean multipleAggOpsFound = false;
ILogicalOperator r1Logical = aggOp;
// Walks down the chain of first inputs below the aggregate looking for another AGGREGATE.
while (r1Logical.hasInputs()) {
r1Logical = r1Logical.getInputs().get(0).getValue();
if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
multipleAggOpsFound = true;
break;
}
}
if (hasIntermediateAgg && !multipleAggOpsFound) {
// Replace each aggregate expression in place with its serializable counterpart.
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
aggOp.getExpressions().get(i).setValue(serialAggExpr);
}
// The last argument is frames * frame size; the cast makes the product a long multiplication.
ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
generateMergeAggregationExpressions(gby, context);
op.setPhysicalOperator(externalGby);
setToExternalGby = true;
}
}
if (!setToExternalGby) {
// if not serializable or no intermediate agg, use pre-clustered group-by
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
// Only group-by expressions that are plain variable references contribute a column.
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
}
}
} else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
// Running aggregates always get a pre-clustered group-by, built the same way as above.
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
} else {
throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
}
}
}
}
// Anything not given a physical operator above is handled here by operator tag.
if (op.getPhysicalOperator() == null) {
switch(op.getOperatorTag()) {
case INNERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
break;
}
case LEFTOUTERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
break;
}
case UNNEST_MAP:
case LEFT_OUTER_UNNEST_MAP:
{
ILogicalExpression unnestExpr = null;
unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
// Unnest-maps whose expression is not a function call are left untouched.
if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier fid = f.getFunctionIdentifier();
// The only function call accepted here is the index-search function.
if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
throw new IllegalStateException();
}
AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
jobGenParams.readFromFuncArgs(f.getArguments());
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
// NOTE(review): dataset is dereferenced here without a null check and before the dsi check
// below - confirm findDataset cannot return null for a dataset referenced by an index search.
INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
if (dsi == null) {
throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
}
IndexType indexType = jobGenParams.getIndexType();
boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
switch(indexType) {
case BTREE:
{
// B-tree searches need the extra parameters (primary flag, equality flag, key variables).
BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
btreeJobGenParams.readFromFuncArgs(f.getArguments());
op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
break;
}
case RTREE:
{
op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
break;
}
// The two inverted-index groups differ only in the last constructor argument (false vs. true).
case SINGLE_PARTITION_WORD_INVIX:
case SINGLE_PARTITION_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
break;
}
case LENGTH_PARTITIONED_WORD_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
break;
}
default:
{
throw new NotImplementedException(indexType + " indexes are not implemented.");
}
}
}
break;
}
}
}
// Recurse: first into nested plans (if any), then into every input of this operator.
if (op.hasNestedPlans()) {
AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
for (ILogicalPlan p : nested.getNestedPlans()) {
setPhysicalOperators(p, context);
}
}
for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
}
}
use of org.apache.asterix.common.config.DatasetConfig.IndexType in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method chooseAllIndexes.
/**
 * Chooses all indexes that match their access method. The chosen indexes will be used as index-searches
 * to replace the given predicates in a SELECT operator. If there are multiple indexes of the same type
 * on the same fields, only one of them (the first one encountered) is chosen.
 * Allowed (AccessMethod, IndexType) combinations are:
 * [BTreeAccessMethod, IndexType.BTREE], [RTreeAccessMethod, IndexType.RTREE],
 * [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX || SINGLE_PARTITION_NGRAM_INVIX ||
 * LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX]
 *
 * @param analyzedAMs the analysis context of each candidate access method
 * @return the chosen (access method, index) pairs, in encounter order
 */
protected List<Pair<IAccessMethod, Index>> chooseAllIndexes(Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
    List<Pair<IAccessMethod, Index>> chosen = new ArrayList<>();
    // Remembers, per list of variables (fields), which index types have already been applied to them.
    Map<List<Pair<Integer, Integer>>, List<IndexType>> appliedTypesByVars = new HashMap<>();
    for (Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry : analyzedAMs.entrySet()) {
        AccessMethodAnalysisContext analysisCtx = amEntry.getValue();
        Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> candidates = analysisCtx.getIteratorForIndexExprsAndVars();
        while (candidates.hasNext()) {
            Map.Entry<Index, List<Pair<Integer, Integer>>> candidate = candidates.next();
            IAccessMethod accessMethod = amEntry.getKey();
            Index index = candidate.getKey();
            IndexType indexType = index.getIndexType();

            boolean invertedIndexType = indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
                    || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX
                    || indexType == IndexType.SINGLE_PARTITION_WORD_INVIX
                    || indexType == IndexType.SINGLE_PARTITION_NGRAM_INVIX;
            boolean btreePair = accessMethod == BTreeAccessMethod.INSTANCE && indexType == IndexType.BTREE;
            boolean rtreePair = accessMethod == RTreeAccessMethod.INSTANCE && indexType == IndexType.RTREE;
            boolean invertedPair = accessMethod == InvertedIndexAccessMethod.INSTANCE && invertedIndexType;
            if (!btreePair && !rtreePair && !invertedPair) {
                // The index type does not belong to this access method.
                continue;
            }

            List<Pair<Integer, Integer>> vars = candidate.getValue();
            List<IndexType> appliedTypes = appliedTypesByVars.get(vars);
            if (appliedTypes == null) {
                // First index seen for these variables.
                appliedTypes = new ArrayList<>();
                appliedTypesByVars.put(vars, appliedTypes);
            } else if (appliedTypes.contains(indexType)) {
                // An index of this type was already chosen for these variables.
                continue;
            }
            appliedTypes.add(indexType);
            chosen.add(new Pair<>(accessMethod, index));
        }
    }
    return chosen;
}
Aggregations