Search in sources :

Example 66 with ITypeTraits

Use of org.apache.hyracks.api.dataflow.value.ITypeTraits in the asterixdb project by Apache.

Method setSecondaryRecDescAndComparators of class SecondaryBTreeOperationsHelper.

/**
 * Populates the per-field metadata of the secondary index and builds the two record
 * descriptors derived from it.
 *
 * <p>Secondary tuple layout: {@code [secondary keys][primary keys][filter field, if any]}.
 * Enforced tuple layout: {@code [primary keys][record][meta, if the dataset has a meta part]
 * [filter field, if any]}.
 *
 * <p>Fields written: {@code secondaryFieldAccessEvalFactories}, {@code secondaryComparatorFactories},
 * {@code secondaryBloomFilterKeyFields}, {@code secondaryTypeTraits}, {@code anySecondaryKeyIsNullable},
 * {@code secondaryRecDesc} and {@code enforcedRecDesc}.
 *
 * @throws AlgebricksException declared by the signature; nothing in this body throws it directly
 */
@Override
@SuppressWarnings("rawtypes")
protected void setSecondaryRecDescAndComparators() throws AlgebricksException {
    int secondaryKeyCount = index.getKeyFieldNames().size();
    secondaryFieldAccessEvalFactories = new IScalarEvaluatorFactory[secondaryKeyCount + numFilterFields];
    secondaryComparatorFactories = new IBinaryComparatorFactory[secondaryKeyCount + numPrimaryKeys];
    secondaryBloomFilterKeyFields = new int[secondaryKeyCount];
    ISerializerDeserializer[] secondarySerdes =
            new ISerializerDeserializer[numPrimaryKeys + secondaryKeyCount + numFilterFields];
    ISerializerDeserializer[] enforcedSerdes =
            new ISerializerDeserializer[1 + numPrimaryKeys + (dataset.hasMetaPart() ? 1 : 0) + numFilterFields];
    ITypeTraits[] enforcedTraits =
            new ITypeTraits[1 + numPrimaryKeys + (dataset.hasMetaPart() ? 1 : 0) + numFilterFields];
    secondaryTypeTraits = new ITypeTraits[secondaryKeyCount + numPrimaryKeys];
    ISerializerDeserializerProvider serdeLookup = metadataProvider.getFormat().getSerdeProvider();
    ITypeTraitProvider traitLookup = metadataProvider.getFormat().getTypeTraitProvider();
    IBinaryComparatorFactoryProvider comparatorLookup =
            metadataProvider.getFormat().getBinaryComparatorFactoryProvider();
    // Internal datasets carry the record after the primary keys; otherwise it sits in column 0.
    int recordColumn = dataset.getDatasetType() == DatasetType.INTERNAL ? numPrimaryKeys : 0;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();

    // Secondary key fields occupy the leading slots of the secondary tuple.
    for (int keyIdx = 0; keyIdx < secondaryKeyCount; keyIdx++) {
        List<Integer> sourceIndicators = index.getKeyFieldSourceIndicators();
        // No indicator list, or an indicator of 0, means the key is read from the record;
        // any other indicator means it is read from the meta part, one column further on.
        boolean keyFromRecord = sourceIndicators == null || sourceIndicators.get(keyIdx) == 0;
        ARecordType sourceType = keyFromRecord ? itemType : metaType;
        int sourceColumn = keyFromRecord ? recordColumn : recordColumn + 1;

        secondaryFieldAccessEvalFactories[keyIdx] = metadataProvider.getFormat().getFieldAccessEvaluatorFactory(
                isEnforcingKeyTypes ? enforcedItemType : sourceType, index.getKeyFieldNames().get(keyIdx),
                sourceColumn);
        Pair<IAType, Boolean> typeAndNullability = Index.getNonNullableOpenFieldType(
                index.getKeyFieldTypes().get(keyIdx), index.getKeyFieldNames().get(keyIdx), sourceType);
        IAType keyType = typeAndNullability.first;
        // Sticky flag: once any key reports true it stays true.
        if (!anySecondaryKeyIsNullable && typeAndNullability.second) {
            anySecondaryKeyIsNullable = true;
        }
        ISerializerDeserializer keySerde = serdeLookup.getSerializerDeserializer(keyType);
        secondarySerdes[keyIdx] = keySerde;
        secondaryComparatorFactories[keyIdx] = comparatorLookup.getBinaryComparatorFactory(keyType, true);
        secondaryTypeTraits[keyIdx] = traitLookup.getTypeTrait(keyType);
        secondaryBloomFilterKeyFields[keyIdx] = keyIdx;
    }

    // Primary key (or RID) fields follow the secondary keys in the secondary tuple and
    // lead the enforced tuple.
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        // Internal dataset: reuse the primary index's serializers, type traits and comparators.
        for (int pk = 0; pk < numPrimaryKeys; pk++) {
            int secondarySlot = secondaryKeyCount + pk;
            secondarySerdes[secondarySlot] = primaryRecDesc.getFields()[pk];
            enforcedSerdes[pk] = primaryRecDesc.getFields()[pk];
            secondaryTypeTraits[secondarySlot] = primaryRecDesc.getTypeTraits()[pk];
            enforcedTraits[pk] = primaryRecDesc.getTypeTraits()[pk];
            secondaryComparatorFactories[secondarySlot] = primaryComparatorFactories[pk];
        }
    } else {
        // Non-internal dataset: the RID field descriptions come from IndexingConstants.
        for (int rid = 0; rid < numPrimaryKeys; rid++) {
            int secondarySlot = secondaryKeyCount + rid;
            secondarySerdes[secondarySlot] = IndexingConstants.getSerializerDeserializer(rid);
            enforcedSerdes[rid] = IndexingConstants.getSerializerDeserializer(rid);
            secondaryTypeTraits[secondarySlot] = IndexingConstants.getTypeTraits(rid);
            enforcedTraits[rid] = IndexingConstants.getTypeTraits(rid);
            secondaryComparatorFactories[secondarySlot] = IndexingConstants.getComparatorFactory(rid);
        }
    }

    // The record itself comes right after the primary keys in the enforced tuple ...
    int enforcedRecordSlot = numPrimaryKeys;
    enforcedSerdes[enforcedRecordSlot] = serdeLookup.getSerializerDeserializer(itemType);
    enforcedTraits[enforcedRecordSlot] = traitLookup.getTypeTrait(itemType);
    // ... optionally followed by the meta record.
    if (dataset.hasMetaPart()) {
        int enforcedMetaSlot = numPrimaryKeys + 1;
        enforcedSerdes[enforcedMetaSlot] = serdeLookup.getSerializerDeserializer(metaType);
        enforcedTraits[enforcedMetaSlot] = traitLookup.getTypeTrait(metaType);
    }

    // The filter field, when present, is the last field of both tuples.
    if (numFilterFields > 0) {
        secondaryFieldAccessEvalFactories[secondaryKeyCount] = metadataProvider.getFormat()
                .getFieldAccessEvaluatorFactory(itemType, filterFieldName, numPrimaryKeys);
        Pair<IAType, Boolean> filterTypeAndNullability = Index.getNonNullableKeyFieldType(filterFieldName, itemType);
        IAType filterType = filterTypeAndNullability.first;
        ISerializerDeserializer filterSerde = serdeLookup.getSerializerDeserializer(filterType);
        secondarySerdes[numPrimaryKeys + secondaryKeyCount] = filterSerde;
        enforcedSerdes[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)] = filterSerde;
        enforcedTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)] = traitLookup.getTypeTrait(filterType);
    }

    secondaryRecDesc = new RecordDescriptor(secondarySerdes, secondaryTypeTraits);
    enforcedRecDesc = new RecordDescriptor(enforcedSerdes, enforcedTraits);
}
Also used : ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) ITypeTraitProvider(org.apache.hyracks.algebricks.data.ITypeTraitProvider) ISerializerDeserializerProvider(org.apache.hyracks.algebricks.data.ISerializerDeserializerProvider) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)

Example 67 with ITypeTraits

Use of org.apache.hyracks.api.dataflow.value.ITypeTraits in the asterixdb project by Apache.

Method getResourceFactory of class InvertedIndexResourceFactoryProvider.

/**
 * Assembles an {@link LSMInvertedIndexLocalResourceFactory} for the given inverted index.
 *
 * <p>The index is treated as partitioned when its type is
 * {@code LENGTH_PARTITIONED_WORD_INVIX} or {@code LENGTH_PARTITIONED_NGRAM_INVIX}; a
 * partitioned index has one more token/key-pair field than a non-partitioned one. The
 * four field-index arrays passed to the factory are only built when the dataset has a
 * filter field and are {@code null} otherwise.
 *
 * @param mdProvider source of the storage component provider and storage properties
 * @param dataset dataset the index belongs to; must be of type INTERNAL
 * @param index the inverted index being described
 * @param recordType record type, forwarded to the type-trait/comparator/tokenizer helpers
 * @param metaType meta record type, forwarded to the same helpers
 * @param mergePolicyFactory forwarded unchanged to the resource factory
 * @param mergePolicyProperties forwarded unchanged to the resource factory
 * @param filterTypeTraits forwarded unchanged to the resource factory
 * @param filterCmpFactories forwarded unchanged to the resource factory
 * @return the configured resource factory
 * @throws AlgebricksException if the dataset is not INTERNAL (raised as
 *         {@code CompilationException}), or if the dataset has more than one primary key
 *         or the index more than one secondary key (raised as {@code AsterixException})
 */
@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index,
        ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory,
        Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits,
        IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    // Gather key and filter information.
    List<List<String>> primaryKeyPaths = dataset.getPrimaryKeys();
    List<List<String>> secondaryKeyPaths = index.getKeyFieldNames();
    List<String> filterPath = DatasetUtil.getFilterField(dataset);
    int primaryKeyCount = primaryKeyPaths.size();
    int secondaryKeyCount = secondaryKeyPaths.size();

    // Reject configurations an inverted index cannot be built for.
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        throw new CompilationException(ErrorCode.COMPILATION_INDEX_TYPE_NOT_SUPPORTED_FOR_DATASET_TYPE,
                index.getIndexType().name(), dataset.getDatasetType());
    }
    if (primaryKeyCount > 1) {
        throw new AsterixException("Cannot create inverted index on dataset with composite primary key.");
    }
    if (secondaryKeyCount > 1) {
        throw new AsterixException("Cannot create composite inverted index on multiple fields.");
    }

    boolean partitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX
            || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
    // One leading field when not partitioned, two when partitioned, then the primary keys.
    int tokenKeyPairWidth = primaryKeyCount + (partitioned ? 2 : 1);

    int[] invIndexFieldIds = null;
    int[] nonBulkLoadFilterFieldIds = null;
    int[] nonBulkLoadInvIndexFieldIds = null;
    int[] filterFieldIds = null;
    if (filterPath != null) {
        // Identity mapping over the token/key-pair fields.
        invIndexFieldIds = new int[tokenKeyPairWidth];
        for (int pos = 0; pos < invIndexFieldIds.length; pos++) {
            invIndexFieldIds[pos] = pos;
        }
        // For non-bulk-load operations the filter follows the secondary and primary keys.
        nonBulkLoadFilterFieldIds = new int[filterPath.size()];
        nonBulkLoadFilterFieldIds[0] = secondaryKeyCount + primaryKeyCount;
        nonBulkLoadInvIndexFieldIds = new int[secondaryKeyCount + primaryKeyCount];
        for (int pos = 0; pos < nonBulkLoadInvIndexFieldIds.length; pos++) {
            nonBulkLoadInvIndexFieldIds[pos] = pos;
        }
        // Otherwise the filter sits immediately after the token/key-pair fields.
        filterFieldIds = new int[filterPath.size()];
        filterFieldIds[0] = tokenKeyPairWidth;
    }

    // Storage-level collaborators.
    IStorageComponentProvider componentProvider = mdProvider.getStorageComponentProvider();
    IStorageManager storageMgr = componentProvider.getStorageManager();
    ILSMOperationTrackerFactory trackerFactory = dataset.getIndexOperationTrackerFactory(index);
    ILSMIOOperationCallbackFactory ioCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IMetadataPageManagerFactory pageManagerFactory = componentProvider.getMetadataPageManagerFactory();
    AsterixVirtualBufferCacheProvider bufferCacheProvider =
            new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
    ILSMIOOperationSchedulerProvider schedulerProvider = componentProvider.getIoOperationSchedulerProvider();
    boolean durable = !dataset.isTemp();
    double bloomFilterFpRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();

    // Type traits, comparators and tokenizer for the inverted lists and the tokens.
    ITypeTraits[] invListTypeTraits = getInvListTypeTraits(mdProvider, dataset, recordType, metaType);
    IBinaryComparatorFactory[] invListCmpFactories =
            getInvListComparatorFactories(mdProvider, dataset, recordType, metaType);
    ITypeTraits[] tokenTypeTraits = getTokenTypeTraits(dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] tokenCmpFactories = getTokenComparatorFactories(dataset, index, recordType, metaType);
    IBinaryTokenizerFactory tokenizerFactory = getTokenizerFactory(dataset, index, recordType, metaType);

    return new LSMInvertedIndexLocalResourceFactory(
            storageMgr,
            invListTypeTraits,
            invListCmpFactories,
            filterTypeTraits,
            filterCmpFactories,
            filterFieldIds,
            trackerFactory,
            ioCallbackFactory,
            pageManagerFactory,
            bufferCacheProvider,
            schedulerProvider,
            mergePolicyFactory,
            mergePolicyProperties,
            durable,
            tokenTypeTraits,
            tokenCmpFactories,
            tokenizerFactory,
            partitioned,
            invIndexFieldIds,
            nonBulkLoadFilterFieldIds,
            nonBulkLoadInvIndexFieldIds,
            bloomFilterFpRate);
}
Also used : CompilationException(org.apache.asterix.common.exceptions.CompilationException) ILSMIOOperationCallbackFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory) LSMInvertedIndexLocalResourceFactory(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexLocalResourceFactory) IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IMetadataPageManagerFactory(org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory) AsterixVirtualBufferCacheProvider(org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ILSMOperationTrackerFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ILSMIOOperationSchedulerProvider(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) List(java.util.List)

Aggregations

ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)67 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)45 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)34 Test (org.junit.Test)22 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)21 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)18 ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex)17 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)16 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)15 IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)15 IAType (org.apache.asterix.om.types.IAType)14 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)11 IPrimitiveValueProviderFactory (org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory)10 List (java.util.List)9 CompilationException (org.apache.asterix.common.exceptions.CompilationException)9 ARecordType (org.apache.asterix.om.types.ARecordType)8 ITypeTraitProvider (org.apache.hyracks.algebricks.data.ITypeTraitProvider)8 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)8 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)7 IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)7