Search in sources :

Example 11 with IStorageComponentProvider

use of org.apache.asterix.common.context.IStorageComponentProvider in project asterixdb by apache.

the class InvertedIndexResourceFactoryProvider method getResourceFactory.

@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    // Get basic info
    List<List<String>> primaryKeys = dataset.getPrimaryKeys();
    List<List<String>> secondaryKeys = index.getKeyFieldNames();
    List<String> filterFieldName = DatasetUtil.getFilterField(dataset);
    int numPrimaryKeys = primaryKeys.size();
    int numSecondaryKeys = secondaryKeys.size();
    // Validate
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        throw new CompilationException(ErrorCode.COMPILATION_INDEX_TYPE_NOT_SUPPORTED_FOR_DATASET_TYPE, index.getIndexType().name(), dataset.getDatasetType());
    }
    if (numPrimaryKeys > 1) {
        throw new AsterixException("Cannot create inverted index on dataset with composite primary key.");
    }
    if (numSecondaryKeys > 1) {
        throw new AsterixException("Cannot create composite inverted index on multiple fields.");
    }
    boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
    int numTokenKeyPairFields = (!isPartitioned) ? 1 + numPrimaryKeys : 2 + numPrimaryKeys;
    int[] invertedIndexFields = null;
    int[] secondaryFilterFieldsForNonBulkLoadOps = null;
    int[] invertedIndexFieldsForNonBulkLoadOps = null;
    int[] secondaryFilterFields = null;
    if (filterFieldName != null) {
        invertedIndexFields = new int[numTokenKeyPairFields];
        for (int i = 0; i < invertedIndexFields.length; i++) {
            invertedIndexFields[i] = i;
        }
        secondaryFilterFieldsForNonBulkLoadOps = new int[filterFieldName.size()];
        secondaryFilterFieldsForNonBulkLoadOps[0] = numSecondaryKeys + numPrimaryKeys;
        invertedIndexFieldsForNonBulkLoadOps = new int[numSecondaryKeys + numPrimaryKeys];
        for (int i = 0; i < invertedIndexFieldsForNonBulkLoadOps.length; i++) {
            invertedIndexFieldsForNonBulkLoadOps[i] = i;
        }
        secondaryFilterFields = new int[filterFieldName.size()];
        secondaryFilterFields[0] = numTokenKeyPairFields - numPrimaryKeys + numPrimaryKeys;
    }
    IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
    IStorageManager storageManager = storageComponentProvider.getStorageManager();
    ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
    ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IMetadataPageManagerFactory metadataPageManagerFactory = storageComponentProvider.getMetadataPageManagerFactory();
    AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
    ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponentProvider.getIoOperationSchedulerProvider();
    boolean durable = !dataset.isTemp();
    double bloomFilterFalsePositiveRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();
    ITypeTraits[] typeTraits = getInvListTypeTraits(mdProvider, dataset, recordType, metaType);
    IBinaryComparatorFactory[] cmpFactories = getInvListComparatorFactories(mdProvider, dataset, recordType, metaType);
    ITypeTraits[] tokenTypeTraits = getTokenTypeTraits(dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] tokenCmpFactories = getTokenComparatorFactories(dataset, index, recordType, metaType);
    IBinaryTokenizerFactory tokenizerFactory = getTokenizerFactory(dataset, index, recordType, metaType);
    return new LSMInvertedIndexLocalResourceFactory(storageManager, typeTraits, cmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, isPartitioned, invertedIndexFields, secondaryFilterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, bloomFilterFalsePositiveRate);
}
Also used : CompilationException(org.apache.asterix.common.exceptions.CompilationException) ILSMIOOperationCallbackFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory) LSMInvertedIndexLocalResourceFactory(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexLocalResourceFactory) IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IMetadataPageManagerFactory(org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory) AsterixVirtualBufferCacheProvider(org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ILSMOperationTrackerFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ILSMIOOperationSchedulerProvider(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) List(java.util.List)

Example 12 with IStorageComponentProvider

use of org.apache.asterix.common.context.IStorageComponentProvider in project asterixdb by apache.

the class ExternalIndexingOperations method buildFilesIndexCreateJobSpec.

public static JobSpecification buildFilesIndexCreateJobSpec(Dataset dataset, List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo = DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
    Map<String, String> mergePolicyProperties = compactionInfo.second;
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    Index fileIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(), dataset.getDatasetName(), fileIndexName);
    ARecordType recordType = (ARecordType) metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, fileIndex, recordType, null, mergePolicyFactory, mergePolicyProperties);
    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider, resourceFactory, !dataset.isTemp());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexCreateOperatorDescriptor externalFilesOp = new ExternalFilesIndexCreateOperatorDescriptor(spec, indexBuilderFactory, dataflowHelperFactory, externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp, secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IndexBuilderFactory(org.apache.hyracks.storage.am.common.build.IndexBuilderFactory) Index(org.apache.asterix.metadata.entities.Index) ExternalFilesIndexCreateOperatorDescriptor(org.apache.asterix.external.operators.ExternalFilesIndexCreateOperatorDescriptor) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Map(java.util.Map) ARecordType(org.apache.asterix.om.types.ARecordType) IResourceFactory(org.apache.hyracks.storage.common.IResourceFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ILSMMergePolicyFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory)

Aggregations

IStorageComponentProvider (org.apache.asterix.common.context.IStorageComponentProvider)12 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)6 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)5 List (java.util.List)4 AsterixVirtualBufferCacheProvider (org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider)4 CompilationException (org.apache.asterix.common.exceptions.CompilationException)4 JobSpecification (org.apache.hyracks.api.job.JobSpecification)4 ILSMIOOperationCallbackFactory (org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory)4 ILSMOperationTrackerFactory (org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory)4 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)3 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)3 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)3 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)3 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)3 IMetadataPageManagerFactory (org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory)3 IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)3 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)3 ILSMIOOperationSchedulerProvider (org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider)3 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)3 AsterixException (org.apache.asterix.common.exceptions.AsterixException)2