Search in sources :

Example 6 with IStorageManager

use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.

the class ExternalIndexingOperations method buildAbortOp.

public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints = metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesAbortOperatorDescriptor op = new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ExternalDatasetIndexesAbortOperatorDescriptor(org.apache.asterix.external.operators.ExternalDatasetIndexesAbortOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ArrayList(java.util.ArrayList) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Index(org.apache.asterix.metadata.entities.Index) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 7 with IStorageManager

use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.

the class SecondaryIndexBulkLoadExample method createJob.

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples that we are retrieving from the primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { // we will use this as payload in secondary index
    IntegerSerializerDeserializer.INSTANCE, // we will use this ask key in secondary index
    new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // use a disk-order scan to read primary index
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec, recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);
    // sort the tuples as preparation for bulk load into secondary index
    // fields to sort on
    int[] sortFields = { 1, 0 };
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into B-Tree shall have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // the B-Tree expects its keyfields to be at the front of its input
    // tuple
    int[] fieldPermutation = { 1, 0 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    // connect the ops
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TreeIndexDiskOrderScanOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexDiskOrderScanOperatorDescriptor) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)

Example 8 with IStorageManager

use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.

the class PrimaryIndexSearchExample method createJob.

private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for btree
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // create roviders for B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    // build tuple containing low and high search keys
    // high
    ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2);
    // key
    // and
    // low
    // key
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    IntegerSerializerDeserializer.INSTANCE.serialize(100, dos);
    tb.addFieldEndOffset();
    // build
    IntegerSerializerDeserializer.INSTANCE.serialize(200, dos);
    // high key
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0 of tuples going
    int[] lowKeyFields = { 0 };
    // into search op
    // low key is in field 1 of tuples going
    int[] highKeyFields = { 1 };
    // into search op
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, btreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used : DataOutput(java.io.DataOutput) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) PrinterOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.PrinterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)

Example 9 with IStorageManager

use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.

the class RTreeResourceFactoryProvider method getResourceFactory.

@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    if (index.getKeyFieldNames().size() != 1) {
        throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_NUM_OF_FIELD, index.getKeyFieldNames().size(), index.getIndexType(), 1);
    }
    IAType spatialType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), index.getKeyFieldNames().get(0), recordType).first;
    if (spatialType == null) {
        throw new CompilationException(ErrorCode.COMPILATION_FIELD_NOT_FOUND, StringUtils.join(index.getKeyFieldNames().get(0), '.'));
    }
    List<List<String>> primaryKeyFields = dataset.getPrimaryKeys();
    int numPrimaryKeys = primaryKeyFields.size();
    ITypeTraits[] primaryTypeTraits = null;
    IBinaryComparatorFactory[] primaryComparatorFactories = null;
    IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        primaryTypeTraits = new ITypeTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
        primaryComparatorFactories = new IBinaryComparatorFactory[numPrimaryKeys];
        List<Integer> indicators = null;
        if (dataset.hasMetaPart()) {
            indicators = ((InternalDatasetDetails) dataset.getDatasetDetails()).getKeySourceIndicator();
        }
        for (int i = 0; i < numPrimaryKeys; i++) {
            IAType keyType = (indicators == null || indicators.get(i) == 0) ? recordType.getSubFieldType(primaryKeyFields.get(i)) : metaType.getSubFieldType(primaryKeyFields.get(i));
            primaryComparatorFactories[i] = storageComponentProvider.getComparatorFactoryProvider().getBinaryComparatorFactory(keyType, true);
            primaryTypeTraits[i] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(keyType);
        }
        primaryTypeTraits[numPrimaryKeys] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(recordType);
        if (dataset.hasMetaPart()) {
            primaryTypeTraits[numPrimaryKeys + 1] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(recordType);
        }
    }
    boolean isPointMBR = spatialType.getTypeTag() == ATypeTag.POINT || spatialType.getTypeTag() == ATypeTag.POINT3D;
    int numDimensions = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
    int numNestedSecondaryKeyFields = numDimensions * 2;
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[numNestedSecondaryKeyFields];
    IPrimitiveValueProviderFactory[] valueProviderFactories = new IPrimitiveValueProviderFactory[numNestedSecondaryKeyFields];
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[numNestedSecondaryKeyFields + numPrimaryKeys];
    IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(spatialType.getTypeTag());
    ATypeTag keyType = nestedKeyType.getTypeTag();
    for (int i = 0; i < numNestedSecondaryKeyFields; i++) {
        secondaryComparatorFactories[i] = storageComponentProvider.getComparatorFactoryProvider().getBinaryComparatorFactory(nestedKeyType, true);
        secondaryTypeTraits[i] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(nestedKeyType);
        valueProviderFactories[i] = storageComponentProvider.getPrimitiveValueProviderFactory();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        secondaryTypeTraits[numNestedSecondaryKeyFields + i] = (dataset.getDatasetType() == DatasetType.INTERNAL) ? primaryTypeTraits[i] : IndexingConstants.getTypeTraits(i);
    }
    int[] rtreeFields = null;
    if (filterTypeTraits != null && filterTypeTraits.length > 0) {
        rtreeFields = new int[numNestedSecondaryKeyFields + numPrimaryKeys];
        for (int i = 0; i < rtreeFields.length; i++) {
            rtreeFields[i] = i;
        }
    }
    IStorageManager storageManager = storageComponentProvider.getStorageManager();
    ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
    ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IMetadataPageManagerFactory metadataPageManagerFactory = storageComponentProvider.getMetadataPageManagerFactory();
    ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponentProvider.getIoOperationSchedulerProvider();
    boolean durable = !dataset.isTemp();
    ILinearizeComparatorFactory linearizeCmpFactory = MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length);
    ITypeTraits[] typeTraits = getTypeTraits(mdProvider, dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] rtreeCmpFactories = getCmpFactories(mdProvider, index, recordType, metaType);
    int[] secondaryFilterFields = (filterTypeTraits != null && filterTypeTraits.length > 0) ? new int[] { numNestedSecondaryKeyFields + numPrimaryKeys } : null;
    IBinaryComparatorFactory[] btreeCompFactories = dataset.getDatasetType() == DatasetType.EXTERNAL ? IndexingConstants.getBuddyBtreeComparatorFactories() : getComparatorFactoriesForDeletedKeyBTree(secondaryTypeTraits, primaryComparatorFactories, secondaryComparatorFactories);
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
        return new LSMRTreeWithAntiMatterLocalResourceFactory(storageManager, typeTraits, rtreeCmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, valueProviderFactories, rTreePolicyType, linearizeCmpFactory, rtreeFields, isPointMBR, btreeCompFactories);
    } else {
        return new ExternalRTreeLocalResourceFactory(storageManager, typeTraits, rtreeCmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, btreeCompFactories, valueProviderFactories, rTreePolicyType, linearizeCmpFactory, rtreeFields, new int[] { numNestedSecondaryKeyFields }, isPointMBR, mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate());
    }
}
Also used : AsterixVirtualBufferCacheProvider(org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider) ILSMIOOperationSchedulerProvider(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider) List(java.util.List) CompilationException(org.apache.asterix.common.exceptions.CompilationException) ILSMIOOperationCallbackFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory) IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IPrimitiveValueProviderFactory(org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory) ILinearizeComparatorFactory(org.apache.hyracks.api.dataflow.value.ILinearizeComparatorFactory) IMetadataPageManagerFactory(org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ILSMOperationTrackerFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ATypeTag(org.apache.asterix.om.types.ATypeTag) LSMRTreeWithAntiMatterLocalResourceFactory(org.apache.hyracks.storage.am.lsm.rtree.dataflow.LSMRTreeWithAntiMatterLocalResourceFactory) ExternalRTreeLocalResourceFactory(org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeLocalResourceFactory) IAType(org.apache.asterix.om.types.IAType)

Example 10 with IStorageManager

use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.

the class InvertedIndexResourceFactoryProvider method getResourceFactory.

@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    // Get basic info
    List<List<String>> primaryKeys = dataset.getPrimaryKeys();
    List<List<String>> secondaryKeys = index.getKeyFieldNames();
    List<String> filterFieldName = DatasetUtil.getFilterField(dataset);
    int numPrimaryKeys = primaryKeys.size();
    int numSecondaryKeys = secondaryKeys.size();
    // Validate
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        throw new CompilationException(ErrorCode.COMPILATION_INDEX_TYPE_NOT_SUPPORTED_FOR_DATASET_TYPE, index.getIndexType().name(), dataset.getDatasetType());
    }
    if (numPrimaryKeys > 1) {
        throw new AsterixException("Cannot create inverted index on dataset with composite primary key.");
    }
    if (numSecondaryKeys > 1) {
        throw new AsterixException("Cannot create composite inverted index on multiple fields.");
    }
    boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
    int numTokenKeyPairFields = (!isPartitioned) ? 1 + numPrimaryKeys : 2 + numPrimaryKeys;
    int[] invertedIndexFields = null;
    int[] secondaryFilterFieldsForNonBulkLoadOps = null;
    int[] invertedIndexFieldsForNonBulkLoadOps = null;
    int[] secondaryFilterFields = null;
    if (filterFieldName != null) {
        invertedIndexFields = new int[numTokenKeyPairFields];
        for (int i = 0; i < invertedIndexFields.length; i++) {
            invertedIndexFields[i] = i;
        }
        secondaryFilterFieldsForNonBulkLoadOps = new int[filterFieldName.size()];
        secondaryFilterFieldsForNonBulkLoadOps[0] = numSecondaryKeys + numPrimaryKeys;
        invertedIndexFieldsForNonBulkLoadOps = new int[numSecondaryKeys + numPrimaryKeys];
        for (int i = 0; i < invertedIndexFieldsForNonBulkLoadOps.length; i++) {
            invertedIndexFieldsForNonBulkLoadOps[i] = i;
        }
        secondaryFilterFields = new int[filterFieldName.size()];
        secondaryFilterFields[0] = numTokenKeyPairFields - numPrimaryKeys + numPrimaryKeys;
    }
    IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
    IStorageManager storageManager = storageComponentProvider.getStorageManager();
    ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
    ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IMetadataPageManagerFactory metadataPageManagerFactory = storageComponentProvider.getMetadataPageManagerFactory();
    AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
    ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponentProvider.getIoOperationSchedulerProvider();
    boolean durable = !dataset.isTemp();
    double bloomFilterFalsePositiveRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();
    ITypeTraits[] typeTraits = getInvListTypeTraits(mdProvider, dataset, recordType, metaType);
    IBinaryComparatorFactory[] cmpFactories = getInvListComparatorFactories(mdProvider, dataset, recordType, metaType);
    ITypeTraits[] tokenTypeTraits = getTokenTypeTraits(dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] tokenCmpFactories = getTokenComparatorFactories(dataset, index, recordType, metaType);
    IBinaryTokenizerFactory tokenizerFactory = getTokenizerFactory(dataset, index, recordType, metaType);
    return new LSMInvertedIndexLocalResourceFactory(storageManager, typeTraits, cmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, isPartitioned, invertedIndexFields, secondaryFilterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, bloomFilterFalsePositiveRate);
}
Also used : CompilationException(org.apache.asterix.common.exceptions.CompilationException) ILSMIOOperationCallbackFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory) LSMInvertedIndexLocalResourceFactory(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexLocalResourceFactory) IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IMetadataPageManagerFactory(org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory) AsterixVirtualBufferCacheProvider(org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ILSMOperationTrackerFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ILSMIOOperationSchedulerProvider(org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) List(java.util.List)

Aggregations

IStorageManager (org.apache.hyracks.storage.common.IStorageManager)12 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)9 IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)9 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)9 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)8 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)8 JobSpecification (org.apache.hyracks.api.job.JobSpecification)8 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)6 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)5 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)5 Index (org.apache.asterix.metadata.entities.Index)4 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)4 ArrayList (java.util.ArrayList)3 AsterixVirtualBufferCacheProvider (org.apache.asterix.common.context.AsterixVirtualBufferCacheProvider)3 IStorageComponentProvider (org.apache.asterix.common.context.IStorageComponentProvider)3 CompilationException (org.apache.asterix.common.exceptions.CompilationException)3 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)3 NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor)3 BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor)3 IMetadataPageManagerFactory (org.apache.hyracks.storage.am.common.api.IMetadataPageManagerFactory)3