Search in sources :

Example 1 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class InvertedIndexPOperator method buildInvertedIndexRuntime.

public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec, AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput, boolean retainMissing, String datasetName, Dataset dataset, String indexName, ATypeTag searchKeyType, int[] keyFields, SearchModifierType searchModifierType, IAlgebricksConstantValue similarityThreshold, int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery) throws AlgebricksException {
    try {
        IAObject simThresh = ((AsterixConstantValue) similarityThreshold).getObject();
        int numPrimaryKeys = dataset.getPrimaryKeys().size();
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        if (secondaryIndex == null) {
            throw new AlgebricksException("Code generation error: no index " + indexName + " for dataset " + datasetName);
        }
        IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(unnestMap);
        RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
        // TODO: Here we assume there is only one search key field.
        int queryField = keyFields[0];
        // Get tokenizer and search modifier factories.
        IInvertedIndexSearchModifierFactory searchModifierFactory = InvertedIndexAccessMethod.getSearchModifierFactory(searchModifierType, simThresh, secondaryIndex);
        IBinaryTokenizerFactory queryTokenizerFactory = InvertedIndexAccessMethod.getBinaryTokenizerFactory(searchModifierType, searchKeyType, secondaryIndex);
        IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondarySplitsAndConstraint.first);
        LSMInvertedIndexSearchOperatorDescriptor invIndexSearchOp = new LSMInvertedIndexSearchOperatorDescriptor(jobSpec, outputRecDesc, queryField, dataflowHelperFactory, queryTokenizerFactory, searchModifierFactory, retainInput, retainMissing, context.getMissingWriterFactory(), dataset.getSearchCallbackFactory(metadataProvider.getStorageComponentProvider(), secondaryIndex, ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId(), IndexOperation.SEARCH, null), minFilterFieldIndexes, maxFilterFieldIndexes, isFullTextSearchQuery, numPrimaryKeys, false);
        return new Pair<>(invIndexSearchOp, secondarySplitsAndConstraint.second);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IAObject(org.apache.asterix.om.base.IAObject) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) IInvertedIndexSearchModifierFactory(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifierFactory) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) LSMInvertedIndexSearchOperatorDescriptor(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorDescriptor) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 2 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class CommitPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    int[] primaryKeyFields = JobGenHelper.variablesToFieldIndexes(primaryKeyLogicalVars, inputSchemas[0]);
    //get dataset splits
    IPushRuntimeFactory runtime = dataset.getCommitRuntimeFactory(metadataProvider, jobId, primaryKeyFields, isSink);
    builder.contributeMicroOperator(op, runtime, recDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory)

Example 3 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class FeedDataSource method buildDatasourceScanRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    try {
        ARecordType feedOutputType = (ARecordType) itemType;
        ISerializerDeserializer payloadSerde = NonTaggedDataFormat.INSTANCE.getSerdeProvider().getSerializerDeserializer(feedOutputType);
        ArrayList<ISerializerDeserializer> serdes = new ArrayList<>();
        serdes.add(payloadSerde);
        if (metaItemType != null) {
            serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaItemType));
        }
        if (pkTypes != null) {
            for (IAType type : pkTypes) {
                serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(type));
            }
        }
        RecordDescriptor feedDesc = new RecordDescriptor(serdes.toArray(new ISerializerDeserializer[serdes.size()]));
        FeedPolicyEntity feedPolicy = (FeedPolicyEntity) getProperties().get(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY);
        if (feedPolicy == null) {
            throw new AlgebricksException("Feed not configured with a policy");
        }
        feedPolicy.getProperties().put(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY, feedPolicy.getPolicyName());
        FeedConnectionId feedConnectionId = new FeedConnectionId(getId().getDataverseName(), getId().getDatasourceName(), getTargetDataset());
        FeedCollectOperatorDescriptor feedCollector = new FeedCollectOperatorDescriptor(jobSpec, feedConnectionId, feedOutputType, feedDesc, feedPolicy.getProperties(), getLocation());
        return new Pair<>(feedCollector, new AlgebricksAbsolutePartitionConstraint(getLocations()));
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ArrayList(java.util.ArrayList) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) FeedCollectOperatorDescriptor(org.apache.asterix.external.operators.FeedCollectOperatorDescriptor) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) FeedPolicyEntity(org.apache.asterix.metadata.entities.FeedPolicyEntity) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) FeedConnectionId(org.apache.asterix.external.feed.management.FeedConnectionId) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 4 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class LoadableDataSource method buildDatasourceScanRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    LoadableDataSource alds = (LoadableDataSource) dataSource;
    ARecordType itemType = (ARecordType) alds.getLoadedType();
    IAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(alds.getTargetDataset(), alds.getAdapter(), alds.getAdapterProperties(), itemType, null);
    RecordDescriptor rDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
    return metadataProvider.buildLoadableDatasetScan(jobSpec, adapterFactory, rDesc);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IAdapterFactory(org.apache.asterix.external.api.IAdapterFactory) ARecordType(org.apache.asterix.om.types.ARecordType)

Example 5 with RecordDescriptor

use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

the class MetadataProvider method buildExternalDataLookupRuntime.

public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDataLookupRuntime(JobSpecification jobSpec, Dataset dataset, int[] ridIndexes, boolean retainInput, IVariableTypeEnvironment typeEnv, IOperatorSchema opSchema, JobGenContext context, MetadataProvider metadataProvider, boolean retainMissing) throws AlgebricksException {
    try {
        // Get data type
        ARecordType itemType = (ARecordType) MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(), dataset.getItemTypeName()).getDatatype();
        ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
        LookupAdapterFactory<?> adapterFactory = AdapterFactoryProvider.getLookupAdapterFactory(getApplicationContext().getServiceContext(), datasetDetails.getProperties(), itemType, ridIndexes, retainInput, retainMissing, context.getMissingWriterFactory());
        String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc = metadataProvider.getSplitProviderAndConstraints(dataset, fileIndexName);
        Index fileIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), fileIndexName);
        // Create the file index data flow helper
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), spPc.first);
        // Create the out record descriptor, appContext and fileSplitProvider for the files index
        RecordDescriptor outRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        ISearchOperationCallbackFactory searchOpCallbackFactory = dataset.getSearchCallbackFactory(storaegComponentProvider, fileIndex, jobId, IndexOperation.SEARCH, null);
        // Create the operator
        ExternalLookupOperatorDescriptor op = new ExternalLookupOperatorDescriptor(jobSpec, adapterFactory, outRecDesc, indexDataflowHelperFactory, searchOpCallbackFactory, ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this));
        return new Pair<>(op, spPc.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ExternalLookupOperatorDescriptor(org.apache.asterix.external.operators.ExternalLookupOperatorDescriptor) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ISearchOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ARecordType(org.apache.asterix.om.types.ARecordType) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)169 JobSpecification (org.apache.hyracks.api.job.JobSpecification)90 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)74 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)72 Test (org.junit.Test)69 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)64 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)55 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)53 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)52 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)41 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)40 FileSplit (org.apache.hyracks.api.io.FileSplit)38 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)37 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)36 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)35 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)35 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)31 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)27 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)25