Search in sources :

Example 31 with IBinaryHashFunctionFactory

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory in project asterixdb by apache.

the class HashPartitionExchangePOperator method createConnectorDescriptor.

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int[] keys = new int[hashFields.size()];
    IBinaryHashFunctionFactory[] hashFunctionFactories = new IBinaryHashFunctionFactory[hashFields.size()];
    int i = 0;
    IBinaryHashFunctionFactoryProvider hashFunProvider = context.getBinaryHashFunctionFactoryProvider();
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    for (LogicalVariable v : hashFields) {
        keys[i] = opSchema.findVariable(v);
        hashFunctionFactories[i] = hashFunProvider.getBinaryHashFunctionFactory(env.getVarType(v));
        ++i;
    }
    ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(keys, hashFunctionFactories);
    IConnectorDescriptor conn = new MToNPartitioningConnectorDescriptor(spec, tpcf);
    return new Pair<>(conn, null);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITuplePartitionComputerFactory(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IBinaryHashFunctionFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) TargetConstraint(org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 32 with IBinaryHashFunctionFactory

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory in project asterixdb by apache.

the class HashPartitionMergeExchangePOperator method createConnectorDescriptor.

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int[] keys = new int[partitionFields.size()];
    IBinaryHashFunctionFactory[] hashFunctionFactories = new IBinaryHashFunctionFactory[partitionFields.size()];
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    {
        int i = 0;
        IBinaryHashFunctionFactoryProvider hashFunProvider = context.getBinaryHashFunctionFactoryProvider();
        for (LogicalVariable v : partitionFields) {
            keys[i] = opSchema.findVariable(v);
            hashFunctionFactories[i] = hashFunProvider.getBinaryHashFunctionFactory(env.getVarType(v));
            ++i;
        }
    }
    ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(keys, hashFunctionFactories);
    int n = orderColumns.size();
    int[] sortFields = new int[n];
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[n];
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    INormalizedKeyComputerFactory nkcf = null;
    int j = 0;
    for (OrderColumn oc : orderColumns) {
        LogicalVariable var = oc.getColumn();
        sortFields[j] = opSchema.findVariable(var);
        Object type = env.getVarType(var);
        IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
        comparatorFactories[j] = bcfp.getBinaryComparatorFactory(type, oc.getOrder() == OrderKind.ASC);
        if (j == 0 && nkcfProvider != null && type != null) {
            nkcf = nkcfProvider.getNormalizedKeyComputerFactory(type, oc.getOrder() == OrderKind.ASC);
        }
        j++;
    }
    IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(spec, tpcf, sortFields, comparatorFactories, nkcf);
    return new Pair<IConnectorDescriptor, TargetConstraint>(conn, null);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITuplePartitionComputerFactory(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IBinaryHashFunctionFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) OrderColumn(org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) TargetConstraint(org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) INormalizedKeyComputerFactoryProvider(org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 33 with IBinaryHashFunctionFactory

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory in project asterixdb by apache.

the class InMemoryHashJoinPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    int[] keysLeft = JobGenHelper.variablesToFieldIndexes(keysLeftBranch, inputSchemas[0]);
    int[] keysRight = JobGenHelper.variablesToFieldIndexes(keysRightBranch, inputSchemas[1]);
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    IBinaryHashFunctionFactory[] hashFunFactories = JobGenHelper.variablesToBinaryHashFunctionFactories(keysLeftBranch, env, context);
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[keysLeft.length];
    int i = 0;
    IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
    for (LogicalVariable v : keysLeftBranch) {
        Object t = env.getVarType(v);
        comparatorFactories[i++] = bcfp.getBinaryComparatorFactory(t, true);
    }
    IPredicateEvaluatorFactoryProvider predEvaluatorFactoryProvider = context.getPredicateEvaluatorFactoryProvider();
    IPredicateEvaluatorFactory predEvaluatorFactory = (predEvaluatorFactoryProvider == null ? null : predEvaluatorFactoryProvider.getPredicateEvaluatorFactory(keysLeft, keysRight));
    RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IOperatorDescriptor opDesc = null;
    switch(kind) {
        case INNER:
            {
                opDesc = new InMemoryHashJoinOperatorDescriptor(spec, keysLeft, keysRight, hashFunFactories, comparatorFactories, recDescriptor, tableSize, predEvaluatorFactory, memSizeInFrames);
                break;
            }
        case LEFT_OUTER:
            {
                IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[inputSchemas[1].getSize()];
                for (int j = 0; j < nonMatchWriterFactories.length; j++) {
                    nonMatchWriterFactories[j] = context.getMissingWriterFactory();
                }
                opDesc = new InMemoryHashJoinOperatorDescriptor(spec, keysLeft, keysRight, hashFunFactories, comparatorFactories, predEvaluatorFactory, recDescriptor, true, nonMatchWriterFactories, tableSize, memSizeInFrames);
                break;
            }
        default:
            {
                throw new NotImplementedException();
            }
    }
    contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
    ILogicalOperator src1 = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src1, 0, op, 0);
    ILogicalOperator src2 = op.getInputs().get(1).getValue();
    builder.contributeGraphEdge(src2, 0, op, 1);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) IPredicateEvaluatorFactoryProvider(org.apache.hyracks.api.dataflow.value.IPredicateEvaluatorFactoryProvider) IPredicateEvaluatorFactory(org.apache.hyracks.api.dataflow.value.IPredicateEvaluatorFactory) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 34 with IBinaryHashFunctionFactory

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory in project asterixdb by apache.

the class Dataset method getPrimaryHashFunctionFactories.

/**
     * Gets the hash function factories for the primary key fields of this dataset.
     *
     * @param metadataProvider,
     *            the metadata provider.
     * @return the hash function factories for the primary key fields of this dataset.
     * @throws AlgebricksException
     */
public IBinaryHashFunctionFactory[] getPrimaryHashFunctionFactories(MetadataProvider metadataProvider) throws AlgebricksException {
    ARecordType recordType = (ARecordType) metadataProvider.findType(this);
    ARecordType metaType = (ARecordType) metadataProvider.findMetaType(this);
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    IBinaryHashFunctionFactory[] hashFuncFactories = new IBinaryHashFunctionFactory[numPrimaryKeys];
    List<Integer> indicators = null;
    if (hasMetaPart()) {
        indicators = ((InternalDatasetDetails) getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType = (indicators == null || indicators.get(i) == 0) ? recordType.getSubFieldType(partitioningKeys.get(i)) : metaType.getSubFieldType(partitioningKeys.get(i));
        hashFuncFactories[i] = BinaryHashFunctionFactoryProvider.INSTANCE.getBinaryHashFunctionFactory(keyType);
    }
    return hashFuncFactories;
}
Also used : List(java.util.List) ARecordType(org.apache.asterix.om.types.ARecordType) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) IAType(org.apache.asterix.om.types.IAType)

Example 35 with IBinaryHashFunctionFactory

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory in project asterixdb by apache.

the class ScanPrintTest method scanPrint02.

@Test
public void scanPrint02() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, ordScanner, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Aggregations

IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)36 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)32 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)32 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)31 JobSpecification (org.apache.hyracks.api.job.JobSpecification)29 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)28 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)28 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)27 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)24 Test (org.junit.Test)23 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)17 AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)15 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)15 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)13 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)13 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)12 HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)11 ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)11