Search in sources :

Example 6 with IBinaryHashFunctionFamily

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily in project asterixdb by apache.

the class ExternalGroupByPOperator method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new AlgebricksException("pre-sorted group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException("External group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException("External group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    ISerializedAggregateEvaluatorFactory[] aff = new ISerializedAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    for (i = 0; i < keys.length; ++i) {
        keyAndDecFields[i] = keys[i];
    }
    for (i = 0; i < fdColumns.length; i++) {
        keyAndDecFields[keys.length + i] = fdColumns[i];
    }
    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }
    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(gbyCols, aggOpInputEnv, context);
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    IBinaryHashFunctionFamily[] hashFunctionFactories = JobGenHelper.variablesToBinaryHashFunctionFamilies(gbyCols, aggOpInputEnv, context);
    ISerializedAggregateEvaluatorFactory[] merges = new ISerializedAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }
    IAggregatorDescriptorFactory aggregatorFactory = new SerializableAggregatorDescriptorFactory(aff);
    IAggregatorDescriptorFactory mergeFactory = new SerializableAggregatorDescriptorFactory(merges);
    INormalizedKeyComputerFactory normalizedKeyFactory = JobGenHelper.variablesToAscNormalizedKeyComputerFactory(gbyCols, aggOpInputEnv, context);
    // Calculates the hash table size (# of unique hash values) based on the budget and a tuple size.
    int memoryBudgetInBytes = context.getFrameSize() * frameLimit;
    int groupByColumnsCount = gby.getGroupByList().size() + numFds;
    int hashTableSize = ExternalGroupOperatorDescriptor.calculateGroupByTableCardinality(memoryBudgetInBytes, groupByColumnsCount, context.getFrameSize());
    ExternalGroupOperatorDescriptor gbyOpDesc = new ExternalGroupOperatorDescriptor(spec, hashTableSize, inputSize, keyAndDecFields, frameLimit, comparatorFactories, normalizedKeyFactory, aggregatorFactory, mergeFactory, recordDescriptor, recordDescriptor, new HashSpillableTableFactory(hashFunctionFactories));
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) ArrayList(java.util.ArrayList) ISerializedAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.ISerializedAggregateEvaluatorFactory) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IPartialAggregationTypeComputer(org.apache.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) SerializableAggregatorDescriptorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.SerializableAggregatorDescriptorFactory) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) OperatorSchemaImpl(org.apache.hyracks.algebricks.core.jobgen.impl.OperatorSchemaImpl) IAggregatorDescriptorFactory(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Example 7 with IBinaryHashFunctionFamily

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily in project asterixdb by apache.

the class LocalityAwareConnectorTest method localityAwareAggregationTest.

/**
     * Test of aggregations using locality aware connector. The output two files should be the
     * same, each of which is the aggregation of two copies of the lineitem.tbl.
     * Note that if the hashing connector is not working correctly, the two files may be different. This
     * also means that even the output files are the same, the hashing may have other problems.
     *
     * @throws Exception
     */
@Test
public void localityAwareAggregationTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, "asterix-001", "asterix-002", "asterix-003", "asterix-004");
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int tableSize = 8;
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, fileSize / spec.getFrameSize() + 1, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, "asterix-005", "asterix-006");
    BitSet nodemap = new BitSet(8);
    nodemap.set(0);
    nodemap.set(2);
    nodemap.set(5);
    nodemap.set(7);
    IConnectorDescriptor conn1 = new LocalityAwareMToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), new HashtableLocalityMap(nodemap));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "localityAwareSumInmemGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, "asterix-005", "asterix-006");
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) LocalityAwareMToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.LocalityAwareMToNPartitioningConnectorDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) BitSet(java.util.BitSet) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) HashtableLocalityMap(org.apache.hyracks.dataflow.std.connectors.HashtableLocalityMap) FloatSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Example 8 with IBinaryHashFunctionFamily

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily in project asterixdb by apache.

the class LocalityAwareConnectorTest method globalPartitionAggregationTest.

/**
     * Test for locality aware connector, using the global hashing node mapper. This should have
     * the exactly the same result as using {@link MToNPartitioningConnectorDescriptor}.
     *
     * @throws Exception
     */
@Test
public void globalPartitionAggregationTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, "asterix-001", "asterix-002", "asterix-003", "asterix-004");
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int tableSize = 8;
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, fileSize / spec.getFrameSize() + 1, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, "asterix-005", "asterix-006");
    IConnectorDescriptor conn1 = new LocalityAwareMToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), new GlobalHashingLocalityMap());
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "localityAwareSumInmemGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, "asterix-005", "asterix-006");
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) LocalityAwareMToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.LocalityAwareMToNPartitioningConnectorDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FloatSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) GlobalHashingLocalityMap(org.apache.hyracks.dataflow.std.connectors.GlobalHashingLocalityMap) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Example 9 with IBinaryHashFunctionFamily

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily in project asterixdb by apache.

the class AggregationTest method multiKeySumExtGroupTest.

@Test
public void multiKeySumExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 8, 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false), new IntSumFieldAggregatorFactory(3, false) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE, UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Example 10 with IBinaryHashFunctionFamily

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily in project asterixdb by apache.

the class AggregationTest method multiKeyMinMaxStringExtGroupTest.

@Test
public void multiKeyMinMaxStringExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int[] keyFields = new int[] { 8, 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new MinMaxStringFieldAggregatorFactory(15, true, true) }), new MultiFieldsAggregatorFactory(new int[] { 0, 1 }, new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false), new MinMaxStringFieldAggregatorFactory(3, true, true) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE, UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) MinMaxStringFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MinMaxStringFieldAggregatorFactory) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Aggregations

IBinaryHashFunctionFamily (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily)12 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)11 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)10 HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)10 ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)10 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)9 JobSpecification (org.apache.hyracks.api.job.JobSpecification)9 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)9 UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)9 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)9 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)9 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)9 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)9 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)9 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)9 AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)8 Test (org.junit.Test)8 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)7 FloatSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory)4 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)3