Search in sources :

Example 16 with IFieldAggregateDescriptorFactory

use of org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory in project asterixdb by apache.

the class AggregationTest method multiKeyAvgPreClusterGroupTest.

@Test
public void multiKeyAvgPreClusterGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 8, 0 };
    PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true), new AvgFieldGroupAggregatorFactory(1, true) }), outputRec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgInmemGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) AvgFieldGroupAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.AvgFieldGroupAggregatorFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) Test(org.junit.Test)

Example 17 with IFieldAggregateDescriptorFactory

use of org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory in project asterixdb by apache.

the class AggregationTest method singleKeyMinMaxStringExtGroupTest.

@Test
public void singleKeyMinMaxStringExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize, keyFields, frameLimits, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new MinMaxStringFieldAggregatorFactory(15, true, true) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new MinMaxStringFieldAggregatorFactory(2, true, true) }), outputRec, outputRec, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) MinMaxStringFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MinMaxStringFieldAggregatorFactory) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) Test(org.junit.Test)

Example 18 with IFieldAggregateDescriptorFactory

use of org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory in project asterixdb by apache.

the class AggregationTest method singleKeyMinMaxStringPreClusterGroupTest.

@Test
public void singleKeyMinMaxStringPreClusterGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int[] keyFields = new int[] { 0 };
    PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) MinMaxStringFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MinMaxStringFieldAggregatorFactory) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) Test(org.junit.Test)

Example 19 with IFieldAggregateDescriptorFactory

use of org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory in project asterixdb by apache.

the class AggregationTest method multiKeySumPreClusterGroupTest.

@Test
public void multiKeySumPreClusterGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 8, 0 };
    PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true) }), outputRec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumInmemGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) Test(org.junit.Test)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)19 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)19 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)19 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)19 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)19 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)19 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)19 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)18 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)17 Test (org.junit.Test)17 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)16 AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)15 HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)10 ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)10 IBinaryHashFunctionFamily (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily)9 UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)9 PreclusteredGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor)9 CountFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory)8