use of org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor in project asterixdb by apache.
the class Groupby method createJob.
private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, int htSize, long fileSize, int frameLimit, int frameSize, String alg, boolean outPlain) {
JobSpecification spec = new JobSpecification(frameSize);
IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new DelimitedDataTupleParserFactory(lineitemParserFactories, '|'), lineitemDesc);
createPartitionConstraint(spec, fileScanner, inSplits);
// Output: each unique string with an integer count
RecordDescriptor outDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, // IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE });
// Specify the grouping key, which will be the string extracted during
// the scan.
int[] keys = new int[] { 0 };
AbstractOperatorDescriptor grouper;
if (alg.equalsIgnoreCase("hash")) {
// external hash graph
grouper = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new IntegerNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, false) }), outDesc, outDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { MurmurHash3BinaryHashFunctionFamily.INSTANCE }));
createPartitionConstraint(spec, grouper, outSplits);
} else if (alg.equalsIgnoreCase("sort")) {
grouper = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keys, new IntegerNormalizedKeyComputerFactory(), new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, true) }), outDesc, outDesc, false);
createPartitionConstraint(spec, grouper, outSplits);
} else {
System.err.println("unknow groupby alg:" + alg);
return null;
}
// Connect scanner with the grouper
IConnectorDescriptor scanGroupConnDef2 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { // PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY),
PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
spec.connect(scanGroupConnDef2, fileScanner, 0, grouper, 0);
IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
AbstractSingleActivityOperatorDescriptor writer = outPlain ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|") : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
createPartitionConstraint(spec, writer, outSplits);
IConnectorDescriptor groupOutConn = new OneToOneConnectorDescriptor(spec);
spec.connect(groupOutConn, grouper, 0, writer, 0);
spec.addRoot(writer);
return spec;
}
use of org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor in project asterixdb by apache.
the class Sort method createJob.
private static JobSpecification createJob(FileSplit[] ordersSplits, FileSplit[] outputSplit, String memBufferAlg, int frameLimit, int frameSize, int limit, boolean usingHeapSorter) {
JobSpecification spec = new JobSpecification();
spec.setFrameSize(frameSize);
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(orderParserFactories, '|'), ordersDesc);
createPartitionConstraint(spec, ordScanner, ordersSplits);
AbstractSorterOperatorDescriptor sorter;
if (usingHeapSorter && limit < Integer.MAX_VALUE) {
sorter = new TopKSorterOperatorDescriptor(spec, frameLimit, limit, SortFields, null, SortFieldsComparatorFactories, ordersDesc);
} else {
if (memBufferAlg.equalsIgnoreCase("bestfit")) {
sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.SMALLEST_FIT, limit);
} else if (memBufferAlg.equalsIgnoreCase("biggestfit")) {
sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.BIGGEST_FIT, limit);
} else {
sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.LAST_FIT, limit);
}
}
createPartitionConstraint(spec, sorter, ordersSplits);
IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(outputSplit);
IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|");
createPartitionConstraint(spec, printer, outputSplit);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(SortFields, orderBinaryHashFunctionFactories), SortFields, SortFieldsComparatorFactories, new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, printer, 0);
spec.addRoot(printer);
return spec;
}
use of org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor in project asterixdb by apache.
the class RTreeSecondaryIndexInsertOperatorTest method searchUpdatedSecondaryIndexTest.
@Test
public void searchUpdatedSecondaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
// build tuple
ArrayTupleBuilder tb = new ArrayTupleBuilder(secondaryKeyFieldCount);
DataOutput dos = tb.getDataOutput();
tb.reset();
DoubleSerializerDeserializer.INSTANCE.serialize(61.2894, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(-149.624, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(61.8894, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(-149.024, dos);
tb.addFieldEndOffset();
ISerializerDeserializer[] keyRecDescSers = { DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
int[] keyFields = { 0, 1, 2, 3 };
RTreeSearchOperatorDescriptor secondarySearchOp = new RTreeSearchOperatorDescriptor(spec, secondaryRecDesc, keyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondarySearchOp, NC1_ID);
// fifth field from the tuples coming from secondary index
int[] primaryLowKeyFields = { 4 };
// fifth field from the tuples coming from secondary index
int[] primaryHighKeyFields = { 4 };
// search primary index
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor in project asterixdb by apache.
the class RTreeSecondaryIndexScanOperatorTest method scanPrimaryIndexTest.
@Test
public void scanPrimaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
// build dummy tuple
ArrayTupleBuilder tb = new ArrayTupleBuilder(secondaryKeyFieldCount);
DataOutput dos = tb.getDataOutput();
tb.reset();
DoubleSerializerDeserializer.INSTANCE.serialize(0.0, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(0.0, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(0.0, dos);
tb.addFieldEndOffset();
DoubleSerializerDeserializer.INSTANCE.serialize(0.0, dos);
tb.addFieldEndOffset();
ISerializerDeserializer[] keyRecDescSers = { DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
int[] keyFields = null;
RTreeSearchOperatorDescriptor secondarySearchOp = new RTreeSearchOperatorDescriptor(spec, secondaryRecDesc, keyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondarySearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor in project asterixdb by apache.
the class RTreeSecondaryIndexStatsOperatorTest method showPrimaryIndexStats.
@Test
public void showPrimaryIndexStats() throws Exception {
JobSpecification spec = new JobSpecification();
TreeIndexStatsOperatorDescriptor secondaryStatsOp = new TreeIndexStatsOperatorDescriptor(spec, storageManager, secondaryHelperFactory);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryStatsOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryStatsOp, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
Aggregations