Use of org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor in the Apache AsterixDB project.
From class SortMergeTest, method sortMergeTest02.
/**
 * Builds and runs a three-stage job: a delimited-file scan over two orders
 * splits, an external sort on fields {1, 0}, and a result writer fed through
 * a hash-partitioning merging connector keyed on the same fields.
 *
 * NOTE(review): this job never calls spec.addRoot(...) for the result
 * writer - confirm that omission is intentional.
 *
 * @throws Exception if building or running the job fails
 */
@Test
public void sortMergeTest02() throws Exception {
    final JobSpecification jobSpec = new JobSpecification();
    final String tpchDir = "data" + File.separator + "tpch0.001" + File.separator;

    // Input: one orders split registered for NC1_ID and one for NC2_ID.
    final FileSplit[] inputSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID, tpchDir + "orders-part1.tbl"),
            new ManagedFileSplit(NC2_ID, tpchDir + "orders-part2.tbl") };
    final IFileSplitProvider inputSplitProvider = new ConstantFileSplitProvider(inputSplits);

    // Every one of the nine '|'-separated columns is handled as a UTF-8 string.
    final int fieldCount = 9;
    final ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        fieldSerdes[i] = new UTF8StringSerializerDeserializer();
    }
    final RecordDescriptor recordDesc = new RecordDescriptor(fieldSerdes);

    final IValueParserFactory[] fieldParsers = new IValueParserFactory[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        fieldParsers[i] = UTF8StringParserFactory.INSTANCE;
    }

    // Stage 1: file scan.
    final FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(jobSpec, inputSplitProvider,
            new DelimitedDataTupleParserFactory(fieldParsers, '|'), recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, scanOp, NC1_ID, NC2_ID);

    // Stage 2: external sort on fields 1 then 0.
    final IBinaryComparatorFactory[] sortComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    final ExternalSortOperatorDescriptor sortOp =
            new ExternalSortOperatorDescriptor(jobSpec, 4, new int[] { 1, 0 }, sortComparators, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOp, NC1_ID, NC2_ID);

    // Stage 3: result writer, constrained to NC1_ID only.
    final ResultSetId resultSetId = new ResultSetId(1);
    jobSpec.addResultSetId(resultSetId);
    final IOperatorDescriptor resultWriter = new ResultWriterOperatorDescriptor(jobSpec, resultSetId, false, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, resultWriter, NC1_ID);

    // Wiring: scan -> sort is one-to-one.
    final IConnectorDescriptor scanToSort = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(scanToSort, scanOp, 0, sortOp, 0);

    // Wiring: sort -> writer partitions by hash of fields {1, 0} and merges on the same fields.
    final IBinaryHashFunctionFactory[] partitionHashes = new IBinaryHashFunctionFactory[] {
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    final FieldHashPartitionComputerFactory partitioner =
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, partitionHashes);
    final IBinaryComparatorFactory[] mergeComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    final IConnectorDescriptor sortToWriter = new MToNPartitioningMergingConnectorDescriptor(jobSpec, partitioner,
            new int[] { 1, 0 }, mergeComparators, new UTF8StringNormalizedKeyComputerFactory());
    jobSpec.connect(sortToWriter, sortOp, 0, resultWriter, 0);

    runTest(jobSpec);
}
Use of org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor in the Apache AsterixDB project.
From class TPCHCustomerOptimizedHybridHashJoinTest, method customerOrderCIDHybridHashJoin_Case3.
/**
 * Joins the customer3.tbl split (scanned on NC1_ID) with the orders1.tbl
 * split (scanned on NC2_ID) through an OptimizedHybridHashJoinOperatorDescriptor
 * and sends the join output to the printer obtained from getPrinter.
 *
 * The customer scan feeds join input 0 one-to-one; the orders scan feeds join
 * input 1 through a broadcast connector.
 *
 * @throws Exception if building or running the job fails
 */
@Test
public void customerOrderCIDHybridHashJoin_Case3() throws Exception {
    final JobSpecification jobSpec = new JobSpecification();
    final String tpchDir = "data" + File.separator + "tpch0.001" + File.separator;

    final IFileSplitProvider customerSplitProvider = new ConstantFileSplitProvider(
            new FileSplit[] { new ManagedFileSplit(NC1_ID, tpchDir + "customer3.tbl") });
    final IFileSplitProvider orderSplitProvider = new ConstantFileSplitProvider(
            new FileSplit[] { new ManagedFileSplit(NC2_ID, tpchDir + "orders1.tbl") });

    // Scanners are created orders-first, matching the order in which they are registered with the spec.
    final FileScanOperatorDescriptor orderScan = new FileScanOperatorDescriptor(jobSpec, orderSplitProvider,
            new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, orderScan, NC2_ID);

    final FileScanOperatorDescriptor customerScan = new FileScanOperatorDescriptor(jobSpec, customerSplitProvider,
            new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, customerScan, NC1_ID);

    // Join key is field 0 of input 0 (customer) and field 1 of input 1 (orders).
    final int[] customerKeys = new int[] { 0 };
    final int[] orderKeys = new int[] { 1 };
    final IBinaryHashFunctionFamily[] hashFamilies =
            new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE };
    final IBinaryComparatorFactory[] keyComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    final JoinComparatorFactory customerVsOrder =
            new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1);
    final JoinComparatorFactory orderVsCustomer =
            new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0);
    final OptimizedHybridHashJoinOperatorDescriptor hashJoin = new OptimizedHybridHashJoinOperatorDescriptor(
            jobSpec, 6, 122, 1.2, customerKeys, orderKeys, hashFamilies, keyComparators, custOrderJoinDesc,
            customerVsOrder, orderVsCustomer, null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, hashJoin, NC1_ID);

    final String outputPath = getClass().getName() + File.separator + "case3";
    final IOperatorDescriptor resultPrinter = getPrinter(jobSpec, outputPath);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, resultPrinter, NC1_ID);

    // Wiring: customer -> join[0], orders -> join[1] (broadcast), join -> printer.
    final IConnectorDescriptor customerToJoin = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(customerToJoin, customerScan, 0, hashJoin, 0);

    final IConnectorDescriptor orderToJoin = new MToNBroadcastConnectorDescriptor(jobSpec);
    jobSpec.connect(orderToJoin, orderScan, 0, hashJoin, 1);

    final IConnectorDescriptor joinToPrinter = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(joinToPrinter, hashJoin, 0, resultPrinter, 0);

    jobSpec.addRoot(resultPrinter);
    runTest(jobSpec);
    System.out.println("output to " + outputPath);
}
Use of org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor in the Apache AsterixDB project.
From class TPCHCustomerOptimizedHybridHashJoinTest, method customerOrderCIDHybridHashJoin_Case2.
/**
 * Joins the customer3.tbl split (scanned on NC1_ID) with the orders4.tbl
 * split (scanned on NC2_ID) through an OptimizedHybridHashJoinOperatorDescriptor
 * and sends the join output to the printer obtained from getPrinter.
 *
 * The customer scan feeds join input 0 one-to-one; the orders scan feeds join
 * input 1 through a broadcast connector.
 *
 * @throws Exception if building or running the job fails
 */
@Test
public void customerOrderCIDHybridHashJoin_Case2() throws Exception {
    final JobSpecification jobSpec = new JobSpecification();
    final String tpchDir = "data" + File.separator + "tpch0.001" + File.separator;

    final FileSplit customerSplit = new ManagedFileSplit(NC1_ID, tpchDir + "customer3.tbl");
    final IFileSplitProvider customerSplitProvider =
            new ConstantFileSplitProvider(new FileSplit[] { customerSplit });

    final FileSplit orderSplit = new ManagedFileSplit(NC2_ID, tpchDir + "orders4.tbl");
    final IFileSplitProvider orderSplitProvider = new ConstantFileSplitProvider(new FileSplit[] { orderSplit });

    // Scanners are created orders-first, matching the order in which they are registered with the spec.
    final DelimitedDataTupleParserFactory orderParser =
            new DelimitedDataTupleParserFactory(orderValueParserFactories, '|');
    final FileScanOperatorDescriptor orderScan =
            new FileScanOperatorDescriptor(jobSpec, orderSplitProvider, orderParser, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, orderScan, NC2_ID);

    final DelimitedDataTupleParserFactory customerParser =
            new DelimitedDataTupleParserFactory(custValueParserFactories, '|');
    final FileScanOperatorDescriptor customerScan =
            new FileScanOperatorDescriptor(jobSpec, customerSplitProvider, customerParser, custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, customerScan, NC1_ID);

    // Join key is field 0 of input 0 (customer) and field 1 of input 1 (orders).
    final int[] customerKeys = new int[] { 0 };
    final int[] orderKeys = new int[] { 1 };
    final IBinaryHashFunctionFamily[] hashFamilies =
            new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE };
    final IBinaryComparatorFactory[] keyComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    final JoinComparatorFactory customerVsOrder =
            new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1);
    final JoinComparatorFactory orderVsCustomer =
            new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0);
    final OptimizedHybridHashJoinOperatorDescriptor hashJoin = new OptimizedHybridHashJoinOperatorDescriptor(
            jobSpec, 15, 122, 1.2, customerKeys, orderKeys, hashFamilies, keyComparators, custOrderJoinDesc,
            customerVsOrder, orderVsCustomer, null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, hashJoin, NC1_ID);

    final String outputPath = getClass().getName() + File.separator + "case2";
    final IOperatorDescriptor resultPrinter = getPrinter(jobSpec, outputPath);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, resultPrinter, NC1_ID);

    // Wiring: customer -> join[0], orders -> join[1] (broadcast), join -> printer.
    final IConnectorDescriptor customerToJoin = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(customerToJoin, customerScan, 0, hashJoin, 0);

    final IConnectorDescriptor orderToJoin = new MToNBroadcastConnectorDescriptor(jobSpec);
    jobSpec.connect(orderToJoin, orderScan, 0, hashJoin, 1);

    final IConnectorDescriptor joinToPrinter = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(joinToPrinter, hashJoin, 0, resultPrinter, 0);

    jobSpec.addRoot(resultPrinter);
    runTest(jobSpec);
    System.out.println("output to " + outputPath);
}
Use of org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor in the Apache AsterixDB project.
From class ScanPrintTest, method scanPrint01.
/**
 * Scans two words.txt splits as single-column, comma-delimited UTF-8 string
 * records and pipes them one-to-one into a result writer.
 *
 * Both the scanner and the result writer are constrained to NC2_ID and
 * NC1_ID, in that order, matching the order of the file splits.
 *
 * @throws Exception if building or running the job fails
 */
@Test
public void scanPrint01() throws Exception {
    final JobSpecification jobSpec = new JobSpecification();

    final FileSplit[] wordSplits = new FileSplit[] {
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt"),
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "nc1" + File.separator + "words.txt") };
    final IFileSplitProvider wordSplitProvider = new ConstantFileSplitProvider(wordSplits);

    // One UTF-8 string field per record.
    final RecordDescriptor wordDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    final DelimitedDataTupleParserFactory wordParser = new DelimitedDataTupleParserFactory(
            new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ',');

    final FileScanOperatorDescriptor scanOp =
            new FileScanOperatorDescriptor(jobSpec, wordSplitProvider, wordParser, wordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, scanOp, NC2_ID, NC1_ID);

    final ResultSetId resultSetId = new ResultSetId(1);
    final IOperatorDescriptor resultWriter = new ResultWriterOperatorDescriptor(jobSpec, resultSetId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    jobSpec.addResultSetId(resultSetId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, resultWriter, NC2_ID, NC1_ID);

    final IConnectorDescriptor scanToWriter = new OneToOneConnectorDescriptor(jobSpec);
    jobSpec.connect(scanToWriter, scanOp, 0, resultWriter, 0);

    jobSpec.addRoot(resultWriter);
    runTest(jobSpec);
}
Use of org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor in the Apache AsterixDB project.
From class ScanPrintTest, method scanPrint03.
/**
 * Scans the orders.tbl split on NC2_ID as '|'-delimited records (two integer
 * fields followed by seven UTF-8 string fields) and sends them to a result
 * writer through a connector that hash-partitions on integer field 0.
 *
 * @throws Exception if building or running the job fails
 */
@Test
public void scanPrint03() throws Exception {
    final JobSpecification jobSpec = new JobSpecification();

    final FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    final IFileSplitProvider inputSplitProvider = new ConstantFileSplitProvider(inputSplits);

    // Nine columns: the first two are integers, the remaining seven are strings.
    final int fieldCount = 9;
    final int integerFieldCount = 2;

    final ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        if (i < integerFieldCount) {
            fieldSerdes[i] = IntegerSerializerDeserializer.INSTANCE;
        } else {
            fieldSerdes[i] = new UTF8StringSerializerDeserializer();
        }
    }
    final RecordDescriptor recordDesc = new RecordDescriptor(fieldSerdes);

    final IValueParserFactory[] fieldParsers = new IValueParserFactory[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        if (i < integerFieldCount) {
            fieldParsers[i] = IntegerParserFactory.INSTANCE;
        } else {
            fieldParsers[i] = UTF8StringParserFactory.INSTANCE;
        }
    }

    final FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(jobSpec, inputSplitProvider,
            new DelimitedDataTupleParserFactory(fieldParsers, '|'), recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, scanOp, NC2_ID);

    final ResultSetId resultSetId = new ResultSetId(1);
    final IOperatorDescriptor resultWriter = new ResultWriterOperatorDescriptor(jobSpec, resultSetId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    jobSpec.addResultSetId(resultSetId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, resultWriter, NC2_ID);

    // Partition on the hash of field 0, which is parsed as an integer above.
    final IBinaryHashFunctionFactory[] partitionHashes = new IBinaryHashFunctionFactory[] {
            PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) };
    final IConnectorDescriptor scanToWriter = new MToNPartitioningConnectorDescriptor(jobSpec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, partitionHashes));
    jobSpec.connect(scanToWriter, scanOp, 0, resultWriter, 0);

    jobSpec.addRoot(resultWriter);
    runTest(jobSpec);
}
Aggregations