use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class CountOfCountsTest method countOfCountsExternalSortMultiNC.
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
ResultSetId rsId = new ResultSetId(1);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, sorter, 0);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, sorter, 0, group, 0);
IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn3, group, 0, sorter2, 0);
IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn4, sorter2, 0, group2, 0);
IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(conn5, group2, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class AbstractRTreeOperatorTest method setup.
@Before
public void setup() throws Exception {
testHelper = createTestHelper();
primaryFileName = testHelper.getPrimaryIndexName();
primarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, primaryFileName) });
primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
secondaryFileName = testHelper.getSecondaryIndexName();
secondarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, secondaryFileName) });
secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
// field, type and key declarations for primary index
primaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[5] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[6] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[7] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[8] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[9] = UTF8StringPointable.TYPE_TRAITS;
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
// field, type and key declarations for secondary indexes
secondaryTypeTraits[0] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[1] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[2] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[3] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
// This only used for LSMRTree
int[] rtreeFields = null;
int[] filterFields = null;
ITypeTraits[] filterTypes = null;
IBinaryComparatorFactory[] filterCmpFactories = null;
if (rTreeType == RTreeType.LSMRTREE || rTreeType == RTreeType.LSMRTREE_WITH_ANTIMATTER) {
rtreeFields = new int[] { 0, 1, 2, 3, 4 };
filterFields = new int[] { 4 };
filterTypes = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
filterCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
}
int[] btreeFields = null;
if (rTreeType == RTreeType.LSMRTREE) {
btreeKeyFieldCount = 1;
btreeComparatorFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
btreeFields = new int[] { 4 };
} else {
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
}
IPrimitiveValueProviderFactory[] secondaryValueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(secondaryComparatorFactories.length, DoublePointable.FACTORY);
rtreeFactory = createSecondaryResourceFactory(secondaryValueProviderFactories, RTreePolicyType.RSTARTREE, btreeComparatorFactories, LSMRTreeUtils.proposeBestLinearizer(secondaryTypeTraits, secondaryComparatorFactories.length), btreeFields);
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class FileSplitDomain method toString.
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("FileSplitDomain[");
boolean fst = true;
for (FileSplit fs : splits) {
if (fst) {
fst = false;
} else {
sb.append(", ");
}
sb.append(fs.getNodeName() + ":" + fs.getPath());
}
sb.append(']');
return sb.toString();
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class WordCountMain method parseFileSplits.
private static FileSplit[] parseFileSplits(String fileSplits) {
String[] splits = fileSplits.split(",");
FileSplit[] fSplits = new FileSplit[splits.length];
for (int i = 0; i < splits.length; ++i) {
String s = splits[i].trim();
int idx = s.indexOf(':');
if (idx < 0) {
throw new IllegalArgumentException("File split " + s + " not well formed");
}
File file = new File(s.substring(idx + 1));
fSplits[i] = new UnmanagedFileSplit(s.substring(0, idx), file.getAbsolutePath());
fileSize += file.length();
}
return fSplits;
}
use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
the class TPCHCustomerOptimizedHybridHashJoinTest method customerOrderCIDHybridHashJoin_Case1.
@Test
public void customerOrderCIDHybridHashJoin_Case1() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer4.tbl") };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders4.tbl") };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243, 1.2, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
String path = getClass().getName() + File.separator + "case1";
IOperatorDescriptor printer = getPrinter(spec, path);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
spec.connect(custJoinConn, custScanner, 0, join, 0);
IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(ordJoinConn, ordScanner, 0, join, 1);
IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
spec.connect(joinPrinterConn, join, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
System.out.println("output to " + path);
}
Aggregations