Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class Join, method createJob.
private static JobSpecification createJob(FileSplit[] customerSplits, FileSplit[] orderSplits,
        FileSplit[] resultSplits, int numJoinPartitions, String algo, int graceInputSize,
        int graceRecordsPerFrame, double graceFactor, int memSize, int tableSize, boolean hasGroupBy,
        int frameSize) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(frameSize);
    // Compute the total input sizes; they are later used to size the external group-by.
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(customerSplits);
    long custFileSize = 0;
    for (int i = 0; i < customerSplits.length; i++) {
        custFileSize += customerSplits[i].getFile(null).length();
    }
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(orderSplits);
    long orderFileSize = 0;
    for (int i = 0; i < orderSplits.length; i++) {
        orderFileSize += orderSplits[i].getFile(null).length();
    }
    // Scan the ORDERS and CUSTOMER input files.
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
            new DelimitedDataTupleParserFactory(orderParserFactories, '|'), Common.ordersDesc);
    createPartitionConstraint(spec, ordScanner, orderSplits);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
            new DelimitedDataTupleParserFactory(custParserFactories, '|'), Common.custDesc);
    createPartitionConstraint(spec, custScanner, customerSplits);
    // Select the join algorithm.
    IOperatorDescriptor join;
    if ("nestedloop".equalsIgnoreCase(algo)) {
        join = new NestedLoopJoinOperatorDescriptor(spec,
                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
                Common.custOrderJoinDesc, memSize, false, null);
    } else if ("inmem".equalsIgnoreCase(algo)) {
        join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 0 }, new int[] { 1 },
                new IBinaryHashFunctionFactory[] {
                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) },
                new IBinaryComparatorFactory[] {
                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                Common.custOrderJoinDesc, tableSize, null, memSize * frameSize);
    } else if ("hybrid".equalsIgnoreCase(algo)) {
        join = new OptimizedHybridHashJoinOperatorDescriptor(spec, memSize, graceInputSize, graceFactor,
                new int[] { 0 }, new int[] { 1 },
                new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE },
                new IBinaryComparatorFactory[] {
                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                Common.custOrderJoinDesc,
                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1),
                new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0),
                null);
    } else {
        System.err.println("unknown algorithm: " + algo);
        return null;
    }
    PartitionConstraintHelper.addPartitionCountConstraint(spec, join, numJoinPartitions);
    // Hash-partition both inputs on their join keys.
    IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor custJoinConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IOperatorDescriptor endingOp = join;
    // Optionally aggregate the join output with an external group-by.
    if (hasGroupBy) {
        RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
        ExternalGroupOperatorDescriptor gby = new ExternalGroupOperatorDescriptor(spec, tableSize,
                custFileSize + orderFileSize, new int[] { 6 }, memSize,
                new IBinaryComparatorFactory[] {
                        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                new UTF8StringNormalizedKeyComputerFactory(),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false),
                        new FloatSumFieldAggregatorFactory(5, false) }),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
                        new FloatSumFieldAggregatorFactory(3, false) }),
                groupResultDesc, groupResultDesc,
                new HashSpillableTableFactory(
                        new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, gby, resultSplits);
        IConnectorDescriptor joinGroupConn = new MToNPartitioningConnectorDescriptor(spec,
                new FieldHashPartitionComputerFactory(new int[] { 6 }, new IBinaryHashFunctionFactory[] {
                        PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(joinGroupConn, join, 0, gby, 0);
        endingOp = gby;
    }
    // Write the result as pipe-delimited text.
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(resultSplits);
    // FrameFileWriterOperatorDescriptor writer = new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    IOperatorDescriptor writer = new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|");
    createPartitionConstraint(spec, writer, resultSplits);
    IConnectorDescriptor endingPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(endingPrinterConn, endingOp, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
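For context, a specification built by createJob is submitted through the standard Hyracks client API (the HDFS test below does the same). A minimal sketch, where the host, port, and every tuning value are placeholders rather than values taken from this project:

// Hypothetical driver: build the join job and run it on a cluster controller.
IHyracksClientConnection client = new HyracksConnection("localhost", 1098); // placeholder host/port
JobSpecification spec = createJob(customerSplits, orderSplits, resultSplits,
        4,            // numJoinPartitions
        "hybrid",     // algo: "nestedloop", "inmem", or "hybrid"
        32, 256, 1.2, // grace-join tuning: input size, records per frame, fudge factor
        64,           // memSize, in frames
        8191,         // tableSize
        true,         // hasGroupBy
        32768);       // frameSize, in bytes
JobId jobId = client.startJob(spec);
client.waitForCompletion(jobId);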
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class DataflowTest, method testHDFSReadWriteOperators.
/**
* Test a job with only HDFS reads and writes.
*
* @throws Exception
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormatClass(TextInputFormat.class);
    // Build an HDFS-locality-aware read schedule for the input splits.
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputFormat inputFormat = ReflectionUtils.newInstance(conf.getInputFormatClass(), getConfiguration());
    List<InputSplit> splits = inputFormat.getSplits(conf);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations =
            new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    // Read from HDFS, sort, then write back to HDFS.
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits,
            readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 },
            new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator =
            new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }),
            new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null),
            sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    // Submit the job and wait for it to finish.
    IHyracksClientConnection client =
            new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertTrue(checkResults());
}
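The checkResults() helper is not shown on this page; a plausible sketch, assuming it compares the job's HDFS output against a local file of expected lines (the part-file name and expected path are assumptions, not the project's actual code):

// Hypothetical verification: read the HDFS output back and compare it to an expected local file.
private boolean checkResults() throws Exception {
    FileSystem fs = FileSystem.get(conf.getConfiguration());
    StringBuilder actual = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(HDFS_OUTPUT_PATH + "/part-0"))))) {
        String line;
        while ((line = reader.readLine()) != null) {
            actual.append(line).append('\n');
        }
    }
    String expected = new String(Files.readAllBytes(Paths.get("expected/part-0")));
    return expected.equals(actual.toString());
}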
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class AbstractRTreeExamplesTest, method rStarTreePageSplitTestExample.
/**
 * Tests the R*-tree page split. Originally this test didn't pass, because the
 * R*-tree always assumes there will be enough space for the new tuple after a
 * split. It passes now: if there is no room in the designated page, the tuple
 * is simply inserted into the other split page.
 */
@Test
public void rStarTreePageSplitTestExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("R*Tree page split test.");
    }
    // Declare fields.
    int fieldCount = 5;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = IntegerPointable.TYPE_TRAITS;
    typeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    // Declare RTree keys.
    int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    rtreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // Declare BTree keys; these are only used for the LSM-RTree.
    int btreeKeyFieldCount;
    IBinaryComparatorFactory[] btreeCmpFactories;
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        // Parameters differ between the LSM-RTree and the LSM-RTree with anti-matter tuples.
        btreeKeyFieldCount = 1;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
        btreeFields = new int[btreeKeyFieldCount];
        for (int i = 0; i < btreeKeyFieldCount; i++) {
            btreeFields[i] = rtreeKeyFieldCount + i;
        }
    } else {
        btreeKeyFieldCount = 5;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    }
    // Create value providers.
    IPrimitiveValueProviderFactory[] valueProviderFactories =
            RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);
    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories,
            valueProviderFactories, RTreePolicyType.RSTARTREE, null, btreeFields, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    // Two inserts with a large (~210-character) payload...
    int p1x = rnd.nextInt();
    int p1y = rnd.nextInt();
    int p2x = rnd.nextInt();
    int p2y = rnd.nextInt();
    String data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    // ...two inserts with a small payload...
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    // ...and two more large inserts to force the page split.
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    treeIndex.deactivate();
    treeIndex.destroy();
}
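The six generate-and-insert blocks above repeat one pattern and could be collapsed into a helper. A sketch, assuming rnd is the test's random generator and that the helper (whose name is hypothetical) lives in the same test class:

// Hypothetical helper: build and insert one random rectangle with an 'X' payload of the given length.
private void insertRandomRectangle(IIndexAccessor indexAccessor, ArrayTupleBuilder tb, ArrayTupleReference tuple,
        ISerializerDeserializer[] fieldSerdes, int payloadLength) throws Exception {
    int p1x = rnd.nextInt();
    int p1y = rnd.nextInt();
    int p2x = rnd.nextInt();
    int p2y = rnd.nextInt();
    StringBuilder data = new StringBuilder(payloadLength);
    for (int i = 0; i < payloadLength; i++) {
        data.append('X');
    }
    // Normalize the two random corners into (minX, minY, maxX, maxY) order.
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data.toString());
    indexAccessor.insert(tuple);
}

With it, the body above becomes two calls with payloadLength 210, two with 3, and two more with 210.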
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class AbstractRTreeExamplesTest, method rTreePageSplitTestExample.
/**
 * Tests the R-tree page split. Originally this test didn't pass, because the
 * R-tree always assumes there will be enough space for the new tuple after a
 * split. It passes now: if there is no room in the designated page, the tuple
 * is simply inserted into the other split page.
 */
@Test
public void rTreePageSplitTestExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("RTree page split test.");
    }
    // Declare fields.
    int fieldCount = 5;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = IntegerPointable.TYPE_TRAITS;
    typeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    // Declare RTree keys.
    int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    rtreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // Declare BTree keys; these are only used for the LSM-RTree.
    int btreeKeyFieldCount;
    IBinaryComparatorFactory[] btreeCmpFactories;
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        // Parameters differ between the LSM-RTree and the LSM-RTree with anti-matter tuples.
        btreeKeyFieldCount = 1;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
        btreeFields = new int[btreeKeyFieldCount];
        for (int i = 0; i < btreeKeyFieldCount; i++) {
            btreeFields[i] = rtreeKeyFieldCount + i;
        }
    } else {
        btreeKeyFieldCount = 5;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    }
    // Create value providers.
    IPrimitiveValueProviderFactory[] valueProviderFactories =
            RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);
    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories,
            valueProviderFactories, RTreePolicyType.RTREE, null, btreeFields, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    // One insert with a large (~210-character) payload...
    int p1x = rnd.nextInt();
    int p1y = rnd.nextInt();
    int p2x = rnd.nextInt();
    int p2y = rnd.nextInt();
    String data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    // ...three inserts with a small payload...
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    // ...and two more large inserts to force the page split.
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
            Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    treeIndex.deactivate();
    treeIndex.destroy();
}
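A side note on the payload construction used in both split tests: building the 210-character string with += in a loop allocates a new String on every iteration. A behavior-equivalent alternative (a sketch, not the project's code):

// Build the same 210-character "XXX..." payload without quadratic string concatenation.
char[] payload = new char[210];
java.util.Arrays.fill(payload, 'X');
String data = new String(payload);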
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class OrderedIndexTestDriver, method oneStringKeyAndValue.
@Test
public void oneStringKeyAndValue() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("BTree " + getTestOpName() + " Test With One String Key And Value.");
    }
    ISerializerDeserializer[] fieldSerdes =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    // Range search in ["cbf", "cc7"].
    ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
    ITupleReference highKey = TupleUtils.createTuple(fieldSerdes, "cc7");
    for (BTreeLeafFrameType leafFrameType : leafFrameTypesToTest) {
        runTest(fieldSerdes, 1, leafFrameType, lowKey, highKey, null, null);
    }
}
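The low and high keys above are ordered by the same UTF-8 binary comparator used throughout these examples. A minimal sketch of exercising that comparator directly on serialized keys (all variable names are illustrative):

// Serialize two string keys and compare their binary forms, as the range search does internally.
ByteArrayOutputStream lowBytes = new ByteArrayOutputStream();
new UTF8StringSerializerDeserializer().serialize("cbf", new DataOutputStream(lowBytes));
ByteArrayOutputStream highBytes = new ByteArrayOutputStream();
new UTF8StringSerializerDeserializer().serialize("cc7", new DataOutputStream(highBytes));
IBinaryComparator cmp =
        PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY).createBinaryComparator();
int c = cmp.compare(lowBytes.toByteArray(), 0, lowBytes.size(), highBytes.toByteArray(), 0, highBytes.size());
// c < 0 here: "cbf" sorts before "cc7", so the range ["cbf", "cc7"] is non-empty.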