Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class CountOfCountsTest, method countOfCountsExternalSortMultiNC.
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Scan words.txt (one word per line) on NC2.
    FileSplit[] splits = new FileSplit[] {
            new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(
                    new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','),
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    // Externally sort by word, spread across four partitions on the two NCs.
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    // Count occurrences per word; the input is already clustered by the sort.
    RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
    // Sort the (word, count) pairs by count, in memory.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
    // Count how many words share each count: the "count of counts".
    RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(
                    new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
    // Write the final result set on NC1.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    // Wire the pipeline: hash-partition by word, then by count, broadcast the result.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);
    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
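Every job on this page uses UTF8StringSerializerDeserializer only indirectly, through a RecordDescriptor. As a minimal standalone sketch of what the serde itself does, assuming only the standard java.io stream classes (this fragment is an illustration, not taken from the test):

// Hedged sketch: round-trip a string through the serde using plain java.io streams.
UTF8StringSerializerDeserializer serde = new UTF8StringSerializerDeserializer();
java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
serde.serialize("hello", new java.io.DataOutputStream(baos)); // writes length-prefixed UTF-8 bytes
java.io.DataInputStream in =
        new java.io.DataInputStream(new java.io.ByteArrayInputStream(baos.toByteArray()));
String roundTripped = serde.deserialize(in); // reads back "hello"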
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class AggregationTest, method singleKeyAvgExtGroupTest.
@Test
public void singleKeyAvgExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner =
            new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    // Output schema: key, SUM, COUNT, AVG.
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    // External (spilling) group-by: the first aggregator list produces partial results,
    // the second merges spilled partials (AVG is finished from the partial SUM and COUNT).
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
            keyFields, frameLimits,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory(),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
                    new AvgFieldGroupAggregatorFactory(1, false) }),
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
                    new AvgFieldMergeAggregatorFactory(3, false) }),
            outputRec, outputRec,
            new HashSpillableTableFactory(
                    new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    // Hash-partition on the grouping key so equal keys meet in one partition.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
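The pairing of the two aggregator lists is the essence of the external group-by: the partial side emits SUM, COUNT, and a partial AVG per spill, while the merge side re-sums both integer columns before finishing AVG. A toy illustration of that merge arithmetic in plain Java (not the Hyracks API; the values are made up):

// Two spilled partials for the same key, as (SUM, COUNT) pairs.
int[] partialSums = { 40, 60 };
int[] partialCounts = { 4, 6 };
int sum = 0, count = 0;
for (int i = 0; i < partialSums.length; i++) {
    sum += partialSums[i];     // what IntSumFieldAggregatorFactory(1, ...) does to column 1
    count += partialCounts[i]; // IntSumFieldAggregatorFactory(2, ...): counts are summed, not re-counted
}
float avg = (float) sum / count; // 10.0f, the finish step AvgFieldMergeAggregatorFactory(3, ...) performs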
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class OrderedIndexExamplesTest, method deleteExample.
/**
 * Deletion Example. Create a BTree with one variable-length key field and
 * one variable-length value field. Fill the BTree with random values using
 * insertions, then delete the entries one by one. Repeat the procedure a few
 * times on the same BTree.
 */
@Test
public void deleteExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Deletion Example");
    }
    // Declare fields.
    int fieldCount = 2;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    // Declare keys.
    int keyFieldCount = 1;
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
    cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // This is only used for the LSM-BTree.
    int[] bloomFilterKeyFields = new int[keyFieldCount];
    bloomFilterKeyFields[0] = 0;
    ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    // Number of insert/delete rounds on the same tree.
    int runs = 3;
    for (int run = 0; run < runs; run++) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Deletion example run: " + (run + 1) + "/" + runs);
            LOGGER.info("Inserting into tree...");
        }
        // Max string length to be generated.
        int maxLength = 10;
        int ins = 10000;
        String[] f0s = new String[ins];
        String[] f1s = new String[ins];
        int insDone = 0;
        int[] insDoneCmp = new int[ins];
        for (int i = 0; i < ins; i++) {
            String f0 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
            String f1 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
            TupleUtils.createTuple(tb, tuple, fieldSerdes, f0, f1);
            f0s[i] = f0;
            f1s[i] = f1;
            if (LOGGER.isLoggable(Level.INFO)) {
                if (i % 1000 == 0) {
                    LOGGER.info("Inserting " + i);
                }
            }
            try {
                indexAccessor.insert(tuple);
                insDone++;
            } catch (HyracksDataException e) {
                // Random keys may collide; only a duplicate key is tolerated.
                if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
                    throw e;
                }
            }
            insDoneCmp[i] = insDone;
        }
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Deleting from tree...");
        }
        int delDone = 0;
        for (int i = 0; i < ins; i++) {
            TupleUtils.createTuple(tb, tuple, fieldSerdes, f0s[i], f1s[i]);
            if (LOGGER.isLoggable(Level.INFO)) {
                if (i % 1000 == 0) {
                    LOGGER.info("Deleting " + i);
                }
            }
            try {
                indexAccessor.delete(tuple);
                delDone++;
            } catch (HyracksDataException e) {
                // A key that failed to insert (duplicate) also fails to delete the second time.
                if (e.getErrorCode() != ErrorCode.UPDATE_OR_DELETE_NON_EXISTENT_KEY) {
                    throw e;
                }
            }
            if (insDoneCmp[i] != delDone) {
                if (LOGGER.isLoggable(Level.INFO)) {
                    LOGGER.info("INCONSISTENT STATE, ERROR IN DELETION EXAMPLE.");
                    LOGGER.info("INSDONECMP: " + insDoneCmp[i] + " " + delDone);
                }
                break;
            }
        }
        if (insDone != delDone) {
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info("ERROR! INSDONE: " + insDone + " DELDONE: " + delDone);
            }
            break;
        }
    }
    treeIndex.validate();
    treeIndex.deactivate();
    treeIndex.destroy();
}
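The insDoneCmp bookkeeping deserves a note: because the deletes replay the inserts in the same order, the running delete count must retrace the recorded insert counts exactly. A toy simulation of that invariant, using a plain java.util.HashSet as a stand-in for the BTree (illustrative only; enable -ea to make the assert fire):

java.util.Set<String> tree = new java.util.HashSet<>();
String[] keys = { "a", "b", "a", "c" };
int[] insDoneCmp = new int[keys.length];
int insDone = 0;
for (int i = 0; i < keys.length; i++) {
    if (tree.add(keys[i])) {
        insDone++;             // the second "a" fails, like DUPLICATE_KEY above
    }
    insDoneCmp[i] = insDone;   // records [1, 2, 2, 3]
}
int delDone = 0;
for (int i = 0; i < keys.length; i++) {
    if (tree.remove(keys[i])) {
        delDone++;             // the second "a" fails, like UPDATE_OR_DELETE_NON_EXISTENT_KEY
    }
    assert insDoneCmp[i] == delDone; // the consistency check the test performs
}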
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class OrderedIndexExamplesTest, method varLenKeyValueExample.
/**
 * Variable-Length Example. Create a BTree with one variable-length key
 * field and one variable-length value field. Fill the BTree with random
 * values using insertions (not bulk load), then perform ordered scans and a
 * range search.
 */
@Test
public void varLenKeyValueExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Variable-Length Key,Value Example");
    }
    // Declare fields.
    int fieldCount = 2;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    // Declare keys.
    int keyFieldCount = 1;
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
    cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // This is only used for the LSM-BTree.
    int[] bloomFilterKeyFields = new int[keyFieldCount];
    bloomFilterKeyFields[0] = 0;
    ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    long start = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Inserting into tree...");
    }
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    // Max string length to be generated.
    int maxLength = 10;
    int numInserts = 10000;
    for (int i = 0; i < numInserts; i++) {
        String f0 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
        String f1 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
        TupleUtils.createTuple(tb, tuple, fieldSerdes, f0, f1);
        if (LOGGER.isLoggable(Level.INFO)) {
            if (i % 1000 == 0) {
                LOGGER.info("Inserting[" + i + "] " + f0 + " " + f1);
            }
        }
        try {
            indexAccessor.insert(tuple);
        } catch (HyracksDataException e) {
            // Random keys may collide; only a duplicate key is tolerated.
            if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
                throw e;
            }
        }
    }
    long end = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info(numInserts + " inserts in " + (end - start) + "ms");
    }
    orderedScan(indexAccessor, fieldSerdes);
    diskOrderScan(indexAccessor, fieldSerdes);
    // Build low key.
    ArrayTupleBuilder lowKeyTb = new ArrayTupleBuilder(1);
    ArrayTupleReference lowKey = new ArrayTupleReference();
    TupleUtils.createTuple(lowKeyTb, lowKey, fieldSerdes, "cbf");
    // Build high key.
    ArrayTupleBuilder highKeyTb = new ArrayTupleBuilder(1);
    ArrayTupleReference highKey = new ArrayTupleReference();
    TupleUtils.createTuple(highKeyTb, highKey, fieldSerdes, "cc7");
    rangeSearch(cmpFactories, indexAccessor, fieldSerdes, lowKey, highKey, null, null);
    treeIndex.validate();
    treeIndex.deactivate();
    treeIndex.destroy();
}
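rangeSearch(...) is a helper of the test class; the following is only a hedged sketch of the cursor protocol such a helper presumably wraps, assuming the standard Hyracks RangePredicate/IIndexCursor API with both bounds inclusive (exact signatures may differ across versions):

// Assumed sketch, not the helper's actual body.
MultiComparator searchCmp = MultiComparator.create(cmpFactories);
RangePredicate rangePred = new RangePredicate(lowKey, highKey, true, true, searchCmp, searchCmp);
IIndexCursor rangeCursor = indexAccessor.createSearchCursor(false);
indexAccessor.search(rangeCursor, rangePred);
try {
    while (rangeCursor.hasNext()) {
        rangeCursor.next();
        // Tuples arrive in key order within ["cbf", "cc7"].
        LOGGER.info(TupleUtils.printTuple(rangeCursor.getTuple(), fieldSerdes));
    }
} finally {
    rangeCursor.close();
}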
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
The class DataflowTest, method testHDFSReadWriteOperators.
/**
 * Test a job with only HDFS reads and writes.
 *
 * @throws Exception
 */
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormat(TextInputFormat.class);
    // Let the scheduler compute data-local read locations for the HDFS splits.
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations =
            new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    // Read from HDFS, sort on the raw bytes of field 0, write back to HDFS.
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits,
            readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 },
            new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator =
            new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    // Hash-partition on field 0 and merge the sorted streams so the writer sees sorted input.
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }),
            new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null),
            sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client =
            new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertTrue(checkResults());
}
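The MToNPartitioningMergingConnectorDescriptor is what preserves the order the sorter established: each sender's stream is already sorted on field 0, and the receiver merges the incoming sorted streams instead of interleaving them arbitrarily. A toy, plain-Java illustration of that merge step (made-up data, not the Hyracks API):

// Each producer partition delivers an already-sorted run.
int[][] sortedRuns = { { 1, 4, 7 }, { 2, 3, 9 } };
java.util.PriorityQueue<int[]> heap =          // entries are {value, runIndex, offset}
        new java.util.PriorityQueue<>((a, b) -> Integer.compare(a[0], b[0]));
for (int r = 0; r < sortedRuns.length; r++) {
    heap.add(new int[] { sortedRuns[r][0], r, 0 });
}
while (!heap.isEmpty()) {
    int[] head = heap.poll();
    System.out.print(head[0] + " ");           // emits 1 2 3 4 7 9: still globally sorted
    int next = head[2] + 1;
    if (next < sortedRuns[head[1]].length) {
        heap.add(new int[] { sortedRuns[head[1]][next], head[1], next });
    }
}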