Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in the asterixdb project by Apache.
Class ConnectorDescriptorWithMessagingTest, method testMessageLargerThanEmptyFrame.
/**
 * Flushes a message-carrying partitioner three times without feeding it any data frames and
 * checks what each consumer receives.
 *
 * <p>The shared message frame is first filled by {@code writeRandomMessage} with a
 * {@code MARKER_MESSAGE} and a size argument of {@code DEFAULT_FRAME_SIZE + 1} (presumably a
 * payload that cannot fit into an empty default-sized frame -- confirm against the helper).
 * It is then overwritten with {@code ACK_REQ_FEED_MESSAGE} and {@code NULL_FEED_MESSAGE}.
 * After every flush each consumer is expected to have received exactly one more frame holding
 * a single tuple whose message type matches the one just written. Closing the partitioner is
 * expected to deliver a fourth frame and close every consumer exactly once.
 *
 * @throws Exception if any Hyracks call fails; assertion failures propagate to the test runner
 */
@Test
public void testMessageLargerThanEmptyFrame() throws Exception {
    List<Integer> routing = Arrays.asList(0, 1, 2, 3, 4);
    IConnectorDescriptorRegistry connDescRegistry = Mockito.mock(IConnectorDescriptorRegistry.class);
    ITuplePartitionComputerFactory partitionComputerFactory = new TestPartitionComputerFactory(routing);
    MToNPartitioningWithMessageConnectorDescriptor connector =
            new MToNPartitioningWithMessageConnectorDescriptor(connDescRegistry, partitionComputerFactory);
    IHyracksTaskContext ctx = TestUtils.create(DEFAULT_FRAME_SIZE);
    VSizeFrame message = new VSizeFrame(ctx);
    VSizeFrame tempBuffer = new VSizeFrame(ctx);
    // The partitioner picks the message up from the task's shared map.
    TaskUtil.putInSharedMap(HyracksConstants.KEY_MESSAGE, message, ctx);
    writeRandomMessage(message, MessagingFrameTupleAppender.MARKER_MESSAGE, DEFAULT_FRAME_SIZE + 1);
    ISerializerDeserializer<?>[] serdes = new ISerializerDeserializer<?>[] {
            Integer64SerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
            BooleanSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    RecordDescriptor rDesc = new RecordDescriptor(serdes);
    TestPartitionWriterFactory partitionWriterFactory = new TestPartitionWriterFactory();
    IFrameWriter partitioner = connector.createPartitioner(ctx, rDesc, partitionWriterFactory, CURRENT_PRODUCER,
            NUMBER_OF_CONSUMERS, NUMBER_OF_CONSUMERS);
    partitioner.open();
    FrameTupleAccessor fta = new FrameTupleAccessor(rDesc);
    List<TestFrameWriter> recipients = new ArrayList<>();
    for (IFrameWriter writer : partitionWriterFactory.getWriters().values()) {
        recipients.add((TestFrameWriter) writer);
    }

    // First flush: no data tuples were written, so each frame carries only the marker message.
    partitioner.flush();
    for (TestFrameWriter writer : recipients) {
        Assert.assertEquals(1, writer.nextFrameCount());
        fta.reset(writer.getLastFrame());
        Assert.assertEquals(1, fta.getTupleCount());
        FeedUtils.processFeedMessage(writer.getLastFrame(), tempBuffer, fta);
        Assert.assertEquals(MessagingFrameTupleAppender.MARKER_MESSAGE,
                MessagingFrameTupleAppender.getMessageType(tempBuffer));
    }

    // Second flush: replace the shared message with an ack request.
    message.getBuffer().clear();
    message.getBuffer().put(MessagingFrameTupleAppender.ACK_REQ_FEED_MESSAGE);
    message.getBuffer().flip();
    partitioner.flush();
    for (TestFrameWriter writer : recipients) {
        Assert.assertEquals(2, writer.nextFrameCount());
        fta.reset(writer.getLastFrame());
        Assert.assertEquals(1, fta.getTupleCount());
        FeedUtils.processFeedMessage(writer.getLastFrame(), tempBuffer, fta);
        Assert.assertEquals(MessagingFrameTupleAppender.ACK_REQ_FEED_MESSAGE,
                MessagingFrameTupleAppender.getMessageType(tempBuffer));
    }

    // Third flush: replace the shared message with the null message.
    message.getBuffer().clear();
    message.getBuffer().put(MessagingFrameTupleAppender.NULL_FEED_MESSAGE);
    message.getBuffer().flip();
    partitioner.flush();
    for (TestFrameWriter writer : recipients) {
        Assert.assertEquals(3, writer.nextFrameCount());
        fta.reset(writer.getLastFrame());
        Assert.assertEquals(1, fta.getTupleCount());
        FeedUtils.processFeedMessage(writer.getLastFrame(), tempBuffer, fta);
        Assert.assertEquals(MessagingFrameTupleAppender.NULL_FEED_MESSAGE,
                MessagingFrameTupleAppender.getMessageType(tempBuffer));
    }

    // Closing is expected to push one last frame and close each consumer exactly once.
    partitioner.close();
    for (TestFrameWriter writer : recipients) {
        Assert.assertEquals(4, writer.nextFrameCount());
        Assert.assertEquals(1, writer.closeCount());
    }
}
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in the asterixdb project by Apache.
Class ConnectorDescriptorWithMessagingTest, method testMessageLargerThanSome.
/**
 * Sends one full frame of generated tuples through a message-carrying partitioner whose
 * routing only targets consumers 1, 3 and 4, then flushes and checks delivery.
 *
 * <p>The shared message is a {@code MARKER_MESSAGE} written by {@code writeRandomMessage}
 * with a size argument of {@code DEFAULT_FRAME_SIZE}. After the flush, consumers 1, 3 and 4
 * are expected to have received two frames and consumers 0 and 2 a single frame. For every
 * consumer the last frame must contain exactly one tuple: the marker message.
 *
 * @throws Exception if any Hyracks call fails; assertion failures propagate to the test runner
 */
@Test
public void testMessageLargerThanSome() throws Exception {
    // Routing will be to 1, 3, and 4 only. 0 and 2 will receive no tuples
    List<Integer> routing = Arrays.asList(1, 3, 4);
    IConnectorDescriptorRegistry connDescRegistry = Mockito.mock(IConnectorDescriptorRegistry.class);
    ITuplePartitionComputerFactory partitionComputerFactory = new TestPartitionComputerFactory(routing);
    MToNPartitioningWithMessageConnectorDescriptor connector =
            new MToNPartitioningWithMessageConnectorDescriptor(connDescRegistry, partitionComputerFactory);
    IHyracksTaskContext ctx = TestUtils.create(DEFAULT_FRAME_SIZE);
    VSizeFrame message = new VSizeFrame(ctx);
    VSizeFrame tempBuffer = new VSizeFrame(ctx);
    // The partitioner picks the message up from the task's shared map.
    TaskUtil.putInSharedMap(HyracksConstants.KEY_MESSAGE, message, ctx);
    message.getBuffer().clear();
    writeRandomMessage(message, MessagingFrameTupleAppender.MARKER_MESSAGE, DEFAULT_FRAME_SIZE);
    ISerializerDeserializer<?>[] serdes = new ISerializerDeserializer<?>[] {
            Integer64SerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
            BooleanSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    FieldType[] types = { FieldType.Integer64, FieldType.Double, FieldType.Boolean, FieldType.String };
    RecordDescriptor rDesc = new RecordDescriptor(serdes);
    TestPartitionWriterFactory partitionWriterFactory = new TestPartitionWriterFactory();
    PartitionWithMessageDataWriter partitioner = (PartitionWithMessageDataWriter) connector
            .createPartitioner(ctx, rDesc, partitionWriterFactory, CURRENT_PRODUCER, NUMBER_OF_CONSUMERS,
                    NUMBER_OF_CONSUMERS);
    partitioner.open();
    FrameTupleAccessor fta = new FrameTupleAccessor(rDesc);
    // Collect the consumers by index 0..size-1.
    List<TestFrameWriter> recipients = new ArrayList<>();
    for (int i = 0; i < partitionWriterFactory.getWriters().values().size(); i++) {
        recipients.add(partitionWriterFactory.getWriters().get(i));
    }

    // Fill one frame with generated tuples; the loop ends at the first tuple that is rejected.
    TestTupleGenerator ttg = new TestTupleGenerator(types, STRING_FIELD_SIZES, true);
    VSizeFrame frame = new VSizeFrame(ctx);
    FrameTupleAppender appender = new FrameTupleAppender(frame);
    ITupleReference tuple = ttg.next();
    while (appender.append(tuple)) {
        tuple = ttg.next();
    }
    partitioner.nextFrame(frame.getBuffer());
    partitioner.flush();

    // Routed consumers (1, 3, 4) got two frames; the others (0, 2) got only one.
    Assert.assertEquals(1, partitionWriterFactory.getWriters().get(0).nextFrameCount());
    Assert.assertEquals(2, partitionWriterFactory.getWriters().get(1).nextFrameCount());
    Assert.assertEquals(1, partitionWriterFactory.getWriters().get(2).nextFrameCount());
    Assert.assertEquals(2, partitionWriterFactory.getWriters().get(3).nextFrameCount());
    Assert.assertEquals(2, partitionWriterFactory.getWriters().get(4).nextFrameCount());

    // Whatever came before, the last frame of every consumer holds just the marker message.
    for (TestFrameWriter writer : recipients) {
        fta.reset(writer.getLastFrame());
        Assert.assertEquals(1, fta.getTupleCount());
        FeedUtils.processFeedMessage(writer.getLastFrame(), tempBuffer, fta);
        Assert.assertEquals(MessagingFrameTupleAppender.MARKER_MESSAGE,
                MessagingFrameTupleAppender.getMessageType(tempBuffer));
    }
    partitioner.close();
}
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in the asterixdb project by Apache.
Class SecondaryIndexBulkLoadExample, method createJob.
/**
 * Assembles the job that builds a secondary B-Tree from an existing primary B-Tree.
 *
 * <p>The pipeline is: disk-order scan of the primary index, external sort on fields
 * {@code (1, 0)}, bulk load into the secondary index, null sink. All operators are
 * constrained to the node controllers listed in {@code options.ncs} and are linked with
 * one-to-one connectors.
 *
 * @param options job options; this method reads {@code frameSize}, {@code ncs}
 *        (comma-separated), {@code sbSize}, {@code primaryBTreeName} and
 *        {@code secondaryBTreeName}
 * @return the job specification, with the null sink registered as its root
 */
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;

    // schema of tuples that we are retrieving from the primary index:
    // (integer, string, integer, string)
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            // field 0: we will use this as payload in secondary index
            IntegerSerializerDeserializer.INSTANCE,
            // field 1: we will use this as key in secondary index
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer() });

    // comparators for the sort fields, in sort order: string field 1, then integer field 0
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);

    // use a disk-order scan to read primary index
    IFileSplitProvider primarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec,
            recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);

    // sort the tuples as preparation for bulk load into secondary index
    // fields to sort on
    int[] sortFields = { 1, 0 };
    ExternalSortOperatorDescriptor sorter =
            new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);

    // tuples to be put into B-Tree shall have 2 fields: (string key, integer payload).
    // the B-Tree expects its keyfields to be at the front of its input
    // tuple
    int[] fieldPermutation = { 1, 0 };
    IFileSplitProvider btreeSplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    // NOTE(review): 0.7f is presumably the fill factor and 1000L an element-count hint;
    // confirm against the TreeIndexBulkLoadOperatorDescriptor constructor.
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null,
            fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);

    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);

    // connect the ops
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in the asterixdb project by Apache.
Class AbstractBTreeOperatorTest, method loadSecondaryIndex.
/**
 * Builds and runs a job that fills the secondary index from the primary index.
 *
 * <p>Pipeline: constant key-provider tuple, unbounded search of the primary B-Tree,
 * external sort on {@code secondaryFieldPermutationA}, bulk load into the secondary
 * B-Tree, null sink. Every operator is pinned to {@code NC1_ID}.
 *
 * @throws Exception if serializing the placeholder tuple or running the job fails
 */
protected void loadSecondaryIndex() throws Exception {
    JobSpecification jobSpec = new JobSpecification();

    // Placeholder tuple fed to the search operator: a single serialized "0" field.
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(primaryKeyFieldCount * 2);
    DataOutput tupleOut = tupleBuilder.getDataOutput();
    tupleBuilder.reset();
    new UTF8StringSerializerDeserializer().serialize("0", tupleOut);
    tupleBuilder.addFieldEndOffset();

    ConstantTupleSourceOperatorDescriptor keySource =
            new ConstantTupleSourceOperatorDescriptor(jobSpec, secondaryRecDesc,
                    tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), tupleBuilder.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, keySource, NC1_ID);

    // Null bounds stand for -infinity / +infinity, i.e. a scan of the whole primary index.
    int[] noLowerBound = null;
    int[] noUpperBound = null;
    BTreeSearchOperatorDescriptor primaryScan = new BTreeSearchOperatorDescriptor(jobSpec, primaryRecDesc,
            noLowerBound, noUpperBound, true, true, primaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, primaryScan, NC1_ID);

    // Order the scanned tuples by the secondary keys (two string comparators).
    IBinaryComparatorFactory[] secondaryKeyComparators = {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    ExternalSortOperatorDescriptor secondaryKeySort = new ExternalSortOperatorDescriptor(jobSpec, 1000,
            secondaryFieldPermutationA, secondaryKeyComparators, primaryRecDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, secondaryKeySort, NC1_ID);

    // Bulk load the secondary index from input fields 3 and 0, in that order.
    int[] loadFieldOrder = { 3, 0 };
    TreeIndexBulkLoadOperatorDescriptor secondaryLoad = new TreeIndexBulkLoadOperatorDescriptor(jobSpec,
            secondaryRecDesc, loadFieldOrder, 0.7f, true, 1000L, true, secondaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, secondaryLoad, NC1_ID);

    NullSinkOperatorDescriptor sink = new NullSinkOperatorDescriptor(jobSpec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sink, NC1_ID);

    // Chain the operators one-to-one and run the job with the sink as its root.
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), keySource, 0, primaryScan, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), primaryScan, 0, secondaryKeySort, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), secondaryKeySort, 0, secondaryLoad, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), secondaryLoad, 0, sink, 0);
    jobSpec.addRoot(sink);
    runTest(jobSpec);
}
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in the asterixdb project by Apache.
Class BTreePrimaryIndexScanOperatorTest, method scanPrimaryIndexTest.
/**
 * Runs a job that scans the whole primary B-Tree and writes the result to a file.
 *
 * <p>Pipeline: constant key-provider tuple, unbounded search of the primary B-Tree,
 * plain-file writer using "," as its separator argument. Every operator is pinned to
 * {@code NC1_ID}.
 *
 * @throws Exception if serializing the placeholder tuple or running the job fails
 */
@Test
public void scanPrimaryIndexTest() throws Exception {
    JobSpecification jobSpec = new JobSpecification();

    // Placeholder tuple fed to the search operator: a single serialized "0" field.
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput tupleOut = tupleBuilder.getDataOutput();
    tupleBuilder.reset();
    new UTF8StringSerializerDeserializer().serialize("0", tupleOut);
    tupleBuilder.addFieldEndOffset();

    // The key provider's record descriptor declares two string fields.
    ISerializerDeserializer[] keyFieldSerdes =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyDescriptor = new RecordDescriptor(keyFieldSerdes);
    ConstantTupleSourceOperatorDescriptor keySource = new ConstantTupleSourceOperatorDescriptor(jobSpec,
            keyDescriptor, tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(),
            tupleBuilder.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, keySource, NC1_ID);

    // Null bounds stand for -infinity / +infinity, i.e. a scan of the whole primary index.
    int[] noLowerBound = null;
    int[] noUpperBound = null;
    BTreeSearchOperatorDescriptor primaryScan = new BTreeSearchOperatorDescriptor(jobSpec,
            DataSetConstants.primaryRecDesc, noLowerBound, noUpperBound, true, true, primaryHelperFactory,
            false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, primaryScan, NC1_ID);

    // Results go to a single file created for nc1.
    FileSplit[] outputFiles = { createFile(nc1) };
    IFileSplitProvider outputSplits = new ConstantFileSplitProvider(outputFiles);
    IOperatorDescriptor fileWriter = new PlainFileWriterOperatorDescriptor(jobSpec, outputSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, fileWriter, NC1_ID);

    // Chain the operators one-to-one and run the job with the writer as its root.
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), keySource, 0, primaryScan, 0);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), primaryScan, 0, fileWriter, 0);
    jobSpec.addRoot(fileWriter);
    runTest(jobSpec);
}
Aggregations