use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class JobGenHelper method mkRecordDescriptor.
/**
 * Builds a {@link RecordDescriptor} covering every variable of an operator schema.
 *
 * <p>For each variable in {@code opSchema}, in iteration order, the variable's type is
 * looked up in {@code env} and handed to the context's serializer/deserializer provider
 * and type-trait provider. A variable without a type only triggers a warning; the
 * {@code null} type is still passed on to both providers.
 *
 * @param env type environment used to resolve each variable's type
 * @param opSchema schema whose variables define the fields of the descriptor
 * @param context source of the serializer/deserializer and type-trait providers
 * @return a descriptor with one serializer and one type trait per schema variable
 * @throws AlgebricksException as declared by the signature; presumably raised by the
 *         type lookup (not visible from this method alone)
 */
@SuppressWarnings("rawtypes")
public static RecordDescriptor mkRecordDescriptor(IVariableTypeEnvironment env, IOperatorSchema opSchema,
        JobGenContext context) throws AlgebricksException {
    final int fieldCount = opSchema.getSize();
    ISerializerDeserializer[] serdes = new ISerializerDeserializer[fieldCount];
    ITypeTraits[] traits = new ITypeTraits[fieldCount];
    ISerializerDeserializerProvider serdeProvider = context.getSerializerDeserializerProvider();
    ITypeTraitProvider traitProvider = context.getTypeTraitProvider();

    int slot = 0;
    for (LogicalVariable variable : opSchema) {
        Object type = env.getVarType(variable);
        if (type == null) {
            // Not fatal here: the providers are still consulted with the null type.
            LOGGER.warning("No type for variable " + variable);
        }
        serdes[slot] = serdeProvider.getSerializerDeserializer(type);
        traits[slot] = traitProvider.getTypeTrait(type);
        slot++;
    }
    return new RecordDescriptor(serdes, traits);
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class SecondaryIndexSearchExample method createJob.
/**
 * Builds the job for the secondary-index search example.
 *
 * <p>The pipeline wired up at the end of this method is:
 * constant key provider &rarr; secondary B-Tree search &rarr; primary B-Tree search &rarr; printer.
 * The key provider emits a single tuple holding the low key {@code "0"} and the high key
 * {@code "f"}; the secondary search consumes those, and its output feeds the primary search.
 *
 * @param options command-line options supplying the frame size, the comma-separated node
 *        controller list, and the names of the primary and secondary B-Trees
 * @return the fully connected job specification, rooted at the printer operator
 * @throws HyracksDataException as declared by the signature; presumably raised while
 *         serializing the search keys
 */
private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;

    // schema of tuples coming out of secondary index: [UTF8String, Integer]
    RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });

    // NOTE: the type-trait and comparator arrays below are not passed to any operator in
    // this method; they are kept because they describe the layout of the two indexes.
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;

    // comparators for the secondary index fields
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);

    // comparators for primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    // The array has exactly one slot, so the only valid index is 0 (index 1 would throw
    // ArrayIndexOutOfBoundsException before the job is ever assembled).
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);

    // schema of tuples coming out of primary index: [Integer, UTF8String, Integer, UTF8String]
    RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;

    // comparators for btree, note that we only need a comparator for the non-unique key,
    // i.e. we will have a range condition on the first field only (implying
    // [-infinity, +infinity] for the second field)
    IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
    searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);

    // build a single tuple containing the low and the high search key (two fields per
    // search comparator)
    ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("f", dos);
    tb.addFieldEndOffset();

    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
            keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);

    // low key is in field 0 of tuples going into the secondary index search op
    int[] secondaryLowKeyFields = { 0 };
    // high key is in field 1 of tuples going into the secondary index search op
    int[] secondaryHighKeyFields = { 1 };
    IFileSplitProvider secondarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc,
            secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false,
            null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);

    // secondary index will output tuples with [UTF8String, Integer]; the Integer field
    // (field 1) refers to the key in the primary index of the source data records, so both
    // the low and the high key of the primary search are read from field 1
    int[] primaryLowKeyFields = { 1 };
    int[] primaryHighKeyFields = { 1 };
    IFileSplitProvider primarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
            primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);

    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);

    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class InsertPipelineExample method createJob.
/**
 * Builds the job for the insert-pipeline example.
 *
 * <p>The operators are connected as: data generator &rarr; (hash partitioning on field 0)
 * &rarr; primary index insert &rarr; secondary index insert &rarr; null sink.
 *
 * @param options command-line options supplying the frame size, the comma-separated node
 *        controller list, the number of tuples to generate, and the B-Tree names
 * @return the fully connected job specification, rooted at the null sink
 */
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");

    // Generated tuples have five fields: string, string, int, int, string.
    // Field 2 becomes the primary key of the clustered index; field 0 appears in
    // neither field permutation below, so it never reaches a B-Tree.
    ISerializerDeserializer[] generatedFields = {
            new UTF8StringSerializerDeserializer(), // field 0: not inserted into any index
            new UTF8StringSerializerDeserializer(), // field 1: payload
            IntegerSerializerDeserializer.INSTANCE, // field 2: key
            IntegerSerializerDeserializer.INSTANCE, // field 3: payload
            new UTF8StringSerializerDeserializer() // field 4: payload
    };
    RecordDescriptor recDesc = new RecordDescriptor(generatedFields);

    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], strings with max length of 10 characters, and random seed 100
    DataGenOperatorDescriptor dataGen =
            new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 100);
    // the data generator runs on the first node controller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);

    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;

    // ---- primary index: tuples put into the B-Tree have 4 fields ----
    ITypeTraits[] primaryTypeTraits = {
            IntegerPointable.TYPE_TRAITS,
            UTF8StringPointable.TYPE_TRAITS,
            IntegerPointable.TYPE_TRAITS,
            UTF8StringPointable.TYPE_TRAITS
    };
    IBinaryComparatorFactory[] primaryComparatorFactories =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    // The B-Tree expects its key fields at the front of its input tuple:
    // input field 2 maps to B-Tree field 0, then fields 1, 3 and 4 follow.
    int[] primaryFieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider primarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, primaryFieldPermutation,
                    IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);

    // ---- secondary index: tuples put into the B-Tree have 2 fields ----
    ITypeTraits[] secondaryTypeTraits = { UTF8StringPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS };
    IBinaryComparatorFactory[] secondaryComparatorFactories = {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY)
    };
    // again, key fields go first: input field 1 followed by input field 2
    int[] secondaryFieldPermutation = { 1, 2 };
    IFileSplitProvider secondarySplitProvider =
            JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, secondaryFieldPermutation,
                    IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);

    // the insert pipeline ends at this sink operator
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nullSink, splitNCs);

    // records leaving the data generator are hash-partitioned on field 0 (a string)
    // before they reach the insert operators
    IBinaryHashFunctionFactory[] hashFactories =
            { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));

    // wire the operators together
    spec.connect(hashConn, dataGen, 0, primaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsert, 0, secondaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsert, 0, nullSink, 0);
    spec.addRoot(nullSink);
    return spec;
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class PrimaryIndexBulkLoadExample method createJob.
/**
 * Builds the job for the primary-index bulk-load example.
 *
 * <p>The operators are connected as: data generator &rarr; (hash partitioning on field 0)
 * &rarr; external sort on field 2 &rarr; B-Tree bulk load &rarr; null sink.
 *
 * @param options command-line options supplying the frame size, the comma-separated node
 *        controller list, the number of tuples to generate, the sort buffer size
 *        ({@code sbSize}) and the B-Tree name
 * @return the fully connected job specification, rooted at the null sink
 */
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");

    // Generated tuples have five fields: string, string, int, int, string.
    // Field index 2 is the primary key used to fill the clustered index; field 0 is
    // absent from the field permutation below and therefore never enters the B-Tree.
    ISerializerDeserializer[] generatedFields = {
            new UTF8StringSerializerDeserializer(), // field 0: not loaded into the B-Tree
            new UTF8StringSerializerDeserializer(), // field 1: payload
            IntegerSerializerDeserializer.INSTANCE, // field 2: key
            IntegerSerializerDeserializer.INSTANCE, // field 3: payload
            new UTF8StringSerializerDeserializer() // field 4: payload
    };
    RecordDescriptor recDesc = new RecordDescriptor(generatedFields);

    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], strings with max length of 10 characters, and random seed 50
    DataGenOperatorDescriptor dataGen =
            new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 50);
    // the data generator runs on the first node controller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);

    // tuples are sorted on field 2 as preparation for the bulk load
    int[] sortFields = { 2 };
    IBinaryComparatorFactory[] comparatorFactories =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    ExternalSortOperatorDescriptor sorter =
            new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);

    // tuples put into the B-Tree have 4 fields
    ITypeTraits[] typeTraits = {
            IntegerPointable.TYPE_TRAITS,
            UTF8StringPointable.TYPE_TRAITS,
            IntegerPointable.TYPE_TRAITS,
            UTF8StringPointable.TYPE_TRAITS
    };

    // providers for the B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // The B-Tree expects its key fields at the front of its input tuple:
    // input field 2 maps to B-Tree field 0, then fields 1, 3 and 4 follow.
    int[] fieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory =
            new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, recDesc,
            fieldPermutation, 0.7f, false, 1000L, true, dataflowHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);

    // records leaving the data generator are hash-partitioned on field 0 (a string)
    // on their way to the sort / bulk-load operators
    IBinaryHashFunctionFactory[] hashFactories =
            { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));

    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);

    // wire the operators together
    spec.connect(hashConn, dataGen, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class LSMBTreeUtil method createExternalBTreeWithBuddy.
/**
 * Assembles an {@link ExternalBTreeWithBuddy} from its building blocks.
 *
 * <p>The buddy B-Tree's type traits and comparator factories are obtained by selecting the
 * entries of {@code typeTraits} and {@code cmpFactories} at the positions listed in
 * {@code buddyBTreeFields}. The bloom filter is keyed on fields {@code 0..n-1}, where
 * {@code n} is the number of buddy B-Tree fields.
 *
 * @param ioManager I/O manager handed to every factory and to the resulting index
 * @param file file reference passed to the index file manager
 * @param diskBufferCache buffer cache used by the disk component factories
 * @param diskFileMapProvider file map provider used by the disk component factories
 * @param typeTraits type traits of the main B-Tree's fields
 * @param cmpFactories comparator factories of the main B-Tree; its length is also passed
 *        to both LSM tuple writer factories
 * @param bloomFilterFalsePositiveRate forwarded unchanged to the resulting index
 * @param mergePolicy forwarded unchanged to the resulting index
 * @param opTracker forwarded unchanged to the resulting index
 * @param ioScheduler forwarded unchanged to the resulting index
 * @param ioOpCallback forwarded unchanged to the resulting index
 * @param buddyBTreeFields positions within {@code typeTraits}/{@code cmpFactories} that make
 *        up the buddy B-Tree
 * @param durable forwarded unchanged to the resulting index
 * @param freePageManagerFactory page manager factory given to every B-Tree factory
 * @return the newly constructed index
 */
public static ExternalBTreeWithBuddy createExternalBTreeWithBuddy(IIOManager ioManager, FileReference file,
        IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
        IBinaryComparatorFactory[] cmpFactories, double bloomFilterFalsePositiveRate,
        ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
        ILSMIOOperationCallback ioOpCallback, int[] buddyBTreeFields, boolean durable,
        IMetadataPageManagerFactory freePageManagerFactory) {
    // Select the buddy B-Tree's traits and comparators out of the main tree's arrays.
    final int buddyFieldCount = buddyBTreeFields.length;
    ITypeTraits[] buddyTraits = new ITypeTraits[buddyFieldCount];
    IBinaryComparatorFactory[] buddyComparators = new IBinaryComparatorFactory[buddyFieldCount];
    for (int pos = 0; pos < buddyFieldCount; pos++) {
        int sourceField = buddyBTreeFields[pos];
        buddyTraits[pos] = typeTraits[sourceField];
        buddyComparators[pos] = cmpFactories[sourceField];
    }

    // Frame factories for the buddy B-Tree.
    TypeAwareTupleWriterFactory buddyTupleWriters = new TypeAwareTupleWriterFactory(buddyTraits);
    ITreeIndexFrameFactory buddyInteriorFrames = new BTreeNSMInteriorFrameFactory(buddyTupleWriters);
    ITreeIndexFrameFactory buddyLeafFrames = new BTreeNSMLeafFrameFactory(buddyTupleWriters);

    // Frame factories for the main B-Tree: one leaf factory per tuple writer flavour.
    LSMBTreeTupleWriterFactory insertTupleWriters =
            new LSMBTreeTupleWriterFactory(typeTraits, cmpFactories.length, false);
    LSMBTreeCopyTupleWriterFactory copyTupleWriters =
            new LSMBTreeCopyTupleWriterFactory(typeTraits, cmpFactories.length);
    ITreeIndexFrameFactory insertLeafFrames = new BTreeNSMLeafFrameFactory(insertTupleWriters);
    ITreeIndexFrameFactory copyLeafFrames = new BTreeNSMLeafFrameFactory(copyTupleWriters);
    ITreeIndexFrameFactory interiorFrames = new BTreeNSMInteriorFrameFactory(insertTupleWriters);

    // The disk factory uses the copy-tuple leaf frames, the bulk-load factory the insert ones.
    TreeIndexFactory<BTree> diskBTreeFactory = new BTreeFactory(ioManager, diskBufferCache,
            diskFileMapProvider, freePageManagerFactory, interiorFrames, copyLeafFrames, cmpFactories,
            typeTraits.length);
    TreeIndexFactory<BTree> bulkLoadBTreeFactory = new BTreeFactory(ioManager, diskBufferCache,
            diskFileMapProvider, freePageManagerFactory, interiorFrames, insertLeafFrames, cmpFactories,
            typeTraits.length);

    // Bloom filter keys are simply the leading fields 0..n-1 of the buddy B-Tree.
    int[] bloomKeys = new int[buddyComparators.length];
    for (int k = 0; k < bloomKeys.length; k++) {
        bloomKeys[k] = k;
    }
    BloomFilterFactory bloomFilterFactory =
            new BloomFilterFactory(diskBufferCache, diskFileMapProvider, bloomKeys);

    // buddy b-tree factory
    TreeIndexFactory<BTree> diskBuddyBTreeFactory = new BTreeFactory(ioManager, diskBufferCache,
            diskFileMapProvider, freePageManagerFactory, buddyInteriorFrames, buddyLeafFrames,
            buddyComparators, buddyTraits.length);

    ILSMIndexFileManager fileNameManager = new LSMBTreeWithBuddyFileManager(ioManager, diskFileMapProvider,
            file, diskBTreeFactory, diskBuddyBTreeFactory);

    return new ExternalBTreeWithBuddy(ioManager, interiorFrames, insertLeafFrames, buddyLeafFrames,
            diskBufferCache, fileNameManager, bulkLoadBTreeFactory, diskBTreeFactory, diskBuddyBTreeFactory,
            bloomFilterFactory, diskFileMapProvider, bloomFilterFalsePositiveRate, mergePolicy, opTracker,
            ioScheduler, ioOpCallback, cmpFactories, buddyComparators, buddyBTreeFields, durable);
}
Aggregations