Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class JobBuilder, method contributeMicroOperator:
@Override
public void contributeMicroOperator(ILogicalOperator op, IPushRuntimeFactory runtime, RecordDescriptor recDesc,
        AlgebricksPartitionConstraint pc) {
    microOps.put(op, new Pair<IPushRuntimeFactory, RecordDescriptor>(runtime, recDesc));
    revMicroOpMap.put(runtime, op);
    if (pc != null) {
        pcForMicroOps.put(op, pc);
    }
    AbstractLogicalOperator logicalOp = (AbstractLogicalOperator) op;
    if (logicalOp.getExecutionMode() == ExecutionMode.UNPARTITIONED && pc == null) {
        // unpartitioned operators with no explicit constraint default to a single location
        AlgebricksPartitionConstraint apc = countOneLocation;
        pcForMicroOps.put(logicalOp, apc);
    }
}
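A minimal sketch, assuming the map types implied by the puts above (the class and helper names are hypothetical), of reading a registered micro-operator's record descriptor back out; Pair here is the Algebricks utility class with public first/second fields:

import java.util.Map;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

public class MicroOpLookupSketch {
    // Returns the RecordDescriptor registered for op, or null if none was contributed.
    public static RecordDescriptor descriptorFor(
            Map<ILogicalOperator, Pair<IPushRuntimeFactory, RecordDescriptor>> microOps, ILogicalOperator op) {
        Pair<IPushRuntimeFactory, RecordDescriptor> entry = microOps.get(op);
        return entry == null ? null : entry.second;
    }
}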
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class JobGenHelper, method mkRecordDescriptor:
@SuppressWarnings("rawtypes")
public static RecordDescriptor mkRecordDescriptor(IVariableTypeEnvironment env, IOperatorSchema opSchema,
        JobGenContext context) throws AlgebricksException {
    ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
    ITypeTraits[] typeTraits = new ITypeTraits[opSchema.getSize()];
    ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
    ITypeTraitProvider ttp = context.getTypeTraitProvider();
    int i = 0;
    // one serializer/deserializer and one type trait per variable in the operator schema
    for (LogicalVariable var : opSchema) {
        Object t = env.getVarType(var);
        if (t == null) {
            LOGGER.warning("No type for variable " + var);
        }
        fields[i] = sdp.getSerializerDeserializer(t);
        typeTraits[i] = ttp.getTypeTrait(t);
        i++;
    }
    return new RecordDescriptor(fields, typeTraits);
}
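For comparison, a hand-built descriptor equivalent to what mkRecordDescriptor would produce for a two-column [UTF8String, Integer] schema; a minimal sketch (the class and method names are hypothetical), using only serializers and type traits that already appear in the snippets on this page:

import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class RecordDescriptorSketch {
    @SuppressWarnings("rawtypes")
    public static RecordDescriptor stringIntDescriptor() {
        ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
                new UTF8StringSerializerDeserializer(), // column 0: string
                IntegerSerializerDeserializer.INSTANCE }; // column 1: int
        ITypeTraits[] typeTraits = new ITypeTraits[] {
                UTF8StringPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS };
        return new RecordDescriptor(fields, typeTraits);
    }
}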
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class SecondaryIndexSearchExample, method createJob:
private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;

    // schema of tuples coming out of the secondary index
    RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;

    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);

    // comparator for the primary index key
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);

    // schema of tuples coming out of the primary index
    RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;

    // comparators for the BTree search; note that we only need a comparator for the
    // non-unique key, i.e. we will have a range condition on the first field only
    // (implying [-infinity, +infinity] for the second field)
    IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
    searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);

    // build a tuple containing the low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("f", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
            keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);

    // low key is in field 0 and high key in field 1 of the tuples going into the
    // secondary index search op
    int[] secondaryLowKeyFields = { 0 };
    int[] secondaryHighKeyFields = { 1 };
    IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc,
            secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);

    // the secondary index outputs tuples of [UTF8String, Integer]; the Integer field
    // refers to the key in the primary index of the source data records, so both the
    // low and high key are in field 1 of the tuples going into the primary index search op
    int[] primaryLowKeyFields = { 1 };
    int[] primaryHighKeyFields = { 1 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
            primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);

    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);

    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
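The spec returned above still has to be submitted to a cluster; a minimal sketch of the standard Hyracks client calls a driver would use (the host and port values are hypothetical, and the class name is invented for illustration):

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public class JobRunnerSketch {
    public static void run(JobSpecification spec) throws Exception {
        // hypothetical cluster controller address
        IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
        JobId jobId = hcc.startJob(spec); // submit the job to the cluster controller
        hcc.waitForCompletion(jobId);     // block until the job finishes
    }
}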
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class DataGenOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider,
        int partition, int nPartitions) throws HyracksDataException {
    final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    final RecordDescriptor recDesc = outRecDescs[0];
    final ArrayTupleBuilder tb = new ArrayTupleBuilder(recDesc.getFields().length);
    final Random rnd = new Random(randomSeed);
    final int maxUniqueAttempts = 20;
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        // for quick & dirty exclusion of duplicates
        // WARNING: could contain numRecords entries and use a lot of memory
        HashSet<String> stringHs = new HashSet<String>();
        HashSet<Integer> intHs = new HashSet<Integer>();

        @Override
        public void initialize() throws HyracksDataException {
            try {
                writer.open();
                for (int i = 0; i < numRecords; i++) {
                    tb.reset();
                    for (int j = 0; j < recDesc.getFieldCount(); j++) {
                        genField(tb, j);
                    }
                    if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                        appender.write(writer, true);
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            throw new HyracksDataException("Record size (" + tb.getSize()
                                    + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
                        }
                    }
                }
                appender.write(writer, true);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }

        private void genField(ArrayTupleBuilder tb, int fieldIndex) throws HyracksDataException {
            DataOutput dos = tb.getDataOutput();
            if (recDesc.getFields()[fieldIndex] instanceof IntegerSerializerDeserializer) {
                int val = -1;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        int tmp = Math.abs(rnd.nextInt()) % (intMaxVal - intMinVal) + intMinVal;
                        if (intHs.contains(tmp)) {
                            attempt++;
                        } else {
                            val = tmp;
                            intHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts) {
                        throw new HyracksDataException("MaxUnique attempts reached in datagen");
                    }
                } else {
                    val = Math.abs(rnd.nextInt()) % (intMaxVal - intMinVal) + intMinVal;
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else if (recDesc.getFields()[fieldIndex] instanceof UTF8StringSerializerDeserializer) {
                String val = null;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        String tmp = randomString(maxStrLen, rnd);
                        if (stringHs.contains(tmp)) {
                            attempt++;
                        } else {
                            val = tmp;
                            stringHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts) {
                        throw new HyracksDataException("MaxUnique attempts reached in datagen");
                    }
                } else {
                    val = randomString(maxStrLen, rnd);
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else {
                throw new HyracksDataException("Type unsupported in data generator. Only integers and strings allowed");
            }
        }

        private String randomString(int length, Random random) {
            String s = Long.toHexString(Double.doubleToLongBits(random.nextDouble()));
            StringBuilder strBuilder = new StringBuilder();
            for (int i = 0; i < s.length() && i < length; i++) {
                strBuilder.append(s.charAt(Math.abs(random.nextInt()) % s.length()));
            }
            return strBuilder.toString();
        }
    };
}
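The uniqueness loops above retry a bounded number of times and track seen values in a HashSet. The same pattern in isolation, as a minimal self-contained sketch (names hypothetical); note it uses Random.nextInt(bound), which sidesteps the Math.abs(Integer.MIN_VALUE) edge case lurking in the modulo expression above:

import java.util.Random;
import java.util.Set;

public class UniqueValueSketch {
    // Draw integers uniformly from [min, max) until one is new, up to maxAttempts tries.
    public static int nextUniqueInt(Random rnd, Set<Integer> seen, int min, int max, int maxAttempts) {
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            int candidate = min + rnd.nextInt(max - min);
            if (seen.add(candidate)) { // Set.add returns false for duplicates
                return candidate;
            }
        }
        throw new IllegalStateException("max unique attempts reached");
    }
}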
Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.
The class AbstractBTreeOperatorTest, method loadPrimaryIndex:
protected void loadPrimaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = inputRecordDesc;

    // scan the '|'-delimited orders file
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(inputParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);

    // sort on field 0 so the tuples can be bulk-loaded in key order
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 1000, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);

    // project input columns 0, 1, 2, 4, 5, 7 into the primary index fields
    int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
    TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
            primaryRecDesc, fieldPermutation, 0.7f, true, 1000L, true, primaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);

    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);

    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, primaryBtreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
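The field inputParserFactories is defined elsewhere in the test base class; a plausible sketch of what it could look like for the delimited orders file (an assumption for illustration: every column is parsed as a string, which is consistent with sorting field 0 with a UTF8String comparator above, and the class name is hypothetical):

import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;

public class OrdersParserSketch {
    // One parser per column of the '|'-delimited orders file, in column order.
    static final IValueParserFactory[] INPUT_PARSER_FACTORIES = new IValueParserFactory[] {
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE };
}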