Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
From class RebalanceUtil, method createPrimaryIndexUpsertOp.
// Creates the primary index upsert operator for populating the target dataset.
private static IOperatorDescriptor createPrimaryIndexUpsertOp(JobSpecification spec,
        MetadataProvider metadataProvider, Dataset source, Dataset target) throws AlgebricksException {
    int numKeys = source.getPrimaryKeys().size();
    // One payload field, plus a meta field if the source dataset has a meta part.
    int numValues = source.hasMetaPart() ? 2 : 1;
    // Identity permutation: input field i feeds field i of the upsert operator.
    int[] fieldPermutation = IntStream.range(0, numKeys + numValues).toArray();
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertOpAndConstraints =
            DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, target,
                    source.getPrimaryRecordDescriptor(metadataProvider), fieldPermutation,
                    MissingWriterFactory.INSTANCE);
    IOperatorDescriptor upsertOp = upsertOpAndConstraints.first;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, upsertOp,
            upsertOpAndConstraints.second);
    return upsertOp;
}
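The field permutation here is the identity mapping, so each input field passes through to the same position in the upsert operator. A standalone sketch of what that construction produces, with hypothetical field counts:

import java.util.Arrays;
import java.util.stream.IntStream;

public class FieldPermutationSketch {
    public static void main(String[] args) {
        int numKeys = 2;   // assumed: two primary-key fields
        int numValues = 1; // assumed: one record field, no meta part
        // Same construction as in createPrimaryIndexUpsertOp above.
        int[] fieldPermutation = IntStream.range(0, numKeys + numValues).toArray();
        System.out.println(Arrays.toString(fieldPermutation)); // prints [0, 1, 2]
    }
}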
Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
From class TokenizePOperator, method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context,
        ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
        IOperatorSchema outerPlanSchema) throws AlgebricksException {
    TokenizeOperator tokenizeOp = (TokenizeOperator) op;
    // Tokenization is only contributed for bulk-load INSERT pipelines.
    if (tokenizeOp.getOperation() != Kind.INSERT || !tokenizeOp.isBulkload()) {
        throw new AlgebricksException("Tokenize Operator only works when bulk-loading data.");
    }
    IMetadataProvider mp = context.getMetadataProvider();
    IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(op);
    JobSpecification spec = builder.getJobSpec();
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(
            context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints =
            mp.getTokenizerRuntime(dataSourceIndex, propagatedSchema, inputSchemas, typeEnv,
                    primaryKeys, secondaryKeys, null, inputDesc, context, spec, true);
    builder.contributeHyracksOperator(tokenizeOp, runtimeAndConstraints.first);
    builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first,
            runtimeAndConstraints.second);
    ILogicalOperator src = tokenizeOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, tokenizeOp, 0);
}
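The guard at the top is the key contract: tokenization is only valid inside a bulk-load INSERT pipeline. A minimal, self-contained sketch of that check, using hypothetical stand-in types rather than the real Algebricks classes:

public class TokenizeGuardSketch {
    enum Kind { INSERT, DELETE, UPSERT }

    // Mirrors the guard in contributeRuntimeOperator above.
    static void requireBulkloadInsert(Kind operation, boolean bulkload) {
        if (operation != Kind.INSERT || !bulkload) {
            throw new IllegalStateException("Tokenize Operator only works when bulk-loading data.");
        }
    }

    public static void main(String[] args) {
        requireBulkloadInsert(Kind.INSERT, true); // passes
        try {
            requireBulkloadInsert(Kind.UPSERT, true);
        } catch (IllegalStateException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}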
Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
From class JobBuilder, method setPartitionConstraintsBottomup.
private void setPartitionConstraintsBottomup(OperatorDescriptorId opId,
        Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp,
        boolean finalPass) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint = null;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>, org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p =
                    jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            // Bottom-up: resolve the source operator's constraint before this operator's.
            setPartitionConstraintsBottomup(src.getOperatorId(), tgtConstraints, opDesc, finalPass);
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint != null) {
                switch (constraint) {
                    case ONE:
                        opConstraint = countOneLocation;
                        break;
                    case SAME_COUNT:
                        opConstraint = partitionConstraintMap.get(src);
                        break;
                }
            }
        }
    }
    if (partitionConstraintMap.get(opDesc) == null) {
        // On the final pass, operators with no inputs default to a single location,
        // and anything still unconstrained runs on the whole cluster.
        if (finalPass && opConstraint == null && (opInputs == null || opInputs.isEmpty())) {
            opConstraint = countOneLocation;
        }
        if (finalPass && opConstraint == null) {
            opConstraint = clusterLocations;
        }
        // Sets up the location constraint.
        if (opConstraint != null) {
            partitionConstraintMap.put(opDesc, opConstraint);
            AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, opDesc, opConstraint);
        }
    }
}
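A minimal sketch of the bottom-up idea with hypothetical stand-in types (not the real Hyracks classes): sources are resolved first; a node then takes either a single location (ONE) or its source's partition count (SAME_COUNT), and the final pass fills in defaults.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BottomupConstraintSketch {
    enum TargetConstraint { ONE, SAME_COUNT }

    static class Node {
        final String name;
        final List<Node> inputs = new ArrayList<>();
        TargetConstraint incoming; // constraint on the edge feeding this node
        Node(String name) { this.name = name; }
    }

    static final int CLUSTER_PARTITIONS = 4; // assumed cluster size
    static final Map<Node, Integer> partitions = new HashMap<>();

    static void setBottomup(Node op, boolean finalPass) {
        Integer constraint = null;
        for (Node src : op.inputs) {
            setBottomup(src, finalPass); // resolve sources before this node
            if (op.incoming == TargetConstraint.ONE) {
                constraint = 1;
            } else if (op.incoming == TargetConstraint.SAME_COUNT) {
                constraint = partitions.get(src);
            }
        }
        if (!partitions.containsKey(op)) {
            if (finalPass && constraint == null) {
                // leaves default to one location; everything else to the cluster
                constraint = op.inputs.isEmpty() ? 1 : CLUSTER_PARTITIONS;
            }
            if (constraint != null) {
                partitions.put(op, constraint);
            }
        }
    }

    public static void main(String[] args) {
        Node scan = new Node("scan");
        Node writer = new Node("writer");
        writer.inputs.add(scan);
        writer.incoming = TargetConstraint.SAME_COUNT;
        partitions.put(scan, 4); // assume the scan is already pinned to 4 partitions
        setBottomup(writer, true);
        System.out.println(partitions.get(scan) + ", " + partitions.get(writer)); // 4, 4
    }
}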
Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
From class JobBuilder, method setupConnectors.
private Map<IConnectorDescriptor, TargetConstraint> setupConnectors() throws AlgebricksException {
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = new HashMap<>();
    for (ILogicalOperator exchg : connectors.keySet()) {
        ILogicalOperator inOp = inEdges.get(exchg).get(0);
        ILogicalOperator outOp = outEdges.get(exchg).get(0);
        IOperatorDescriptor inOpDesc = findOpDescForAlgebraicOp(inOp);
        IOperatorDescriptor outOpDesc = findOpDescForAlgebraicOp(outOp);
        Pair<IConnectorDescriptor, TargetConstraint> connPair = connectors.get(exchg);
        IConnectorDescriptor conn = connPair.first;
        // An exchange's position in its producer's out-edge list is the producer
        // port; its position in the consumer's in-edge list is the consumer port.
        int producerPort = outEdges.get(inOp).indexOf(exchg);
        int consumerPort = inEdges.get(outOp).indexOf(exchg);
        jobSpec.connect(conn, inOpDesc, producerPort, outOpDesc, consumerPort);
        if (connPair.second != null) {
            tgtConstraints.put(conn, connPair.second);
        }
    }
    return tgtConstraints;
}
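The port arithmetic is worth spelling out. A self-contained sketch, with hypothetical edge lists, of how indexOf yields the port numbers passed to jobSpec.connect:

import java.util.List;

public class PortSketch {
    public static void main(String[] args) {
        // Assume a producer with two outgoing exchanges and a consumer with one incoming.
        List<String> producerOutEdges = List.of("exchangeA", "exchangeB");
        List<String> consumerInEdges = List.of("exchangeB");
        int producerPort = producerOutEdges.indexOf("exchangeB"); // 1: second output port
        int consumerPort = consumerInEdges.indexOf("exchangeB");  // 0: first input port
        System.out.println(producerPort + " -> " + consumerPort); // prints 1 -> 0
    }
}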
Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
From class BTreePrimaryIndexScanOperatorTest, method scanPrimaryIndexTest.
@Test
public void scanPrimaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Build a dummy key tuple; its contents are unused because both key-field
    // arrays below are null, which makes this an unbounded full scan.
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(
            spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // null low key: scan from negative infinity
    int[] lowKeyFields = null;
    // null high key: scan to positive infinity
    int[] highKeyFields = null;
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
            DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true,
            primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null,
            null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    // Pipeline: constant key tuple -> B-tree search -> plain file writer.
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
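For contrast, a hedged sketch of turning this into a range scan, assuming the same test fixture (spec, primaryHelperFactory, DataSetConstants) is in scope: serialize explicit low and high keys into the tuple and point the key-field arrays at them. The constructor call reuses the exact argument list from the full-scan test; the key values "100" and "200" are illustrative.

// Sketch only: range scan over primary keys in ["100", "200"], both bounds inclusive.
ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
DataOutput dos = tb.getDataOutput();
tb.reset();
new UTF8StringSerializerDeserializer().serialize("100", dos); // low search key
tb.addFieldEndOffset();
new UTF8StringSerializerDeserializer().serialize("200", dos); // high search key
tb.addFieldEndOffset();
int[] lowKeyFields = { 0 };  // tuple field 0 supplies the low key
int[] highKeyFields = { 1 }; // tuple field 1 supplies the high key
BTreeSearchOperatorDescriptor rangeSearchOp = new BTreeSearchOperatorDescriptor(spec,
        DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true,
        primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null,
        null, false);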