use of org.apache.hyracks.api.job.IOperatorDescriptorRegistry in project asterixdb by apache.
the class SortGroupByPOperator method contributeRuntimeOperator.
/**
 * Translates the logical group-by {@code op} into a {@code SortGroupByOperatorDescriptor},
 * registers it with {@code builder}, and connects it to the operator's first input.
 *
 * <p>The group-by must carry exactly one nested plan with exactly one root, and that root is
 * cast to an {@link AggregateOperator}. Decor expressions must be plain variable references.
 *
 * <p>Note that this method mutates the type environment of the aggregate's input: it records
 * the output types of the key/decor variables and, later, the partial-aggregation types of the
 * variables used by the merge expressions. The statement order below depends on that.
 *
 * @param builder         job builder that receives the operator descriptor and graph edge
 * @param context         job generation context supplying type environments and factory providers
 * @param op              the logical operator; cast to {@link GroupByOperator}
 * @param opSchema        output schema of the operator
 * @param inputSchemas    input schemas; only index 0 is read
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException if a decor expression is not a variable reference, if the
 *                             nested-plan shape is unsupported, or if a helper call fails
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final String unsupportedPlanMessage = "Sort group-by currently works only for one nested plan with one root containing an aggregate and a nested-tuple-source.";

    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;

    // Resolve every decor variable to its field index in the input schema.
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new AlgebricksException("Sort group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }

    // Only a single nested plan with a single root is supported.
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException(unsupportedPlanMessage);
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException(unsupportedPlanMessage);
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();

    // Build one aggregate evaluator factory per aggregate expression and remember the
    // partial (intermediate) result type of each one for the merge phase.
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    IAggregateEvaluatorFactory[] aff = new IAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }

    // Key fields first, decor fields after them.
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    System.arraycopy(keys, 0, keyAndDecFields, 0, keys.length);
    System.arraycopy(fdColumns, 0, keyAndDecFields, keys.length, fdColumns.length);

    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }

    // Make the key/decor output types visible in the aggregate's input environment.
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }

    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();

    // One comparator per group-by column, ascending or descending per the matching order column.
    IBinaryComparatorFactory[] compFactories = new IBinaryComparatorFactory[gbyCols.size()];
    IBinaryComparatorFactoryProvider bcfProvider = context.getBinaryComparatorFactoryProvider();
    i = 0;
    for (LogicalVariable v : gbyCols) {
        Object type = aggOpInputEnv.getVarType(v);
        boolean ascending = orderColumns[i].getOrder() == OrderKind.ASC;
        compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, ascending);
        i++;
    }

    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);

    // Merge phase: the merge expressions read the partial results, so their used variables
    // are typed with the intermediate types collected above (paired by position).
    IAggregateEvaluatorFactory[] merges = new IAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }
    RecordDescriptor partialAggRecordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), localInputSchemas[0], context);

    IAggregatorDescriptorFactory aggregatorFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(aff, keyAndDecFields);
    IAggregatorDescriptorFactory mergeFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(merges, keyAndDecFields);

    // The normalized-key computer is optional: without a provider the descriptor receives null.
    // (Previously the null check fell through and the provider was dereferenced anyway.)
    INormalizedKeyComputerFactory normalizedKeyFactory = null;
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    if (nkcfProvider != null) {
        Object firstKeyType = aggOpInputEnv.getVarType(gbyCols.get(0));
        boolean firstKeyAscending = orderColumns[0].getOrder() == OrderKind.ASC;
        normalizedKeyFactory = nkcfProvider.getNormalizedKeyComputerFactory(firstKeyType, firstKeyAscending);
    }

    SortGroupByOperatorDescriptor gbyOpDesc = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keyAndDecFields, normalizedKeyFactory, compFactories, aggregatorFactory, mergeFactory, partialAggRecordDescriptor, recordDescriptor, false);
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
use of org.apache.hyracks.api.job.IOperatorDescriptorRegistry in project asterixdb by apache.
the class ExternalGroupOperatorDescriptorTest method testCalculateGroupByTableCardinality.
/**
 * Checks the private {@code calculateGroupByTableCardinality(long,int,int)} method of
 * {@code ExternalGroupOperatorDescriptor} (invoked reflectively through {@code PA}) against
 * fixed expected cardinalities for a range of memory budgets, frame sizes and group-by
 * column counts.
 */
@Test
public void testCalculateGroupByTableCardinality() throws Exception {
    // Sets a dummy variable.
    IOperatorDescriptorRegistry spec = new JobSpecification(32768);
    ExternalGroupOperatorDescriptor eGByOp = new ExternalGroupOperatorDescriptor(spec, 0, 0, null, 4, null, null, null, null, null, null, null);

    // Test 1: compiler.groupmemory: 512 bytes, frame size: 256 bytes, with 1 column group-by
    long memoryBudgetInBytes = 512;
    int numberOfGroupByColumns = 1;
    int frameSize = 256;
    int resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(10, resultCardinality);

    // Sets the frame size to 128KB.
    frameSize = 128 * 1024;

    // Test 2: memory size: 1 MB, frame size: 128 KB, 1 column group-by
    memoryBudgetInBytes = 1024 * 1024;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(20388, resultCardinality);

    // Test 3: memory size: 100 MB, frame size: 128 KB, 1 column group-by
    memoryBudgetInBytes = 1024 * 1024 * 100;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(2016724, resultCardinality);

    // Test 4: memory size: 1 GB, frame size: 128 KB, 1 column group-by
    memoryBudgetInBytes = 1024 * 1024 * 1024;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(20649113, resultCardinality);

    // Test 5: memory size: 10 GB, frame size: 128 KB, 1 column group-by
    memoryBudgetInBytes = 1024 * 1024 * 1024 * 10L;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(206489044, resultCardinality);

    // Test 6: memory size: 100 GB, frame size: 128 KB, 1 column group-by
    memoryBudgetInBytes = 1024 * 1024 * 1024 * 100L;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(2045222521, resultCardinality);

    // Test 7: memory size: 1 TB, frame size: 128 KB, 1 column group-by
    // The cardinality will be set to Integer.MAX_VALUE in this case since the budget is too huge.
    memoryBudgetInBytes = 1024 * 1024 * 1024 * 1024L;
    frameSize = 128 * 1024;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(Integer.MAX_VALUE, resultCardinality);

    // Test 8: memory size: 1 MB, frame size: 128 KB, 2 columns group-by
    memoryBudgetInBytes = 1024 * 1024;
    numberOfGroupByColumns = 2;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(17825, resultCardinality);

    // Test 9: memory size: 1 MB, frame size: 128 KB, 3 columns group-by
    memoryBudgetInBytes = 1024 * 1024;
    numberOfGroupByColumns = 3;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(16227, resultCardinality);

    // Test 10: memory size: 1 MB, frame size: 128 KB, 4 columns group-by
    memoryBudgetInBytes = 1024 * 1024;
    numberOfGroupByColumns = 4;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(14563, resultCardinality);

    // Test 11: memory size: 32 MB, frame size: 128 KB, 4 columns group-by
    memoryBudgetInBytes = 1024 * 1024 * 32L;
    numberOfGroupByColumns = 4;
    resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)", memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
    Assert.assertEquals(441913, resultCardinality);
}
use of org.apache.hyracks.api.job.IOperatorDescriptorRegistry in project asterixdb by apache.
the class ExternalGroupByPOperator method contributeRuntimeOperator.
/**
 * Translates the logical group-by {@code op} into an {@code ExternalGroupOperatorDescriptor}
 * backed by a {@code HashSpillableTableFactory}, registers it with {@code builder}, and
 * connects it to the operator's first input.
 *
 * <p>The group-by must carry exactly one nested plan with exactly one root, and that root is
 * cast to an {@link AggregateOperator}. Decor expressions must be plain variable references.
 *
 * <p>Note that this method mutates the type environment of the aggregate's input: it records
 * the output types of the key/decor variables and, later, the partial-aggregation types of the
 * variables used by the merge expressions. The statement order below depends on that.
 *
 * @param builder         job builder that receives the operator descriptor and graph edge
 * @param context         job generation context supplying type environments, factories and frame size
 * @param op              the logical operator; cast to {@link GroupByOperator}
 * @param opSchema        output schema of the operator
 * @param inputSchemas    input schemas; only index 0 is read
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException if a decor expression is not a variable reference, if the
 *                             nested-plan shape is unsupported, or if a helper call fails
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final String unsupportedPlanMessage = "External group-by currently works only for one nested plan with one root containing an aggregate and a nested-tuple-source.";

    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;

    // Resolve every decor variable to its field index in the input schema.
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new AlgebricksException("External group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }

    // Only a single nested plan with a single root is supported.
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException(unsupportedPlanMessage);
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException(unsupportedPlanMessage);
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();

    // Build one serialized aggregate evaluator factory per aggregate expression and remember
    // the partial (intermediate) result type of each one for the merge phase.
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    ISerializedAggregateEvaluatorFactory[] aff = new ISerializedAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }

    // Key fields first, decor fields after them.
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    System.arraycopy(keys, 0, keyAndDecFields, 0, keys.length);
    System.arraycopy(fdColumns, 0, keyAndDecFields, keys.length, fdColumns.length);

    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }

    // Make the key/decor output types visible in the aggregate's input environment.
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }

    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(gbyCols, aggOpInputEnv, context);
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    IBinaryHashFunctionFamily[] hashFunctionFactories = JobGenHelper.variablesToBinaryHashFunctionFamilies(gbyCols, aggOpInputEnv, context);

    // Merge phase: the merge expressions read the partial results, so their used variables
    // are typed with the intermediate types collected above (paired by position).
    ISerializedAggregateEvaluatorFactory[] merges = new ISerializedAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }

    IAggregatorDescriptorFactory aggregatorFactory = new SerializableAggregatorDescriptorFactory(aff);
    IAggregatorDescriptorFactory mergeFactory = new SerializableAggregatorDescriptorFactory(merges);
    INormalizedKeyComputerFactory normalizedKeyFactory = JobGenHelper.variablesToAscNormalizedKeyComputerFactory(gbyCols, aggOpInputEnv, context);

    // Calculates the hash table size (# of unique hash values) based on the budget and a tuple size.
    // Widen to long before multiplying so a large frame size times a large frame limit
    // cannot wrap around in int arithmetic.
    long memoryBudgetInBytes = (long) context.getFrameSize() * frameLimit;
    int groupByColumnsCount = gby.getGroupByList().size() + numFds;
    int hashTableSize = ExternalGroupOperatorDescriptor.calculateGroupByTableCardinality(memoryBudgetInBytes, groupByColumnsCount, context.getFrameSize());

    ExternalGroupOperatorDescriptor gbyOpDesc = new ExternalGroupOperatorDescriptor(spec, hashTableSize, inputSize, keyAndDecFields, frameLimit, comparatorFactories, normalizedKeyFactory, aggregatorFactory, mergeFactory, recordDescriptor, recordDescriptor, new HashSpillableTableFactory(hashFunctionFactories));
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
use of org.apache.hyracks.api.job.IOperatorDescriptorRegistry in project asterixdb by apache.
the class InMemoryHashJoinPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
int[] keysLeft = JobGenHelper.variablesToFieldIndexes(keysLeftBranch, inputSchemas[0]);
int[] keysRight = JobGenHelper.variablesToFieldIndexes(keysRightBranch, inputSchemas[1]);
IVariableTypeEnvironment env = context.getTypeEnvironment(op);
IBinaryHashFunctionFactory[] hashFunFactories = JobGenHelper.variablesToBinaryHashFunctionFactories(keysLeftBranch, env, context);
IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[keysLeft.length];
int i = 0;
IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
for (LogicalVariable v : keysLeftBranch) {
Object t = env.getVarType(v);
comparatorFactories[i++] = bcfp.getBinaryComparatorFactory(t, true);
}
IPredicateEvaluatorFactoryProvider predEvaluatorFactoryProvider = context.getPredicateEvaluatorFactoryProvider();
IPredicateEvaluatorFactory predEvaluatorFactory = (predEvaluatorFactoryProvider == null ? null : predEvaluatorFactoryProvider.getPredicateEvaluatorFactory(keysLeft, keysRight));
RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
IOperatorDescriptorRegistry spec = builder.getJobSpec();
IOperatorDescriptor opDesc = null;
switch(kind) {
case INNER:
{
opDesc = new InMemoryHashJoinOperatorDescriptor(spec, keysLeft, keysRight, hashFunFactories, comparatorFactories, recDescriptor, tableSize, predEvaluatorFactory, memSizeInFrames);
break;
}
case LEFT_OUTER:
{
IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[inputSchemas[1].getSize()];
for (int j = 0; j < nonMatchWriterFactories.length; j++) {
nonMatchWriterFactories[j] = context.getMissingWriterFactory();
}
opDesc = new InMemoryHashJoinOperatorDescriptor(spec, keysLeft, keysRight, hashFunFactories, comparatorFactories, predEvaluatorFactory, recDescriptor, true, nonMatchWriterFactories, tableSize, memSizeInFrames);
break;
}
default:
{
throw new NotImplementedException();
}
}
contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
ILogicalOperator src1 = op.getInputs().get(0).getValue();
builder.contributeGraphEdge(src1, 0, op, 0);
ILogicalOperator src2 = op.getInputs().get(1).getValue();
builder.contributeGraphEdge(src2, 0, op, 1);
}
Aggregations