Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
The class TestOutputBatchSize, method testLeftNestedLoopJoin:
@Test
public void testLeftNestedLoopJoin() throws Exception {
  LogicalExpression functionCallExpr = new FunctionCall("equal",
      ImmutableList.of((LogicalExpression) new FieldReference("c1", ExpressionPosition.UNKNOWN),
          (LogicalExpression) new FieldReference("c2", ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
  NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.LEFT, functionCallExpr);
  numRows = 4000 * 2;

  // create left input rows like this.
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // create right input rows like this.
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // output rows will be like this.
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
  expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // set the output batch size to 1/2 of total size expected.
  // We will get approximately 4 batches.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(nestedLoopJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(4)            // verify number of batches
      .expectedBatchSize(totalSize / 2) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
  }
  opTestBuilder.go();
}
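The join condition built at the top of each nested-loop-join test in this listing is simply the expression tree for c1 = c2. A minimal sketch of the same construction in isolation, using only the classes and constructors already shown above (FieldReference, FunctionCall, ExpressionPosition from org.apache.drill.common.expression, plus Guava's ImmutableList):

// Build the logical expression equal(c1, c2), i.e. the join condition "c1 = c2".
// Both arguments are column references; ExpressionPosition.UNKNOWN means no
// source-text position is tracked for error reporting.
LogicalExpression c1 = new FieldReference("c1", ExpressionPosition.UNKNOWN);
LogicalExpression c2 = new FieldReference("c2", ExpressionPosition.UNKNOWN);
LogicalExpression joinCond = new FunctionCall("equal", ImmutableList.of(c1, c2), ExpressionPosition.UNKNOWN);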
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
The class TestOutputBatchSize, method testNestedLoopJoinMultipleOutputBatches:
@Test
public void testNestedLoopJoinMultipleOutputBatches() throws Exception {
  LogicalExpression functionCallExpr = new FunctionCall("equal",
      ImmutableList.of((LogicalExpression) new FieldReference("c1", ExpressionPosition.UNKNOWN),
          (LogicalExpression) new FieldReference("c2", ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
  NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, functionCallExpr);
  mockOpContext(nestedLoopJoin, initReservation, maxAllocation);
  numRows = 4000 * 2;

  // create left input rows like this.
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // create right input rows like this.
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // output rows will be like this.
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
  expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // set the output batch size to 1/2 of total size expected.
  // We will get approximately 4 batches.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(nestedLoopJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(4)            // verify number of batches
      .expectedBatchSize(totalSize / 2) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
  }
  opTestBuilder.go();
}
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
The class TestOutputBatchSize, method testNestedLoopJoinLowerLimit:
@Test
public void testNestedLoopJoinLowerLimit() throws Exception {
  // test the lower limit of at least one row per batch
  LogicalExpression functionCallExpr = new FunctionCall("equal",
      ImmutableList.of((LogicalExpression) new FieldReference("c1", ExpressionPosition.UNKNOWN),
          (LogicalExpression) new FieldReference("c2", ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
  NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, functionCallExpr);
  numRows = 10;

  // create left input rows like this.
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // create right input rows like this.
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // output rows will be like this.
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3

  // Set a very low output batch size (128 bytes, far less than one wideString row)
  // so each output batch can hold only one row.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(nestedLoopJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(10) // verify number of batches
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
  }
  opTestBuilder.go();
}
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
The class DrillLogicalTestUtils, method parseExprs:
public static List<NamedExpression> parseExprs(String... expressionsAndOutputNames) {
  Preconditions.checkArgument(expressionsAndOutputNames.length % 2 == 0,
      "List of expressions and output field names is not complete; each expression must explicitly give an output name.");
  List<NamedExpression> ret = new ArrayList<>();
  for (int i = 0; i < expressionsAndOutputNames.length; i += 2) {
    ret.add(new NamedExpression(LogicalExpressionParser.parse(expressionsAndOutputNames[i]),
        new FieldReference(new SchemaPath(new PathSegment.NameSegment(expressionsAndOutputNames[i + 1])))));
  }
  return ret;
}
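A hypothetical usage sketch of this helper (the expression strings and output names below are illustrative, not from the Drill test suite): the varargs are consumed in (expression, output name) pairs, so an odd-length argument list fails the precondition check above.

// Two (expression, output name) pairs yield two NamedExpressions:
// "a + b" is parsed by LogicalExpressionParser, and "total" becomes the
// FieldReference naming the projected output column.
List<NamedExpression> exprs = DrillLogicalTestUtils.parseExprs(
    "a + b", "total",
    "c", "c_alias");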
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
The class HashAggTemplate, method setup:
@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context,
    OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs,
    List<TypedFieldId> valueFieldIds, ClassGenerator<?> cg, TypedFieldId[] groupByOutFieldIds,
    VectorContainer outContainer, int extraRowBytes) {
  if (valueExprs == null || valueFieldIds == null) {
    throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
  }
  if (valueFieldIds.size() < valueExprs.length) {
    throw new IllegalArgumentException("Wrong number of workspace variables.");
  }
  this.context = context;
  this.stats = oContext.getStats();
  this.allocator = oContext.getAllocator();
  this.updater = new HashAggUpdater(allocator);
  this.oContext = oContext;
  this.incoming = incoming;
  this.outgoing = outgoing;
  this.cg = cg;
  this.outContainer = outContainer;
  this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
  this.phase = hashAggrConfig.getAggPhase();
  // single phase can not spill
  canSpill = phase.hasTwo();
  // Typically for testing - force a spill after a partition has more than this many batches
  minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
  // Set the memory limit
  long memoryLimit = allocator.getLimit();
  // Optional configured memory limit, typically used only for testing.
  long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
  if (configLimit > 0) {
    logger.warn("Memory limit was changed to {}", configLimit);
    memoryLimit = Math.min(memoryLimit, configLimit);
    // enforce at the allocator
    allocator.setLimit(memoryLimit);
  }
  // TODO: This functionality will be added later.
  if (hashAggrConfig.getGroupByExprs().size() == 0) {
    throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by expressions.");
  }
  htIdxHolder = new IndexPointer();
  materializedValueFields = new MaterializedField[valueFieldIds.size()];
  if (valueFieldIds.size() > 0) {
    int i = 0;
    FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
    for (TypedFieldId id : valueFieldIds) {
      materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
    }
  }
  spillSet = new SpillSet(context, hashAggrConfig);
  baseHashTable = new ChainedHashTable(htConfig, context, allocator, incoming, null /* no incoming probe */, outgoing);
  // retain these for delayedSetup, and to allow recreating hash tables (after a spill)
  this.groupByOutFieldIds = groupByOutFieldIds;
  numGroupByOutFields = groupByOutFieldIds.length;
  // Start calculating the row widths (with the extra columns; the rest would be done in updateEstMaxBatchSize())
  estRowWidth = extraRowBytes;
  estValuesRowWidth = extraRowBytes;
  try {
    doSetup(incoming);
  } catch (SchemaChangeException e) {
    throw HashAggBatch.schemaChangeException(e, "Hash Aggregate", logger);
  }
}
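The memory clamp above only takes effect when the option behind ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR is set to a positive value, much like the output-batch-size option used in the tests earlier in this listing. A hedged sketch of setting it in a test fragment, assuming the validator's backing key is ExecConstants.HASHAGG_MAX_MEMORY_KEY (the string option name paired with the validator; verify the constant in your Drill version before relying on it):

// Hypothetical test setup: cap hash-agg memory at 32 MB so the Math.min
// clamp in setup() lowers the allocator limit and can force spilling.
fragContext.getOptions().setLocalOption(ExecConstants.HASHAGG_MAX_MEMORY_KEY, 32 * 1024 * 1024);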