Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In class TestOutputBatchSize, method testNestedLoopJoinSingleOutputBatch:
@Test
public void testNestedLoopJoinSingleOutputBatch() throws Exception {
  LogicalExpression functionCallExpr = new FunctionCall("equal",
      ImmutableList.of((LogicalExpression) new FieldReference("c1", ExpressionPosition.UNKNOWN),
          (LogicalExpression) new FieldReference("c2", ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
  NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, functionCallExpr);

  // Create multiple batches from both sides.
  numRows = 4096 * 2;

  // Create left input rows like this:
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // Create right input rows like this:
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // Output rows will look like this:
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
  expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // Set the output batch size to twice the expected total size, so we should get exactly 1 batch.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(nestedLoopJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(1)        // verify number of batches
      .expectedBatchSize(totalSize) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
  }
  opTestBuilder.go();
}
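The join condition above is assembled directly as a logical expression tree: an "equal" FunctionCall whose two arguments are FieldReferences to the join columns. A minimal sketch of a helper that builds such a condition for any pair of column names (the helper name is hypothetical, not part of the Drill test; the constructor calls mirror the test above):

// Hypothetical helper sketch: builds the c1 = c2 style condition used above.
static LogicalExpression equalCondition(String leftCol, String rightCol) {
  return new FunctionCall("equal",
      ImmutableList.of(
          (LogicalExpression) new FieldReference(leftCol, ExpressionPosition.UNKNOWN),
          (LogicalExpression) new FieldReference(rightCol, ExpressionPosition.UNKNOWN)),
      ExpressionPosition.UNKNOWN);
}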
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In class ExpressionTreeMaterializerTest, method testMaterializingLateboundField:
@Test
public void testMaterializingLateboundField() throws SchemaChangeException {
  final RecordBatch batch = mock(RecordBatch.class);
  TypedFieldId fieldId = new TypedFieldId.Builder()
      .finalType(Types.required(MinorType.BIGINT))
      .addId(-5)
      .build();
  when(batch.getValueVectorId(new SchemaPath("test", ExpressionPosition.UNKNOWN))).thenReturn(fieldId);

  final SchemaBuilder builder = BatchSchema.newBuilder();
  builder.addField(getField("test", bigIntType));
  final BatchSchema schema = builder.build();

  ErrorCollector ec = new ErrorCollectorImpl();
  LogicalExpression expr = ExpressionTreeMaterializer.materialize(
      new FieldReference("test", ExpressionPosition.UNKNOWN), batch, ec, registry);
  assertEquals(bigIntType, expr.getMajorType());
  assertFalse(ec.hasErrors());
}
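The point of this test is that a bare FieldReference is late bound: it carries only a column name, not a concrete type, until the materializer resolves it against the batch schema. A compressed restatement of the contract being checked (names as in the test above):

// The FieldReference alone is just the name "test" with no resolved type.
// materialize() looks the name up in the mocked batch (via getValueVectorId),
// binds it to the BIGINT TypedFieldId, and returns a typed expression:
LogicalExpression resolved = ExpressionTreeMaterializer.materialize(
    new FieldReference("test", ExpressionPosition.UNKNOWN), batch, ec, registry);
assertEquals(bigIntType, resolved.getMajorType()); // the late-bound name is now typed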
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In class OrderedPartitionRecordBatch, method buildTable:
private void buildTable() {
  // Get all samples from the distributed map.
  SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
  VectorContainer allSamplesContainer = new VectorContainer();
  VectorContainer candidatePartitionTable = new VectorContainer();
  CachedVectorContainer wrap = null;
  try {
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    containerBuilder.build(allSamplesContainer);

    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // Sort the incoming sample data.
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    try {
      sorter.setup(context, newSv4, allSamplesContainer);
    } catch (SchemaChangeException e) {
      throw schemaChangeException(e, logger);
    }
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table,
    // where N = totalSampledRecords / partitions.
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
    SampleCopier copier = null;
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        candidatePartitionTable.setValueCount(copier.getOutputRecords());
        break;
      } else {
        // The allocation was too small; clear the vectors, double the allocation and retry.
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    wrap = new CachedVectorContainer(batch, context.getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
  } finally {
    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    containerBuilder.close();
    if (wrap != null) {
      wrap.clear();
    }
  }
}
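To make the copy step concrete, here is a worked example of the boundary arithmetic with illustrative numbers (not taken from the source):

// Illustrative values only:
int totalSampledRecords = 1000;                     // containerBuilder.getSv4().getTotalCount()
int partitions = 4;
int skipRecords = totalSampledRecords / partitions; // 250
// copier.copyRecords(250, 250, 3) then copies rows 250, 500 and 750 of the sorted
// samples: the partitions - 1 boundary values that split the data into 4 ranges.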
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In class MetadataAggregateHelper, method getUnflattenedFileds:
/**
 * Returns a map with field names as keys and field references as values. When a field is a map,
 * the fully qualified names of its children are used as keys.
 * For example, for the field list (a{b, c}, d), the returned map will contain the keys a.b, a.c and d.
 *
 * @param fields       list of top-level fields to unflatten if required
 * @param parentFields list of parent name segments
 * @return map with field names as keys and field references as values
 */
private Map<String, FieldReference> getUnflattenedFileds(Collection<MaterializedField> fields, List<String> parentFields) {
  Map<String, FieldReference> fieldNameRefMap = new HashMap<>();
  for (MaterializedField field : fields) {
    // Statistics collection is not supported for array types.
    if (field.getType().getMode() != TypeProtos.DataMode.REPEATED) {
      // excludedColumns are applied to root fields only.
      if (parentFields != null || !excludedColumns.contains(SchemaPath.getSimplePath(field.getName()))) {
        List<String> currentPath;
        if (parentFields == null) {
          currentPath = Collections.singletonList(field.getName());
        } else {
          currentPath = new ArrayList<>(parentFields);
          currentPath.add(field.getName());
        }
        if (field.getType().getMinorType() == TypeProtos.MinorType.MAP && createNewAggregations()) {
          fieldNameRefMap.putAll(getUnflattenedFileds(field.getChildren(), currentPath));
        } else {
          SchemaPath schemaPath = SchemaPath.getCompoundPath(currentPath.toArray(new String[0]));
          // Add backticks when createNewAggregations() is true to ensure that the field is parsed correctly.
          String name = createNewAggregations() ? schemaPath.toExpr() : schemaPath.getRootSegmentPath();
          fieldNameRefMap.put(name, new FieldReference(schemaPath));
        }
      }
    }
  }
  return fieldNameRefMap;
}
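An illustrative input/output pair for this method (hypothetical schema; assumes createNewAggregations() returns true and no columns are excluded):

// Input fields: a MAP{b INT, c INT}, d INT; parentFields = null.
// The map field a is recursed into, so the returned map contains:
//   "`a`.`b`" -> FieldReference(a.b)
//   "`a`.`c`" -> FieldReference(a.c)
//   "`d`"     -> FieldReference(d)
// (keys use schemaPath.toExpr(), which backtick-quotes each name segment).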
Use of org.apache.drill.common.expression.FieldReference in project drill by apache.
In class HashJoinBatch, method setupHashTable:
private void setupHashTable() {
  List<Comparator> comparators = Lists.newArrayListWithExpectedSize(conditions.size());
  conditions.forEach(cond -> comparators.add(JoinUtils.checkAndReturnSupportedJoinComparator(cond)));
  if (skipHashTableBuild) {
    return;
  }

  // Set up the hash table configuration object.
  List<NamedExpression> leftExpr = new ArrayList<>(conditions.size());
  // Create named expressions from the conditions.
  for (int i = 0; i < conditions.size(); i++) {
    leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
  }

  // Set the left named expressions to null if the probe batch is empty.
  if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
    leftExpr = null;
  } else {
    if (probeBatch.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
      throw UserException.internalError(null)
          .message("Hash join does not support probe batch with selection vectors.")
          .addContext("Probe batch has selection mode", (probeBatch.getSchema().getSelectionVectorMode()).toString())
          .build(logger);
    }
  }

  HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      true, HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators, joinControl.asInt());

  // Create the chained hash table.
  baseHashTable = new ChainedHashTable(htConfig, context, allocator, buildBatch, probeBatch, null);
  if (enableRuntimeFilter) {
    setupHash64(htConfig);
  }
}
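For a two-condition join such as ON t1.c1 = t2.c2 AND t1.c3 = t2.c4 (a hypothetical example, not from the source), the loop above yields synthetic, position-based probe-side names:

// leftExpr after the loop, for the hypothetical two-condition join:
//   NamedExpression(c1, FieldReference("probe_side_0"))
//   NamedExpression(c3, FieldReference("probe_side_1"))
// The FieldReference supplies a stable output name for each hash-table key column,
// independent of the original probe column names.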