Search in sources :

Example 66 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class TestOutputBatchSize method testNestedLoopJoinSingleOutputBatch.

/**
 * Runs a nested loop join (inner, on c1 = c2) over JSON inputs and verifies that, with the
 * output batch size option set to twice the expected result size, the operator emits
 * exactly one output batch of the expected size.
 *
 * Both inputs carry ids 0..numRows inclusive, i.e. numRows + 1 rows per side.
 */
@Test
public void testNestedLoopJoinSingleOutputBatch() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression joinCondition = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, joinCondition);

    numRows = 4096 * 2;

    // Row prefixes do not depend on the row id, so build them once.
    final String leftRowHead = "{\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
    final String rightRowHead = "{\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";
    final String joinedRightHead = ", \"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            leftJson.append(",");
        }
        leftJson.append(leftRowHead).append(id).append("}");
    }
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            rightJson.append(",");
        }
        rightJson.append(rightRowHead).append(id).append("}");
    }
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Expected output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            expectedJson.append(",");
        }
        expectedJson.append(leftRowHead).append(id);
        expectedJson.append(joinedRightHead).append(id).append("}");
    }
    expectedJson.append("]");
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedJson.toString());

    long totalSize = getExpectedSize(expectedJsonBatches);

    // Allow twice the expected total size per output batch, so everything should fit in one batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(nestedLoopJoin)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        // verify number of batches
        .expectedNumBatches(1)
        // verify batch size
        .expectedBatchSize(totalSize)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id; ids are passed as long values.
    for (long id = 0; id < numRows + 1; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 67 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class ExpressionTreeMaterializerTest method testMaterializingLateboundField.

/**
 * Materializes a {@link FieldReference} to the field "test" against a mocked
 * {@link RecordBatch} that reports a required BIGINT vector for that path, then checks
 * that the materialized expression has the BIGINT major type and that no errors were collected.
 */
@Test
public void testMaterializingLateboundField() throws SchemaChangeException {
    final RecordBatch mockedBatch = mock(RecordBatch.class);

    // The mocked batch resolves the "test" path to a required BIGINT field id.
    TypedFieldId.Builder idBuilder = new TypedFieldId.Builder();
    TypedFieldId bigIntFieldId = idBuilder
        .finalType(Types.required(MinorType.BIGINT))
        .addId(-5)
        .build();
    SchemaPath testPath = new SchemaPath("test", ExpressionPosition.UNKNOWN);
    when(mockedBatch.getValueVectorId(testPath)).thenReturn(bigIntFieldId);

    // A schema containing the same field is built here as well; it is not consulted below.
    final SchemaBuilder schemaBuilder = BatchSchema.newBuilder();
    schemaBuilder.addField(getField("test", bigIntType));
    final BatchSchema schema = schemaBuilder.build();

    ErrorCollector errors = new ErrorCollectorImpl();
    FieldReference lateBound = new FieldReference("test", ExpressionPosition.UNKNOWN);
    LogicalExpression materialized = ExpressionTreeMaterializer.materialize(lateBound, mockedBatch, errors, registry);

    assertEquals(bigIntType, materialized.getMajorType());
    assertFalse(errors.hasErrors());
}
Also used : ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) SchemaPath(org.apache.drill.common.expression.SchemaPath) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test) VectorTest(org.apache.drill.categories.VectorTest)

Example 68 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

/**
 * Builds a candidate partition table from the samples stored under {@code mapKey} and offers
 * it to {@code tableMap} under the key {@code mapKey + "final"}.
 *
 * Steps: gather all cached samples into one container, sort them on fields f0, f1, ...
 * using the directions of the configured orderings, copy every Nth record
 * (N = total sampled records / partitions) into the candidate table, and publish the
 * result with {@code putIfAbsent}. All intermediate containers are released in the
 * {@code finally} block.
 */
private void buildTable() {
    // Get all samples from distributed map
    SortRecordBatchBuilder sampleBuilder = new SortRecordBatchBuilder(context.getAllocator());
    VectorContainer sortedSamples = new VectorContainer();
    VectorContainer partitionCandidates = new VectorContainer();
    CachedVectorContainer cachedTable = null;
    try {
        for (CachedVectorContainer cachedSample : mmap.get(mapKey)) {
            sampleBuilder.add(cachedSample.get());
        }
        sampleBuilder.build(sortedSamples);

        // Re-target each configured ordering at the positional sample column f<index>.
        List<Ordering> sampleOrderings = Lists.newArrayList();
        int fieldIndex = 0;
        for (Ordering configured : popConfig.getOrderings()) {
            SchemaPath samplePath = SchemaPath.getSimplePath("f" + fieldIndex);
            fieldIndex++;
            sampleOrderings.add(new Ordering(configured.getDirection(), new FieldReference(samplePath)));
        }

        // sort the data incoming samples.
        SelectionVector4 sampleSv4 = sampleBuilder.getSv4();
        Sorter sorter = SortBatch.createNewSorter(context, sampleOrderings, sortedSamples);
        try {
            sorter.setup(context, sampleSv4, sortedSamples);
        } catch (SchemaChangeException e) {
            throw schemaChangeException(e, logger);
        }
        sorter.sort(sampleSv4, sortedSamples);

        // Copy every Nth record from the samples into a candidate partition table, where N = totalSampledRecords/partitions
        // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
        List<ValueVector> outputVectors = Lists.newArrayList();
        SampleCopier copier = getCopier(sampleSv4, sortedSamples, partitionCandidates, sampleOrderings, outputVectors);

        // Retry the copy with a doubled allocation size each time it reports failure.
        int allocationSize = 50;
        boolean copied = false;
        while (!copied) {
            for (ValueVector vector : outputVectors) {
                AllocationHelper.allocate(vector, samplingFactor * partitions, allocationSize);
            }
            int skipRecords = sampleBuilder.getSv4().getTotalCount() / partitions;
            copied = copier.copyRecords(skipRecords, skipRecords, partitions - 1);
            if (copied) {
                assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
                partitionCandidates.setValueCount(copier.getOutputRecords());
            } else {
                partitionCandidates.zeroVectors();
                allocationSize *= 2;
            }
        }

        partitionCandidates.setRecordCount(copier.getOutputRecords());
        WritableBatch candidateBatch = WritableBatch.getBatchNoHVWrap(partitionCandidates.getRecordCount(), partitionCandidates, false);
        cachedTable = new CachedVectorContainer(candidateBatch, context.getAllocator());
        tableMap.putIfAbsent(mapKey + "final", cachedTable, 1, TimeUnit.MINUTES);
    } finally {
        // Release everything allocated above, whether or not the table was published.
        partitionCandidates.clear();
        sortedSamples.clear();
        sampleBuilder.clear();
        sampleBuilder.close();
        if (cachedTable != null) {
            cachedTable.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 69 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class MetadataAggregateHelper method getUnflattenedFileds.

/**
 * Builds a map from field name to {@link FieldReference} for the given fields.
 * <p>
 * Fields whose data mode is REPEATED are skipped. Root-level fields (those visited with
 * {@code parentFields == null}) listed in {@code excludedColumns} are skipped as well.
 * When {@code createNewAggregations()} is true, MAP fields are expanded recursively and
 * keys are the full path expression of each leaf; otherwise the key is the root segment path.
 * For example, for the fields (a{b, c}, d) with map expansion enabled, the result has
 * entries for a.b, a.c and d.
 *
 * @param fields       fields to process at the current nesting level
 * @param parentFields name segments of the enclosing fields, or {@code null} at the root level
 * @return map with field names as keys and field references as values
 */
private Map<String, FieldReference> getUnflattenedFileds(Collection<MaterializedField> fields, List<String> parentFields) {
    Map<String, FieldReference> referencesByName = new HashMap<>();
    final boolean atRoot = parentFields == null;
    for (MaterializedField field : fields) {
        // statistics collecting is not supported for array types
        if (field.getType().getMode() == TypeProtos.DataMode.REPEATED) {
            continue;
        }
        // excludedColumns are applied for root fields only
        if (atRoot && excludedColumns.contains(SchemaPath.getSimplePath(field.getName()))) {
            continue;
        }

        // Path of name segments from the root down to this field.
        List<String> fieldPath;
        if (atRoot) {
            fieldPath = Collections.singletonList(field.getName());
        } else {
            fieldPath = new ArrayList<>(parentFields);
            fieldPath.add(field.getName());
        }

        boolean expandMap = field.getType().getMinorType() == TypeProtos.MinorType.MAP && createNewAggregations();
        if (expandMap) {
            referencesByName.putAll(getUnflattenedFileds(field.getChildren(), fieldPath));
            continue;
        }

        SchemaPath schemaPath = SchemaPath.getCompoundPath(fieldPath.toArray(new String[0]));
        // adds backticks for popConfig.createNewAggregations() to ensure that field will be parsed correctly
        String key;
        if (createNewAggregations()) {
            key = schemaPath.toExpr();
        } else {
            key = schemaPath.getRootSegmentPath();
        }
        referencesByName.put(key, new FieldReference(schemaPath));
    }
    return referencesByName;
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) HashMap(java.util.HashMap) SchemaPath(org.apache.drill.common.expression.SchemaPath) MaterializedField(org.apache.drill.exec.record.MaterializedField)

Example 70 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class HashJoinBatch method setupHashTable.

/**
 * Validates the join conditions and, unless {@code skipHashTableBuild} is set, creates the
 * chained hash table used by this join.
 * <p>
 * A comparator is resolved for every condition first, so comparator resolution always runs
 * even when the hash table build is skipped. Probe-side key expressions are named
 * {@code probe_side_<index>}; they are replaced by {@code null} when the left upstream
 * outcome is neither OK_NEW_SCHEMA nor OK. If runtime filtering is enabled, the 64-bit
 * hash helper is set up from the same configuration.
 */
private void setupHashTable() {
    final int conditionCount = conditions.size();

    List<Comparator> comparators = Lists.newArrayListWithExpectedSize(conditionCount);
    for (int i = 0; i < conditionCount; i++) {
        comparators.add(JoinUtils.checkAndReturnSupportedJoinComparator(conditions.get(i)));
    }

    if (skipHashTableBuild) {
        return;
    }

    // Setup the hash table configuration object
    // Create named expressions from the conditions
    List<NamedExpression> probeKeys = new ArrayList<>(conditionCount);
    for (int i = 0; i < conditionCount; i++) {
        FieldReference probeName = new FieldReference("probe_side_" + i);
        probeKeys.add(new NamedExpression(conditions.get(i).getLeft(), probeName));
    }

    boolean probeHasData = leftUpstream == IterOutcome.OK_NEW_SCHEMA || leftUpstream == IterOutcome.OK;
    if (probeHasData) {
        if (probeBatch.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            throw UserException.internalError(null).message("Hash join does not support probe batch with selection vectors.").addContext("Probe batch has selection mode", (probeBatch.getSchema().getSelectionVectorMode()).toString()).build(logger);
        }
    } else {
        // Set the left named expression to be null if the probe batch is empty.
        probeKeys = null;
    }

    int minTableSize = (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE);
    HashTableConfig htConfig = new HashTableConfig(minTableSize, true, HashTable.DEFAULT_LOAD_FACTOR, rightExpr, probeKeys, comparators, joinControl.asInt());

    // Create the chained hash table
    baseHashTable = new ChainedHashTable(htConfig, context, allocator, buildBatch, probeBatch, null);
    if (enableRuntimeFilter) {
        setupHash64(htConfig);
    }
}
Also used : HashTableConfig(org.apache.drill.exec.physical.impl.common.HashTableConfig) FieldReference(org.apache.drill.common.expression.FieldReference) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ArrayList(java.util.ArrayList) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable) Comparator(org.apache.drill.exec.physical.impl.common.Comparator)

Aggregations

FieldReference (org.apache.drill.common.expression.FieldReference): 84, LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 38, Test (org.junit.Test): 22, NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 19, FunctionCall (org.apache.drill.common.expression.FunctionCall): 16, SchemaPath (org.apache.drill.common.expression.SchemaPath): 14, Ordering (org.apache.drill.common.logical.data.Order.Ordering): 14, ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 13, ArrayList (java.util.ArrayList): 12, ValueExpressions (org.apache.drill.common.expression.ValueExpressions): 12, MaterializedField (org.apache.drill.exec.record.MaterializedField): 12, ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 10, RelFieldCollation (org.apache.calcite.rel.RelFieldCollation): 9, Order (org.apache.drill.common.logical.data.Order): 9, ExecTest (org.apache.drill.exec.ExecTest): 9, AggregateCall (org.apache.calcite.rel.core.AggregateCall): 8, OperatorTest (org.apache.drill.categories.OperatorTest): 8, SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8, TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 7, VectorTest (org.apache.drill.categories.VectorTest): 6