
Example 66 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From class TestReverseImplicitCast, method twoWayCast:

@Test
public void twoWayCast() throws Throwable {
    // Exercises two-way implicit casts: the plan produces an int column and a varchar column whose values are checked below
    try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        Drillbit bit = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
        // Start the drillbit, connect a client, and run the query from the test resource.
        bit.run();
        client.connect();
        List<QueryDataBatch> results = client.runQuery(
            org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
            Files.asCharSource(
                DrillFileUtils.getResourceAsFile("/functions/cast/two_way_implicit_cast.json"),
                Charsets.UTF_8).read());
        RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
        QueryDataBatch batch = results.get(0);
        assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
        Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
        ValueVector.Accessor intAccessor1 = itr.next().getValueVector().getAccessor();
        ValueVector.Accessor varcharAccessor1 = itr.next().getValueVector().getAccessor();
        for (int i = 0; i < intAccessor1.getValueCount(); i++) {
            assertEquals(10, intAccessor1.getObject(i));
            assertEquals("101", varcharAccessor1.getObject(i).toString());
        }
        batchLoader.clear();
        for (QueryDataBatch result : results) {
            result.release();
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) DrillClient(org.apache.drill.exec.client.DrillClient) Test(org.junit.Test)
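
This and the next two examples share the same scaffolding: load one QueryDataBatch into a RecordBatchLoader, walk the loader's VectorWrapper iterator to reach each column's ValueVector.Accessor, then clear the loader and release every batch. A minimal hedged sketch of just that core pattern, using only classes already imported above (the printed column name comes from VectorWrapper.getField()):

RecordBatchLoader loader = new RecordBatchLoader(bit.getContext().getAllocator());
QueryDataBatch batch = results.get(0);
loader.load(batch.getHeader().getDef(), batch.getData());
// RecordBatchLoader is Iterable<VectorWrapper<?>>, so a for-each works as well
for (VectorWrapper<?> wrapper : loader) {
    ValueVector.Accessor accessor = wrapper.getValueVector().getAccessor();
    for (int i = 0; i < accessor.getValueCount(); i++) {
        System.out.println(wrapper.getField().getName() + " = " + accessor.getObject(i));
    }
}
loader.clear();          // free the vectors owned by the loader
for (QueryDataBatch b : results) {
    b.release();         // free the raw batch buffers received from the client
}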

Example 67 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From class TestHashJoin, method simpleEqualityJoin:

@Test
public void simpleEqualityJoin() throws Throwable {
    // Checks a hash join with a single equality condition
    try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        Drillbit bit = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
        // Start the drillbit, connect a client, and run the templated query.
        bit.run();
        client.connect();
        List<QueryDataBatch> results = client.runQuery(
            org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
            Files.asCharSource(DrillFileUtils.getResourceAsFile("/join/hash_join.json"), Charsets.UTF_8)
                .read()
                .replace("#{TEST_FILE_1}",
                    DrillFileUtils.getResourceAsFile("/build_side_input.json").toURI().toString())
                .replace("#{TEST_FILE_2}",
                    DrillFileUtils.getResourceAsFile("/probe_side_input.json").toURI().toString()));
        RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
        QueryDataBatch batch = results.get(1);
        assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
        Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
        // Just test the join key
        long[] colA = { 1, 1, 2, 2, 1, 1 };
        // Check the join-key column values
        ValueVector.Accessor intAccessor1 = itr.next().getValueVector().getAccessor();
        for (int i = 0; i < intAccessor1.getValueCount(); i++) {
            assertEquals(colA[i], intAccessor1.getObject(i));
        }
        assertEquals(6, intAccessor1.getValueCount());
        batchLoader.clear();
        for (QueryDataBatch result : results) {
            result.release();
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) DrillClient(org.apache.drill.exec.client.DrillClient) SlowTest(org.apache.drill.categories.SlowTest) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)
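
One detail that differs from Example 66: the assertions here load results.get(1) rather than the first batch. This is presumably because the first batch returned for this plan carries only the schema (the OK_NEW_SCHEMA handshake) and no data rows; that assumption is worth keeping in mind when indexing result batches:

QueryDataBatch dataBatch = results.get(1);   // results.get(0) is taken to be schema-only here
assertTrue(batchLoader.load(dataBatch.getHeader().getDef(), dataBatch.getData()));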

Example 68 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From class TestDateTypes, method testInterval:

@Test
public void testInterval() throws Exception {
    try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        Drillbit bit = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {
        // Start the drillbit, connect a client, and run the templated query.
        bit.run();
        client.connect();
        List<QueryDataBatch> results = client.runQuery(
            org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
            Files.asCharSource(DrillFileUtils.getResourceAsFile("/record/vector/test_interval.json"), Charsets.UTF_8)
                .read()
                .replace("#{TEST_FILE}", "/test_simple_interval.json"));
        RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());
        QueryDataBatch batch = results.get(0);
        assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
        Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
        ValueVector.Accessor accessor = itr.next().getValueVector().getAccessor();
        // Check the interval type
        assertEquals("2 years 2 months 1 day 1:20:35.0", accessor.getObject(0).toString());
        assertEquals("2 years 2 months 0 days 0:0:0.0", accessor.getObject(1).toString());
        assertEquals("0 years 0 months 0 days 1:20:35.0", accessor.getObject(2).toString());
        assertEquals("2 years 2 months 1 day 1:20:35.897", accessor.getObject(3).toString());
        assertEquals("0 years 0 months 0 days 0:0:35.4", accessor.getObject(4).toString());
        assertEquals("1 year 10 months 1 day 0:-39:-25.0", accessor.getObject(5).toString());
        accessor = itr.next().getValueVector().getAccessor();
        // Check the interval year type
        assertEquals("2 years 2 months ", accessor.getObject(0).toString());
        assertEquals("2 years 2 months ", accessor.getObject(1).toString());
        assertEquals("0 years 0 months ", accessor.getObject(2).toString());
        assertEquals("2 years 2 months ", accessor.getObject(3).toString());
        assertEquals("0 years 0 months ", accessor.getObject(4).toString());
        assertEquals("1 year 10 months ", accessor.getObject(5).toString());
        accessor = itr.next().getValueVector().getAccessor();
        // Check the interval day type
        assertEquals("1 day 1:20:35.0", accessor.getObject(0).toString());
        assertEquals("0 days 0:0:0.0", accessor.getObject(1).toString());
        assertEquals("0 days 1:20:35.0", accessor.getObject(2).toString());
        assertEquals("1 day 1:20:35.897", accessor.getObject(3).toString());
        assertEquals("0 days 0:0:35.4", accessor.getObject(4).toString());
        assertEquals("1 day 0:-39:-25.0", accessor.getObject(5).toString());
        batchLoader.clear();
        for (QueryDataBatch b : results) {
            b.release();
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Drillbit(org.apache.drill.exec.server.Drillbit) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) DrillClient(org.apache.drill.exec.client.DrillClient) Test(org.junit.Test) SlowTest(org.apache.drill.categories.SlowTest) VectorTest(org.apache.drill.categories.VectorTest)
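
Since the iterator yields vectors in the plan's output column order, the three successive itr.next() calls above line up with the interval, interval-year, and interval-day columns being checked. Naming the accessors makes that mapping explicit; a small sketch of the same walk (variable names are illustrative only):

Iterator<VectorWrapper<?>> itr = batchLoader.iterator();
ValueVector.Accessor interval     = itr.next().getValueVector().getAccessor();  // full interval column
ValueVector.Accessor intervalYear = itr.next().getValueVector().getAccessor();  // year-month column
ValueVector.Accessor intervalDay  = itr.next().getValueVector().getAccessor();  // day-time column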

Example 69 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From class MetadataHandlerBatch, method writeMetadataUsingBatchSchema:

private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadataUsingBatchSchema(List<T> metadataList) {
    Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");
    ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
    resultSetLoader.startBatch();
    RowSetLoader rowWriter = resultSetLoader.writer();
    Iterator<T> segmentsIterator = metadataList.iterator();
    while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
        T metadata = segmentsIterator.next();
        metadataToHandle.remove(metadata.getMetadataInfo().identifier());
        List<Object> arguments = new ArrayList<>();
        for (VectorWrapper<?> vectorWrapper : container) {
            String[] identifierValues = Arrays.copyOf(
                MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
                popConfig.getContext().segmentColumns().size());
            MaterializedField field = vectorWrapper.getField();
            String fieldName = field.getName();
            if (fieldName.equals(MetastoreAnalyzeConstants.LOCATION_FIELD)) {
                arguments.add(metadata.getPath().toUri().getPath());
            } else if (fieldName.equals(MetastoreAnalyzeConstants.LOCATIONS_FIELD)) {
                if (metadataType == MetadataType.SEGMENT) {
                    arguments.add(((SegmentMetadata) metadata).getLocations().stream()
                        .map(path -> path.toUri().getPath())
                        .toArray(String[]::new));
                } else {
                    arguments.add(null);
                }
            } else if (popConfig.getContext().segmentColumns().contains(fieldName)) {
                arguments.add(identifierValues[popConfig.getContext().segmentColumns().indexOf(fieldName)]);
            } else if (AnalyzeColumnUtils.isColumnStatisticsField(fieldName)) {
                arguments.add(metadata
                    .getColumnStatistics(SchemaPath.parseFromString(AnalyzeColumnUtils.getColumnName(fieldName)))
                    .get(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
            } else if (AnalyzeColumnUtils.isMetadataStatisticsField(fieldName)) {
                arguments.add(metadata.getStatistic(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
            } else if (fieldName.equals(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD)) {
                // collectedMap field value: left as an empty array here
                arguments.add(new Object[] {});
            } else if (fieldName.equals(MetastoreAnalyzeConstants.SCHEMA_FIELD)) {
                arguments.add(metadata.getSchema().jsonString());
            } else if (fieldName.equals(columnNamesOptions.lastModifiedTime())) {
                arguments.add(String.valueOf(metadata.getLastModifiedTime()));
            } else if (fieldName.equals(columnNamesOptions.rowGroupIndex())) {
                arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
            } else if (fieldName.equals(columnNamesOptions.rowGroupStart())) {
                arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
            } else if (fieldName.equals(columnNamesOptions.rowGroupLength())) {
                arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
            } else if (fieldName.equals(MetastoreAnalyzeConstants.METADATA_TYPE)) {
                arguments.add(metadataType.name());
            } else {
                throw new UnsupportedOperationException(String.format("Found unexpected field [%s] in incoming batch.", field));
            }
        }
        rowWriter.addRow(arguments.toArray());
    }
    return resultSetLoader.harvest();
}
Also used : AbstractSingleRecordBatch(org.apache.drill.exec.record.AbstractSingleRecordBatch) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Function(java.util.function.Function) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) StreamSupport(java.util.stream.StreamSupport) NONE(org.apache.drill.exec.record.RecordBatch.IterOutcome.NONE) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) Iterator(java.util.Iterator) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) MetadataHandlerPOP(org.apache.drill.exec.physical.config.MetadataHandlerPOP) LocationProvider(org.apache.drill.metastore.metadata.LocationProvider) VarCharVector(org.apache.drill.exec.vector.VarCharVector) Tables(org.apache.drill.metastore.components.tables.Tables) Comparator(java.util.Comparator) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) Collections(java.util.Collections) ArrayList(java.util.ArrayList) MaterializedField(org.apache.drill.exec.record.MaterializedField) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader)
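
Stripped of the per-column dispatch, the write half of this method is the standard ResultSetLoader cycle: start a batch, obtain the RowSetLoader, add one value per column in container order, and harvest the filled VectorContainer. A minimal hedged sketch of that cycle (getResultSetLoaderWithBatchSchema() is this class's own helper; rows is a hypothetical Iterator<Object[]> standing in for the metadata traversal above):

ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
resultSetLoader.startBatch();                        // open a write batch
RowSetLoader rowWriter = resultSetLoader.writer();
while (!rowWriter.isFull() && rows.hasNext()) {
    rowWriter.addRow(rows.next());                   // one Object per column, in container order
}
VectorContainer result = resultSetLoader.harvest();  // hands vector ownership to the caller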

Example 70 with VectorWrapper

Use of org.apache.drill.exec.record.VectorWrapper in project drill by apache.

From class TopNBatch, method innerNext:

@Override
public IterOutcome innerNext() {
    recordCount = 0;
    if (state == BatchState.DONE) {
        return NONE;
    }
    // Check if anything is remaining from previous record boundary
    if (hasOutputRecords) {
        return handleRemainingOutput();
    }
    // Reset the TopN state for next iteration
    resetTopNState();
    boolean incomingHasSv2 = false;
    switch (incoming.getSchema().getSelectionVectorMode()) {
        case NONE:
            break;
        case TWO_BYTE:
            incomingHasSv2 = true;
            break;
        case FOUR_BYTE:
            throw UserException.internalError(null)
                .message("TopN doesn't support incoming with SV4 mode")
                .build(logger);
        default:
            throw new UnsupportedOperationException("Unsupported SV mode detected in TopN incoming batch");
    }
    outer: while (true) {
        Stopwatch watch = Stopwatch.createStarted();
        if (first) {
            lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
            // Create the SV4 object upfront to be used for both empty and non-empty incoming batches at EMIT boundary
            sv4 = new SelectionVector4(context.getAllocator(), 0);
            first = false;
        } else {
            lastKnownOutcome = next(incoming);
        }
        if (lastKnownOutcome == OK && schema == null) {
            lastKnownOutcome = IterOutcome.OK_NEW_SCHEMA;
            container.clear();
        }
        logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
        switch(lastKnownOutcome) {
            case NONE:
                break outer;
            case NOT_YET:
                throw new UnsupportedOperationException();
            case OK_NEW_SCHEMA:
                // Only act when the schema truly changes; artificial schema changes are ignored.
                // Schema-change handling when EMIT is also seen is the same as without EMIT, i.e. a
                // change is handled only if union type is enabled.
                container.clear();
                firstBatchForSchema = true;
                if (!incoming.getSchema().equals(schema)) {
                    if (schema != null) {
                        if (!unionTypeEnabled) {
                            throw new UnsupportedOperationException(String.format("TopN currently doesn't support changing " + "schemas with union type disabled. Please try enabling union type: %s and re-execute the query", ExecConstants.ENABLE_UNION_TYPE_KEY));
                        } else {
                            schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                            purgeAndResetPriorityQueue();
                            schemaChanged = true;
                        }
                    } else {
                        schema = incoming.getSchema();
                    }
                }
            // fall through.
            case OK:
            case EMIT:
                if (incoming.getRecordCount() == 0) {
                    for (VectorWrapper<?> w : incoming) {
                        w.clear();
                    }
                    // Release memory for incoming SV2 vector
                    if (incomingHasSv2) {
                        incoming.getSelectionVector2().clear();
                    }
                    break;
                }
                countSincePurge += incoming.getRecordCount();
                batchCount++;
                RecordBatchData batch;
                if (schemaChanged) {
                    batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
                } else {
                    batch = new RecordBatchData(incoming, oContext.getAllocator());
                }
                boolean success = false;
                try {
                    if (priorityQueue == null) {
                        priorityQueue = createNewPriorityQueue(new ExpandableHyperContainer(batch.getContainer()), config.getLimit());
                    } else if (!priorityQueue.isInitialized()) {
                        // The priority queue was cleaned up after producing output for the first record
                        // boundary, so initialize it again for the next record boundary.
                        priorityQueue.init(config.getLimit(), oContext.getAllocator(), schema.getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE);
                    }
                    priorityQueue.add(batch);
                    // Purge RecordBatches whose rows can no longer fall within the top N
                    if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
                        purge();
                        countSincePurge = 0;
                        batchCount = 0;
                    }
                    success = true;
                } catch (SchemaChangeException e) {
                    throw schemaChangeException(e, logger);
                } finally {
                    if (!success) {
                        batch.clear();
                    }
                }
                break;
            default:
                throw new UnsupportedOperationException();
        }
        // Stop pulling more batches once the last one arrived with records and an EMIT outcome
        if (lastKnownOutcome == EMIT) {
            break;
        }
    }
    // The PriorityQueue can be uninitialized here if only empty batches were received between 2 EMIT outcomes.
    if (schema == null || (priorityQueue == null || !priorityQueue.isInitialized())) {
        // builder may be null at this point if the first incoming batch is empty
        return handleEmptyBatches(lastKnownOutcome);
    }
    priorityQueue.generate();
    prepareOutputContainer(priorityQueue.getHyperBatch(), priorityQueue.getFinalSv4());
    // Determine the final outcome to return based on lastKnownOutcome.
    return getFinalOutcome();
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) HyperVectorWrapper(org.apache.drill.exec.record.HyperVectorWrapper) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)
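
The HyperVectorWrapper import hints at how TopN accumulates its input: each RecordBatchData is folded into an ExpandableHyperContainer, which stacks the vectors of many batches behind a single wrapper per column, addressed through the SelectionVector4 that getFinalSv4() later exposes. A hedged sketch of that accumulation (addBatch is assumed from the container's API; first and second stand for two already-loaded containers):

// Stack two batches behind hyper wrappers; rows are then addressed as
// (batch index, record index) pairs through an SV4.
ExpandableHyperContainer hyper = new ExpandableHyperContainer(first);
hyper.addBatch(second);   // assumption: appends the second batch's vectors to the same wrappers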

Aggregations

VectorWrapper (org.apache.drill.exec.record.VectorWrapper) 73
ValueVector (org.apache.drill.exec.vector.ValueVector) 44
Test (org.junit.Test) 39
RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader) 35
QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch) 34
DrillClient (org.apache.drill.exec.client.DrillClient) 28
Drillbit (org.apache.drill.exec.server.Drillbit) 28
RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet) 28
SlowTest (org.apache.drill.categories.SlowTest) 18
SchemaPath (org.apache.drill.common.expression.SchemaPath) 11
ExecTest (org.apache.drill.exec.ExecTest) 9
TypedFieldId (org.apache.drill.exec.record.TypedFieldId) 9
VectorContainer (org.apache.drill.exec.record.VectorContainer) 9
MaterializedField (org.apache.drill.exec.record.MaterializedField) 7
IOException (java.io.IOException) 6
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 6
Stopwatch (com.google.common.base.Stopwatch) 5
OperatorTest (org.apache.drill.categories.OperatorTest) 5
TypeProtos (org.apache.drill.common.types.TypeProtos) 5
TransferPair (org.apache.drill.exec.record.TransferPair) 5