
Example 31 with MinorType

Use of org.apache.drill.common.types.TypeProtos.MinorType in project drill by apache.

The class MetadataHandlerBatch, method getResultSetLoaderForMetadata:

private ResultSetLoader getResultSetLoaderForMetadata(BaseMetadata baseMetadata) {
    SchemaBuilder schemaBuilder = new SchemaBuilder().addNullable(MetastoreAnalyzeConstants.LOCATION_FIELD, MinorType.VARCHAR);
    for (String segmentColumn : popConfig.getContext().segmentColumns()) {
        schemaBuilder.addNullable(segmentColumn, MinorType.VARCHAR);
    }
    baseMetadata.getColumnsStatistics().entrySet().stream()
        .sorted(Comparator.comparing(e -> e.getKey().getRootSegmentPath()))
        .forEach(entry -> {
        for (StatisticsKind<?> statisticsKind : AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet()) {
            MinorType type = AnalyzeColumnUtils.COLUMN_STATISTICS_TYPES.get(statisticsKind);
            type = type != null ? type : entry.getValue().getComparatorType();
            schemaBuilder.addNullable(AnalyzeColumnUtils.getColumnStatisticsFieldName(entry.getKey().getRootSegmentPath(), statisticsKind), type);
        }
    });
    for (StatisticsKind<?> statisticsKind : AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet()) {
        schemaBuilder.addNullable(AnalyzeColumnUtils.getMetadataStatisticsFieldName(statisticsKind), AnalyzeColumnUtils.COLUMN_STATISTICS_TYPES.get(statisticsKind));
    }
    schemaBuilder.addMapArray(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD).resumeSchema();
    if (metadataType == MetadataType.SEGMENT) {
        schemaBuilder.addArray(MetastoreAnalyzeConstants.LOCATIONS_FIELD, MinorType.VARCHAR);
    }
    if (metadataType == MetadataType.ROW_GROUP) {
        schemaBuilder.addNullable(columnNamesOptions.rowGroupIndex(), MinorType.VARCHAR);
        schemaBuilder.addNullable(columnNamesOptions.rowGroupStart(), MinorType.VARCHAR);
        schemaBuilder.addNullable(columnNamesOptions.rowGroupLength(), MinorType.VARCHAR);
    }
    schemaBuilder.addNullable(MetastoreAnalyzeConstants.SCHEMA_FIELD, MinorType.VARCHAR)
        .addNullable(columnNamesOptions.lastModifiedTime(), MinorType.VARCHAR)
        .add(MetastoreAnalyzeConstants.METADATA_TYPE, MinorType.VARCHAR);
    ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
        .readerSchema(schemaBuilder.buildSchema())
        .build();
    return new ResultSetLoaderImpl(container.getAllocator(), options);
}
Also used: ResultSetOptionBuilder (org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder), ResultSetLoaderImpl (org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), MinorType (org.apache.drill.common.types.TypeProtos.MinorType)
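For context, a minimal sketch of the SchemaBuilder pattern the method above relies on: scalar columns keyed by MinorType, a repeated map resumed back into the parent schema, and the resulting TupleMetadata handed to the result set loader options. The column names here are illustrative, not the Metastore constants used above.

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;

public class MetadataSchemaSketch {
  public static TupleMetadata buildSchema() {
    return new SchemaBuilder()
        .addNullable("location", MinorType.VARCHAR)   // nullable VARCHAR column
        .addArray("locations", MinorType.VARCHAR)     // repeated VARCHAR column
        .addMapArray("collectedMap")                  // repeated MAP column ...
          .resumeSchema()                             // ... closed back into the row schema
        .add("metadataType", MinorType.VARCHAR)       // required column
        .buildSchema();
  }
}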

Example 32 with MinorType

Use of org.apache.drill.common.types.TypeProtos.MinorType in project drill by apache.

The class TestDrillbitResilience, method assertDrillbitsOk:

/**
 * Check that all the drillbits are ok.
 * <p>
 * The current implementation does this by counting the number of drillbits
 * using a query.
 */
private static void assertDrillbitsOk() {
    SingleRowListener listener = new SingleRowListener() {

        private final BufferAllocator bufferAllocator = RootAllocatorFactory.newRoot(cluster.config());

        private final RecordBatchLoader loader = new RecordBatchLoader(bufferAllocator);

        @Override
        public void rowArrived(QueryDataBatch queryResultBatch) {
            // load the single record
            final QueryData queryData = queryResultBatch.getHeader();
            loader.load(queryData.getDef(), queryResultBatch.getData());
            assertEquals(1, loader.getRecordCount());
            // there should only be one column
            final BatchSchema batchSchema = loader.getSchema();
            assertEquals(1, batchSchema.getFieldCount());
            // the column should be an integer
            final MaterializedField countField = batchSchema.getColumn(0);
            final MinorType fieldType = countField.getType().getMinorType();
            assertEquals(MinorType.BIGINT, fieldType);
            // get the column value
            final VectorWrapper<?> vw = loader.iterator().next();
            final Object obj = vw.getValueVector().getAccessor().getObject(0);
            assertTrue(obj instanceof Long);
            final Long countValue = (Long) obj;
            // assume this means all the drillbits are still ok
            assertEquals(cluster.drillbits().size(), countValue.intValue());
            loader.clear();
        }

        @Override
        public void cleanup() {
            loader.clear();
            DrillAutoCloseables.closeNoChecked(bufferAllocator);
        }
    };
    try {
        QueryTestUtil.testWithListener(client.client(), QueryType.SQL, "select count(*) from sys.memory", listener);
        listener.waitForCompletion();
        QueryState state = listener.getQueryState();
        assertSame(QueryState.COMPLETED, state, () -> String.format("QueryState should be COMPLETED (and not %s).", state));
        assertTrue(listener.getErrorList().isEmpty(), "There should not be any errors when checking if Drillbits are OK");
    } catch (final Exception e) {
        throw new RuntimeException("Couldn't query active drillbits", e);
    } finally {
        logger.debug("Cleanup listener");
        listener.cleanup();
    }
    logger.debug("Drillbits are ok.");
}
Also used: SingleRowListener (org.apache.drill.SingleRowListener), QueryData (org.apache.drill.exec.proto.UserBitShared.QueryData), RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader), MaterializedField (org.apache.drill.exec.record.MaterializedField), QueryState (org.apache.drill.exec.proto.UserBitShared.QueryResult.QueryState), UserException (org.apache.drill.common.exceptions.UserException), RpcException (org.apache.drill.exec.rpc.RpcException), ForemanSetupException (org.apache.drill.exec.work.foreman.ForemanSetupException), ForemanException (org.apache.drill.exec.work.foreman.ForemanException), TestInstantiationException (org.junit.jupiter.api.extension.TestInstantiationException), IOException (java.io.IOException), BufferAllocator (org.apache.drill.exec.memory.BufferAllocator), QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch), BatchSchema (org.apache.drill.exec.record.BatchSchema), MinorType (org.apache.drill.common.types.TypeProtos.MinorType)

Example 33 with MinorType

Use of org.apache.drill.common.types.TypeProtos.MinorType in project drill by apache.

The class UnionWriterImpl, method addMember:

/**
 * Add a column writer to an existing union writer. Used for implementations
 * that support "live" schema evolution: column discovery while writing.
 * The corresponding metadata must already have been added to the schema.
 * Called by the shim's <tt>addMember</tt> to do writer-level tasks.
 *
 * @param writer the column writer to add
 */
protected void addMember(AbstractObjectWriter writer) {
    final MinorType type = writer.schema().type();
    if (!variantSchema().hasType(type)) {
        variantSchema().addType(writer.schema());
    }
    writer.events().bindIndex(index);
    if (state != State.IDLE) {
        writer.events().startWrite();
        if (state == State.IN_ROW) {
            writer.events().startRow();
        }
    }
}
Also used: MinorType (org.apache.drill.common.types.TypeProtos.MinorType)
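A minimal, hypothetical sketch of the catch-up logic this method performs. The WriterEvents and State names below mirror the snippet, but the surrounding harness is invented for illustration: a member added while a write or row is in progress must replay the lifecycle events it missed.

// Hypothetical stand-ins for the Drill writer-event types; only the
// catch-up behaviour from addMember() above is reproduced here.
interface WriterEvents {
  void startWrite();
  void startRow();
}

enum State { IDLE, IN_WRITE, IN_ROW }

class UnionWriterSketch {
  private State state = State.IDLE;

  // A member added after writing began must be caught up to the current
  // state; otherwise its write position would lag behind the other members.
  void catchUp(WriterEvents newMember) {
    if (state != State.IDLE) {
      newMember.startWrite();      // the batch-level write already started
      if (state == State.IN_ROW) {
        newMember.startRow();      // a row is also in progress
      }
    }
  }
}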

Example 34 with MinorType

Use of org.apache.drill.common.types.TypeProtos.MinorType in project drill by apache.

The class ColumnReaderFactory, method buildColumnReader:

public static BaseScalarReader buildColumnReader(VectorAccessor va) {
    MajorType major = va.type();
    MinorType type = major.getMinorType();
    switch(type) {
        case GENERIC_OBJECT:
        case LATE:
        case NULL:
        case LIST:
        case MAP:
        case DICT:
            throw new UnsupportedOperationException(type.toString());
        default:
            return newAccessor(type, requiredReaders);
    }
}
Also used: MajorType (org.apache.drill.common.types.TypeProtos.MajorType), MinorType (org.apache.drill.common.types.TypeProtos.MinorType)
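A small sketch of the MajorType/MinorType split the factory method above dispatches on: the MinorType carries the data type, the MajorType adds the DataMode, and the complex or placeholder types are the ones rejected. It uses only the standard org.apache.drill.common.types.Types helpers, not the factory internals.

import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;

public class TypeDispatchSketch {
  // Mirrors the rejection list in buildColumnReader(): these MinorTypes are
  // complex or placeholder types with no scalar reader.
  public static boolean hasScalarReader(MinorType type) {
    switch (type) {
      case GENERIC_OBJECT:
      case LATE:
      case NULL:
      case LIST:
      case MAP:
      case DICT:
        return false;
      default:
        return true;
    }
  }

  public static void main(String[] args) {
    MajorType bigint = Types.required(MinorType.BIGINT);    // BIGINT, DataMode.REQUIRED
    MajorType varchar = Types.optional(MinorType.VARCHAR);  // VARCHAR, DataMode.OPTIONAL
    System.out.println(hasScalarReader(bigint.getMinorType()));  // true
    System.out.println(hasScalarReader(MinorType.MAP));          // false
    System.out.println(varchar.getMode() == DataMode.OPTIONAL);  // true
  }
}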

Example 35 with MinorType

Use of org.apache.drill.common.types.TypeProtos.MinorType in project drill by apache.

The class HDF5BatchReader, method projectDataset:

/**
 * Writes one row of data in a metadata query. The number of dimensions here
 * is n+1, so if the actual dataset is a 1D column, it will be written as a list.
 * This function is only called in metadata queries, as the schema is not
 * known in advance. If the data size is greater than 16 MB, the function does
 * not project the dataset.
 *
 * @param rowWriter
 *          the rowWriter to which the data will be written
 * @param datapath
 *          the datapath from which the data will be read
 */
private void projectDataset(RowSetLoader rowWriter, String datapath) {
    String fieldName = HDF5Utils.getNameFromPath(datapath);
    Dataset dataset = hdfFile.getDatasetByPath(datapath);
    // If the dataset is larger than 16MB, do not project the dataset
    if (dataset.getSizeInBytes() > MAX_DATASET_SIZE) {
        logger.warn("Dataset {} is greater than 16MB.  Data will be truncated in Metadata view.", datapath);
    }
    int[] dimensions = dataset.getDimensions();
    // Case for single dimensional data
    if (dimensions.length == 1) {
        MinorType currentDataType = HDF5Utils.getDataType(dataset.getDataType());
        Object data;
        try {
            data = dataset.getData();
        } catch (Exception e) {
            logger.debug("Error reading {}", datapath);
            return;
        }
        assert currentDataType != null;
        // Skip null datasets
        if (data == null) {
            return;
        }
        switch(currentDataType) {
            case GENERIC_OBJECT:
                logger.warn("Couldn't read {}", datapath);
                break;
            case VARCHAR:
                String[] stringData = (String[]) data;
                writeStringListColumn(rowWriter, fieldName, stringData);
                break;
            case TIMESTAMP:
                long[] longList = (long[]) data;
                writeTimestampListColumn(rowWriter, fieldName, longList);
                break;
            case INT:
                int[] intList = (int[]) data;
                writeIntListColumn(rowWriter, fieldName, intList);
                break;
            case SMALLINT:
                short[] shortList = (short[]) data;
                writeSmallIntColumn(rowWriter, fieldName, shortList);
                break;
            case TINYINT:
                byte[] byteList = (byte[]) data;
                writeByteListColumn(rowWriter, fieldName, byteList);
                break;
            case FLOAT4:
                float[] tempFloatList = (float[]) data;
                writeFloat4ListColumn(rowWriter, fieldName, tempFloatList);
                break;
            case FLOAT8:
                double[] tempDoubleList = (double[]) data;
                writeFloat8ListColumn(rowWriter, fieldName, tempDoubleList);
                break;
            case BIGINT:
                long[] tempBigIntList = (long[]) data;
                writeLongListColumn(rowWriter, fieldName, tempBigIntList);
                break;
            case MAP:
                try {
                    getAndMapCompoundData(datapath, hdfFile, rowWriter);
                } catch (Exception e) {
                    throw UserException.dataReadError().message("Error writing Compound Field: " + e.getMessage()).addContext(errorContext).build(logger);
                }
                break;
            default:
                // Case for data types that cannot be read
                logger.warn("{} not implemented.", currentDataType.name());
        }
    } else if (dimensions.length == 2) {
        // Case for 2D data sets.  These are projected as lists of lists or maps of maps
        int cols = dimensions[1];
        int rows = dimensions[0];
        // TODO Add Boolean, Small and TinyInt data types
        switch(HDF5Utils.getDataType(dataset.getDataType())) {
            case INT:
                int[][] colData = (int[][]) dataset.getData();
                mapIntMatrixField(colData, cols, rows, rowWriter);
                break;
            case FLOAT4:
                float[][] floatData = (float[][]) dataset.getData();
                mapFloatMatrixField(floatData, cols, rows, rowWriter);
                break;
            case FLOAT8:
                double[][] doubleData = (double[][]) dataset.getData();
                mapDoubleMatrixField(doubleData, cols, rows, rowWriter);
                break;
            case BIGINT:
                long[][] longData = (long[][]) dataset.getData();
                mapBigIntMatrixField(longData, cols, rows, rowWriter);
                break;
            default:
                logger.warn("{} not implemented.", HDF5Utils.getDataType(dataset.getDataType()));
        }
    } else if (dimensions.length > 2) {
        // Case for data sets with dimensions > 2
        int cols = dimensions[1];
        int rows = dimensions[0];
        switch(HDF5Utils.getDataType(dataset.getDataType())) {
            case INT:
                int[][] intMatrix = HDF5Utils.toIntMatrix((Object[]) dataset.getData());
                mapIntMatrixField(intMatrix, cols, rows, rowWriter);
                break;
            case FLOAT4:
                float[][] floatData = HDF5Utils.toFloatMatrix((Object[]) dataset.getData());
                mapFloatMatrixField(floatData, cols, rows, rowWriter);
                break;
            case FLOAT8:
                double[][] doubleData = HDF5Utils.toDoubleMatrix((Object[]) dataset.getData());
                mapDoubleMatrixField(doubleData, cols, rows, rowWriter);
                break;
            case BIGINT:
                long[][] longData = HDF5Utils.toLongMatrix((Object[]) dataset.getData());
                mapBigIntMatrixField(longData, cols, rows, rowWriter);
                break;
            default:
                logger.warn("{} not implemented.", HDF5Utils.getDataType(dataset.getDataType()));
        }
    }
}
Also used: Dataset (io.jhdf.api.Dataset), UserException (org.apache.drill.common.exceptions.UserException), HdfException (io.jhdf.exceptions.HdfException), IOException (java.io.IOException), MinorType (org.apache.drill.common.types.TypeProtos.MinorType)
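The switch on currentDataType above dispatches on the MinorType that HDF5Utils derives for a 1D dataset. A hypothetical helper (not part of HDF5Utils) shows the same Java-array-to-MinorType correspondence in reverse:

import org.apache.drill.common.types.TypeProtos.MinorType;

public class Hdf5TypeMappingSketch {
  // Hypothetical helper: maps the Java array returned by Dataset.getData()
  // to the MinorType cases handled in projectDataset() above. TIMESTAMP
  // datasets also arrive as long[], so they cannot be distinguished here.
  public static MinorType minorTypeFor(Object data) {
    if (data instanceof String[]) return MinorType.VARCHAR;
    if (data instanceof long[])   return MinorType.BIGINT;
    if (data instanceof int[])    return MinorType.INT;
    if (data instanceof short[])  return MinorType.SMALLINT;
    if (data instanceof byte[])   return MinorType.TINYINT;
    if (data instanceof float[])  return MinorType.FLOAT4;
    if (data instanceof double[]) return MinorType.FLOAT8;
    return MinorType.GENERIC_OBJECT;   // unreadable or unsupported types
  }
}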

Aggregations

MinorType (org.apache.drill.common.types.TypeProtos.MinorType): 86 usages
MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 32 usages
MaterializedField (org.apache.drill.exec.record.MaterializedField): 17 usages
ValueVector (org.apache.drill.exec.vector.ValueVector): 11 usages
DataMode (org.apache.drill.common.types.TypeProtos.DataMode): 10 usages
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 8 usages
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 7 usages
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 6 usages
Test (org.junit.Test): 6 usages
ImmutableList (com.google.common.collect.ImmutableList): 5 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 5 usages
ValueHolder (org.apache.drill.exec.expr.holders.ValueHolder): 5 usages
IOException (java.io.IOException): 4 usages
UserException (org.apache.drill.common.exceptions.UserException): 4 usages
OriginalType (org.apache.parquet.schema.OriginalType): 4 usages
PrimitiveType (org.apache.parquet.schema.PrimitiveType): 4 usages
SQLException (java.sql.SQLException): 3 usages
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 3 usages
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 3 usages
ExtendableRowSet (org.apache.drill.exec.physical.rowSet.RowSet.ExtendableRowSet): 3 usages