Search in sources :

Example 6 with ColumnTypeMetadata_v4

use of org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4 in project drill by apache.

the class ParquetTableMetadataUtils method getRowGroupFields.

/**
 * Returns map of column names with their drill types for specified {@code rowGroup}.
 * <p>
 * When the table metadata is of version 4.0 or later and not all of its columns are flagged
 * as interesting, the non-interesting columns listed in the table-level column type info are
 * registered first; the columns of the row group itself are registered afterwards.
 *
 * @param parquetTableMetadata the source of primitive and original column types
 * @param rowGroup             row group whose columns should be discovered
 * @return map of column names with their drill types, backed by a {@link LinkedHashMap}
 */
public static Map<SchemaPath, TypeProtos.MajorType> getRowGroupFields(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.RowGroupMetadata rowGroup) {
    Map<SchemaPath, TypeProtos.MajorType> columns = new LinkedHashMap<>();
    // The comparison must be inclusive: version 4.0 metadata is already represented by
    // ParquetTableMetadata_v4 and carries the interesting / non-interesting column flag.
    // A strict "higher than 4.0" check would skip its non-interesting columns entirely.
    // The cast is only evaluated after the version check succeeds (short-circuit &&).
    if (new MetadataVersion(parquetTableMetadata.getMetadataVersion()).isAtLeast(4, 0) && !((Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata).isAllColumnsInteresting()) {
        // adds non-interesting fields from table metadata
        for (MetadataBase.ColumnTypeMetadata columnTypeMetadata : parquetTableMetadata.getColumnTypeInfoList()) {
            Metadata_V4.ColumnTypeMetadata_v4 metadata = (Metadata_V4.ColumnTypeMetadata_v4) columnTypeMetadata;
            if (!metadata.isInteresting) {
                TypeProtos.MajorType columnType = getColumnType(metadata.name, metadata.primitiveType, metadata.originalType, parquetTableMetadata);
                SchemaPath columnPath = SchemaPath.getCompoundPath(metadata.name);
                putType(columns, columnPath, columnType);
            }
        }
    }
    // adds the fields that are physically present in the row group
    for (MetadataBase.ColumnMetadata column : rowGroup.getColumns()) {
        TypeProtos.MajorType columnType = getColumnType(parquetTableMetadata, column);
        SchemaPath columnPath = SchemaPath.getCompoundPath(column.getName());
        putType(columns, columnPath, columnType);
    }
    return columns;
}
Also used : TypeProtos(org.apache.drill.common.types.TypeProtos) LinkedHashMap(java.util.LinkedHashMap) MetadataVersion(org.apache.drill.exec.store.parquet.metadata.MetadataVersion) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataBase(org.apache.drill.exec.store.parquet.metadata.MetadataBase)

Example 7 with ColumnTypeMetadata_v4

use of org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4 in project drill by apache.

the class ParquetTableMetadataUtils method getNonInterestingColumnsMeta.

/**
 * Builds the metadata describing the non-interesting columns of a parquet table.
 * <p>
 * Only {@code Metadata_V4.ParquetTableMetadata_v4} instances with a non-null column type info
 * map contribute entries; for any other input the returned metadata wraps an empty map.
 * Each non-interesting column is given a single {@code NULLS_COUNT} statistic holding
 * {@code Statistic.NO_COLUMN_STATS}, together with the minor type derived from its
 * primitive and original parquet types.
 *
 * @param parquetTableMetadata the source of column metadata for non-interesting column's statistics
 * @return non-interesting columns metadata, possibly without any column statistics
 */
public static NonInterestingColumnsMetadata getNonInterestingColumnsMeta(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
    Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

    // Only v4 table metadata tracks the interesting flag per column.
    if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
        return new NonInterestingColumnsMetadata(statsByColumn);
    }

    Metadata_V4.ParquetTableMetadata_v4 tableMetadataV4 = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
    Map<Metadata_V4.ColumnTypeMetadata_v4.Key, Metadata_V4.ColumnTypeMetadata_v4> typeInfoByKey = tableMetadataV4.getColumnTypeInfoMap();

    // A missing type info map simply yields an empty result.
    if (typeInfoByKey != null) {
        for (Metadata_V4.ColumnTypeMetadata_v4 typeInfo : typeInfoByKey.values()) {
            if (typeInfo.isInteresting) {
                continue;
            }
            SchemaPath path = SchemaPath.getCompoundPath(typeInfo.name);
            List<StatisticsHolder<?>> holders = new ArrayList<>();
            holders.add(new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));
            TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(typeInfo.primitiveType, typeInfo.originalType);
            statsByColumn.put(path, new ColumnStatistics<>(holders, minorType));
        }
    }
    return new NonInterestingColumnsMetadata(statsByColumn);
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TypeProtos(org.apache.drill.common.types.TypeProtos) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) OriginalType(org.apache.parquet.schema.OriginalType) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath)7 LinkedHashMap (java.util.LinkedHashMap)5 Metadata_V4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4)4 TypeProtos (org.apache.drill.common.types.TypeProtos)3 ColumnTypeMetadata_v4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ColumnTypeMetadata_v4)3 MetadataSummary (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.MetadataSummary)3 ParquetTableMetadata_v4 (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetTableMetadata_v4)3 Path (org.apache.hadoop.fs.Path)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 SimpleModule (com.fasterxml.jackson.databind.module.SimpleModule)2 AfterburnerModule (com.fasterxml.jackson.module.afterburner.AfterburnerModule)2 IOException (java.io.IOException)2 InputStream (java.io.InputStream)2 MetadataBase (org.apache.drill.exec.store.parquet.metadata.MetadataBase)2 ParquetFileMetadata (org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata)2 MetadataVersion (org.apache.drill.exec.store.parquet.metadata.MetadataVersion)2 FileMetadata (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.FileMetadata)2 ParquetFileAndRowCountMetadata (org.apache.drill.exec.store.parquet.metadata.Metadata_V4.ParquetFileAndRowCountMetadata)2 Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)2 OriginalType (org.apache.parquet.schema.OriginalType)2