Search in sources :

Example 96 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by apache.

the class ParquetRecordWriter method getPrimitiveType.

/**
 * Builds the Parquet {@link PrimitiveType} that describes the given field.
 *
 * <p>The physical type name and length are first looked up from the field's minor type via
 * {@link ParquetTypeHelper}. For decimal minor types both are overridden: the length comes from
 * {@link DecimalUtility#getMaxBytesSizeForPrecision} and the physical type is
 * {@code logicalTypeForDecimals}, unless {@code usePrimitiveTypesForDecimals} is set and the
 * field's precision fits into INT32 or INT64.
 *
 * @param field field to describe
 * @return primitive type carrying the field's repetition, physical type, length, name,
 *         original type and decimal metadata
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
    final MinorType drillType = field.getType().getMinorType();
    final String fieldName = field.getName();
    int typeLength = ParquetTypeHelper.getLengthForMinorType(drillType);
    PrimitiveTypeName physicalType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(drillType);

    if (Types.isDecimalType(drillType)) {
        final int precision = field.getPrecision();
        if (!usePrimitiveTypesForDecimals) {
            physicalType = logicalTypeForDecimals;
        } else if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
            physicalType = PrimitiveTypeName.INT32;
        } else if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
            physicalType = PrimitiveTypeName.INT64;
        } else {
            // Precision too large for either integer type: keep the configured decimal type.
            physicalType = logicalTypeForDecimals;
        }
        typeLength = DecimalUtility.getMaxBytesSizeForPrecision(precision);
    }

    final Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
    final OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(drillType);
    final DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
    return new PrimitiveType(repetition, physicalType, typeLength, fieldName, originalType, decimalMetadata, null);
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) DecimalMetadata(org.apache.parquet.schema.DecimalMetadata) Repetition(org.apache.parquet.schema.Type.Repetition) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)

Example 97 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by apache.

the class ParquetTableMetadataUtils method getRowGroupColumnStatistics.

/**
 * Builds a map of {@link ColumnStatistics} keyed by column path from the columns of the given
 * {@link MetadataBase.RowGroupMetadata}.
 *
 * <p>Each column contributes three statistics: minimum value, maximum value and nulls count.
 * When {@code hasInvalidStatistics} reports the column's statistics as invalid, the nulls count
 * is replaced with {@link Statistic#NO_COLUMN_STATS}.
 *
 * @param tableMetadata    table metadata used to resolve each column's primitive and original type
 * @param rowGroupMetadata row group whose columns are converted
 * @return map from column path to the statistics built for that column
 */
public static Map<SchemaPath, ColumnStatistics<?>> getRowGroupColumnStatistics(MetadataBase.ParquetTableMetadataBase tableMetadata, MetadataBase.RowGroupMetadata rowGroupMetadata) {
    Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

    for (MetadataBase.ColumnMetadata columnMeta : rowGroupMetadata.getColumns()) {
        SchemaPath path = SchemaPath.getCompoundPath(columnMeta.getName());

        // Kept as a boxed Long assigned in two steps: the column's own count may be null.
        Long nullsCount = columnMeta.getNulls();
        if (hasInvalidStatistics(columnMeta, tableMetadata)) {
            nullsCount = Statistic.NO_COLUMN_STATS;
        }

        PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(tableMetadata, columnMeta);
        OriginalType logicalType = getOriginalType(tableMetadata, columnMeta);
        TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(physicalType, logicalType);

        StatisticsHolder<?> minHolder =
            new StatisticsHolder<>(getValue(columnMeta.getMinValue(), physicalType, logicalType), ColumnStatisticsKind.MIN_VALUE);
        StatisticsHolder<?> maxHolder =
            new StatisticsHolder<>(getValue(columnMeta.getMaxValue(), physicalType, logicalType), ColumnStatisticsKind.MAX_VALUE);
        StatisticsHolder<?> nullsHolder =
            new StatisticsHolder<>(nullsCount, ColumnStatisticsKind.NULLS_COUNT);

        List<StatisticsHolder<?>> holders = new ArrayList<>();
        holders.add(minHolder);
        holders.add(maxHolder);
        holders.add(nullsHolder);

        statsByColumn.put(path, new ColumnStatistics<>(holders, minorType));
    }

    return statsByColumn;
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TypeProtos(org.apache.drill.common.types.TypeProtos) OriginalType(org.apache.parquet.schema.OriginalType) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataBase(org.apache.drill.exec.store.parquet.metadata.MetadataBase) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Example 98 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by apache.

the class ParquetTableMetadataUtils method getNonInterestingColumnsMeta.

/**
 * Collects metadata for the columns that are not marked as interesting.
 *
 * <p>Only {@link Metadata_V4.ParquetTableMetadata_v4} instances are inspected; for any other
 * metadata type, or when the column type info map is {@code null}, the result wraps an empty
 * map. Every non-interesting column gets a single nulls-count statistic set to
 * {@link Statistic#NO_COLUMN_STATS}.
 *
 * @param parquetTableMetadata table metadata supplying the column type information
 * @return metadata of the non-interesting columns
 */
public static NonInterestingColumnsMetadata getNonInterestingColumnsMeta(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
    Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

    if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
        return new NonInterestingColumnsMetadata(statsByColumn);
    }

    Metadata_V4.ParquetTableMetadata_v4 v4Metadata = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
    Map<Metadata_V4.ColumnTypeMetadata_v4.Key, Metadata_V4.ColumnTypeMetadata_v4> typeInfoByKey = v4Metadata.getColumnTypeInfoMap();

    if (typeInfoByKey != null) {
        for (Metadata_V4.ColumnTypeMetadata_v4 typeInfo : typeInfoByKey.values()) {
            if (typeInfo.isInteresting) {
                continue;
            }
            SchemaPath path = SchemaPath.getCompoundPath(typeInfo.name);
            List<StatisticsHolder<?>> holders = new ArrayList<>();
            holders.add(new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));
            TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(typeInfo.primitiveType, typeInfo.originalType);
            statsByColumn.put(path, new ColumnStatistics<>(holders, minorType));
        }
    }

    return new NonInterestingColumnsMetadata(statsByColumn);
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TypeProtos(org.apache.drill.common.types.TypeProtos) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) OriginalType(org.apache.parquet.schema.OriginalType) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Example 99 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by apache.

the class ParquetTableMetadataUtils method getColumnType.

/**
 * Resolves the major type of a column by looking up its primitive and original Parquet types
 * in the table metadata and delegating to the overload that takes them explicitly.
 *
 * @param parquetTableMetadata table metadata used for the type lookups
 * @param column               column whose type is resolved
 * @return major type returned by the delegating overload
 */
private static TypeProtos.MajorType getColumnType(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.ColumnMetadata column) {
    final PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(parquetTableMetadata, column);
    final OriginalType logicalType = getOriginalType(parquetTableMetadata, column);
    return getColumnType(column.getName(), physicalType, logicalType, parquetTableMetadata);
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Example 100 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestStatistics method testSpecBuilderForFloat.

/**
 * Checks how statistics built for reading a FLOAT column treat NaN and signed-zero bounds:
 * the NaN cases are expected to report no non-null value while keeping the nulls count, and the
 * zero cases are expected to read back a min of {@code -0.0f} and a max of {@code 0.0f}.
 */
@Test
public void testSpecBuilderForFloat() {
    PrimitiveType floatType = Types.required(FLOAT).named("test_float");

    // NaN as the minimum.
    Statistics<?> nanMin = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(Float.NaN)))
        .withMax(intToBytes(floatToIntBits(42.0f)))
        .withNumNulls(0)
        .build();
    assertTrue(nanMin.isNumNullsSet());
    assertEquals(0, nanMin.getNumNulls());
    assertFalse(nanMin.hasNonNullValue());

    // NaN as the maximum.
    Statistics<?> nanMax = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(-42.0f)))
        .withMax(intToBytes(floatToIntBits(Float.NaN)))
        .withNumNulls(11)
        .build();
    assertTrue(nanMax.isNumNullsSet());
    assertEquals(11, nanMax.getNumNulls());
    assertFalse(nanMax.hasNonNullValue());

    // NaN on both ends.
    Statistics<?> nanBoth = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(Float.NaN)))
        .withMax(intToBytes(floatToIntBits(Float.NaN)))
        .withNumNulls(42)
        .build();
    assertTrue(nanBoth.isNumNullsSet());
    assertEquals(42, nanBoth.getNumNulls());
    assertFalse(nanBoth.hasNonNullValue());

    // Positive zero as the minimum is expected to come back as negative zero.
    Statistics<?> zeroMin = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(0.0f)))
        .withMax(intToBytes(floatToIntBits(42.0f)))
        .build();
    assertEquals(0, Float.compare(-0.0f, (Float) zeroMin.genericGetMin()));
    assertEquals(0, Float.compare(42.0f, (Float) zeroMin.genericGetMax()));

    // Negative zero as the maximum is expected to come back as positive zero.
    Statistics<?> zeroMax = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(-42.0f)))
        .withMax(intToBytes(floatToIntBits(-0.0f)))
        .build();
    assertEquals(0, Float.compare(-42.0f, (Float) zeroMax.genericGetMin()));
    assertEquals(0, Float.compare(0.0f, (Float) zeroMax.genericGetMax()));

    // Both bounds zero with swapped signs.
    Statistics<?> zeroBoth = Statistics.getBuilderForReading(floatType)
        .withMin(intToBytes(floatToIntBits(0.0f)))
        .withMax(intToBytes(floatToIntBits(-0.0f)))
        .build();
    assertEquals(0, Float.compare(-0.0f, (Float) zeroBoth.genericGetMin()));
    assertEquals(0, Float.compare(0.0f, (Float) zeroBoth.genericGetMax()));
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType): 123
Test (org.junit.Test): 66
MessageType (org.apache.parquet.schema.MessageType): 41
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 28
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 28
BytesWritable (org.apache.hadoop.io.BytesWritable): 28
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 28
FloatWritable (org.apache.hadoop.io.FloatWritable): 28
IntWritable (org.apache.hadoop.io.IntWritable): 28
LongWritable (org.apache.hadoop.io.LongWritable): 28
Writable (org.apache.hadoop.io.Writable): 28
GroupType (org.apache.parquet.schema.GroupType): 25
Test (org.testng.annotations.Test): 20
ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 14
OriginalType (org.apache.parquet.schema.OriginalType): 14
Type (org.apache.parquet.schema.Type): 12
List (java.util.List): 11
ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex): 11
ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder): 11
ArrayList (java.util.ArrayList): 10