Use of org.apache.parquet.schema.PrimitiveType in the Apache Drill project.
Class ParquetRecordWriter, method getPrimitiveType.
/**
 * Builds the Parquet {@link PrimitiveType} describing the given field.
 *
 * <p>Name, repetition, original type and decimal metadata are all looked up through
 * {@link ParquetTypeHelper}. For decimal fields the primitive type name and the length
 * are overridden: the type name starts from {@code logicalTypeForDecimals} and, when
 * {@code usePrimitiveTypesForDecimals} is set, is narrowed to INT32 or INT64 if the
 * field precision fits within the maximum precision reported for that type.
 *
 * @param field field to describe
 * @return primitive type assembled from the field's minor type, data mode and precision
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
  final MinorType drillType = field.getType().getMinorType();
  final String columnName = field.getName();

  // Defaults derived purely from the minor type; decimals adjust both below.
  int typeLength = ParquetTypeHelper.getLengthForMinorType(drillType);
  PrimitiveTypeName physicalType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(drillType);

  if (Types.isDecimalType(drillType)) {
    final int precision = field.getPrecision();
    physicalType = logicalTypeForDecimals;

    if (usePrimitiveTypesForDecimals) {
      // Pick the narrowest integer type able to hold the precision; otherwise keep
      // the type name assigned above.
      if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
        physicalType = PrimitiveTypeName.INT32;
      } else if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
        physicalType = PrimitiveTypeName.INT64;
      }
    }

    // The length for decimals always follows the precision, whichever type name was chosen.
    typeLength = DecimalUtility.getMaxBytesSizeForPrecision(precision);
  }

  final Repetition fieldRepetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
  final OriginalType annotation = ParquetTypeHelper.getOriginalTypeForMinorType(drillType);
  final DecimalMetadata decimalInfo = ParquetTypeHelper.getDecimalMetadataForField(field);

  return new PrimitiveType(fieldRepetition, physicalType, typeLength, columnName, annotation, decimalInfo, null);
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache Drill project.
Class ParquetTableMetadataUtils, method getRowGroupColumnStatistics.
/**
 * Builds per-column {@link ColumnStatistics} for every column of the specified
 * {@link MetadataBase.RowGroupMetadata}, keyed by the column's {@link SchemaPath}.
 *
 * <p>Each entry carries three statistics: minimum value, maximum value and nulls count.
 * The nulls count is replaced with {@code Statistic.NO_COLUMN_STATS} when
 * {@code hasInvalidStatistics} reports the column statistics as invalid.
 *
 * @param tableMetadata the source of column types
 * @param rowGroupMetadata row group whose columns are converted
 * @return map from column path to the statistics collected for that column
 */
public static Map<SchemaPath, ColumnStatistics<?>> getRowGroupColumnStatistics(MetadataBase.ParquetTableMetadataBase tableMetadata, MetadataBase.RowGroupMetadata rowGroupMetadata) {
  Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

  for (MetadataBase.ColumnMetadata columnMeta : rowGroupMetadata.getColumns()) {
    SchemaPath columnPath = SchemaPath.getCompoundPath(columnMeta.getName());

    // Start from the recorded nulls count and discard it when the statistics are flagged invalid.
    Long nullsCount = columnMeta.getNulls();
    if (hasInvalidStatistics(columnMeta, tableMetadata)) {
      nullsCount = Statistic.NO_COLUMN_STATS;
    }

    // Parquet type information drives both value conversion and the resulting minor type.
    PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(tableMetadata, columnMeta);
    OriginalType annotation = getOriginalType(tableMetadata, columnMeta);
    TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(physicalType, annotation);

    List<StatisticsHolder<?>> holders = new ArrayList<>();
    holders.add(new StatisticsHolder<>(
        getValue(columnMeta.getMinValue(), physicalType, annotation), ColumnStatisticsKind.MIN_VALUE));
    holders.add(new StatisticsHolder<>(
        getValue(columnMeta.getMaxValue(), physicalType, annotation), ColumnStatisticsKind.MAX_VALUE));
    holders.add(new StatisticsHolder<>(nullsCount, ColumnStatisticsKind.NULLS_COUNT));

    statsByColumn.put(columnPath, new ColumnStatistics<>(holders, minorType));
  }

  return statsByColumn;
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache Drill project.
Class ParquetTableMetadataUtils, method getNonInterestingColumnsMeta.
/**
 * Collects metadata for the columns that are not marked as interesting.
 *
 * <p>Only {@link Metadata_V4.ParquetTableMetadata_v4} instances are inspected; for any
 * other metadata implementation, or when the column type info map is {@code null}, the
 * result wraps an empty statistics map. Every non-interesting column is registered with
 * a single nulls-count statistic set to {@code Statistic.NO_COLUMN_STATS}.
 *
 * @param parquetTableMetadata the source of column metadata for non-interesting column's statistics
 * @return non-interesting columns metadata
 */
public static NonInterestingColumnsMetadata getNonInterestingColumnsMeta(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
  Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

  // Anything other than V4 metadata yields an empty result.
  if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
    return new NonInterestingColumnsMetadata(statsByColumn);
  }

  Metadata_V4.ParquetTableMetadata_v4 v4Metadata = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
  Map<Metadata_V4.ColumnTypeMetadata_v4.Key, Metadata_V4.ColumnTypeMetadata_v4> typeInfoByKey = v4Metadata.getColumnTypeInfoMap();

  if (typeInfoByKey != null) {
    for (Metadata_V4.ColumnTypeMetadata_v4 typeInfo : typeInfoByKey.values()) {
      if (typeInfo.isInteresting) {
        continue;
      }

      SchemaPath columnPath = SchemaPath.getCompoundPath(typeInfo.name);

      // Only a placeholder nulls count is recorded for these columns.
      List<StatisticsHolder<?>> holders = new ArrayList<>();
      holders.add(new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));

      TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(typeInfo.primitiveType, typeInfo.originalType);
      statsByColumn.put(columnPath, new ColumnStatistics<>(holders, minorType));
    }
  }

  return new NonInterestingColumnsMetadata(statsByColumn);
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache Drill project.
Class ParquetTableMetadataUtils, method getColumnType.
/**
 * Resolves the major type of the given column by looking up its Parquet primitive type
 * name and original type, then delegating to the overload that accepts them explicitly.
 *
 * @param parquetTableMetadata table metadata used for the type lookups
 * @param column column whose type is resolved
 * @return major type returned by the delegating overload
 */
private static TypeProtos.MajorType getColumnType(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.ColumnMetadata column) {
  final PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(parquetTableMetadata, column);
  final OriginalType annotation = getOriginalType(parquetTableMetadata, column);
  final String[] columnPath = column.getName();

  return getColumnType(columnPath, physicalType, annotation, parquetTableMetadata);
}
Use of org.apache.parquet.schema.PrimitiveType in the Apache parquet-mr project.
Class TestStatistics, method testSpecBuilderForFloat.
/**
 * Checks how the reading-side statistics builder treats special FLOAT values:
 * a NaN in min and/or max must leave the statistics without a non-null value while the
 * nulls count is still kept, and signed zeros must read back as -0.0 for min and +0.0 for max.
 */
@Test
public void testSpecBuilderForFloat() {
  PrimitiveType floatType = Types.required(FLOAT).named("test_float");

  // NaN as min: nulls count is preserved, min/max are not reported.
  Statistics<?> nanMin = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(Float.NaN)))
      .withMax(intToBytes(floatToIntBits(42.0f)))
      .withNumNulls(0)
      .build();
  assertTrue(nanMin.isNumNullsSet());
  assertEquals(0, nanMin.getNumNulls());
  assertFalse(nanMin.hasNonNullValue());

  // NaN as max.
  Statistics<?> nanMax = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(-42.0f)))
      .withMax(intToBytes(floatToIntBits(Float.NaN)))
      .withNumNulls(11)
      .build();
  assertTrue(nanMax.isNumNullsSet());
  assertEquals(11, nanMax.getNumNulls());
  assertFalse(nanMax.hasNonNullValue());

  // NaN on both ends.
  Statistics<?> nanBoth = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(Float.NaN)))
      .withMax(intToBytes(floatToIntBits(Float.NaN)))
      .withNumNulls(42)
      .build();
  assertTrue(nanBoth.isNumNullsSet());
  assertEquals(42, nanBoth.getNumNulls());
  assertFalse(nanBoth.hasNonNullValue());

  // +0.0 supplied as min is expected to read back as -0.0.
  Statistics<?> zeroMin = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(0.0f)))
      .withMax(intToBytes(floatToIntBits(42.0f)))
      .build();
  assertEquals(0, Float.compare(-0.0f, (Float) zeroMin.genericGetMin()));
  assertEquals(0, Float.compare(42.0f, (Float) zeroMin.genericGetMax()));

  // -0.0 supplied as max is expected to read back as +0.0.
  Statistics<?> zeroMax = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(-42.0f)))
      .withMax(intToBytes(floatToIntBits(-0.0f)))
      .build();
  assertEquals(0, Float.compare(-42.0f, (Float) zeroMax.genericGetMin()));
  assertEquals(0, Float.compare(0.0f, (Float) zeroMax.genericGetMax()));

  // Both zeros supplied with swapped signs: min reads back as -0.0 and max as +0.0.
  Statistics<?> zeroBoth = Statistics.getBuilderForReading(floatType)
      .withMin(intToBytes(floatToIntBits(0.0f)))
      .withMax(intToBytes(floatToIntBits(-0.0f)))
      .build();
  assertEquals(0, Float.compare(-0.0f, (Float) zeroBoth.genericGetMin()));
  assertEquals(0, Float.compare(0.0f, (Float) zeroBoth.genericGetMax()));
}
Aggregations