
Example 66 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.

the class ParquetMetaStatCollector method collectColStat.

@Override
public Map<SchemaPath, ColumnStatistics> collectColStat(Set<SchemaPath> fields) {
    Stopwatch timer = Stopwatch.createStarted();
    // map from column to ColumnMetadata
    final Map<SchemaPath, Metadata.ColumnMetadata> columnMetadataMap = new HashMap<>();
    // map from column to ColumnStatistics
    final Map<SchemaPath, ColumnStatistics> statMap = new HashMap<>();
    for (final Metadata.ColumnMetadata columnMetadata : columnMetadataList) {
        SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
        columnMetadataMap.put(schemaPath, columnMetadata);
    }
    for (final SchemaPath field : fields) {
        final PrimitiveType.PrimitiveTypeName primitiveType;
        final OriginalType originalType;
        final Metadata.ColumnMetadata columnMetadata = columnMetadataMap.get(field.getUnIndexed());
        if (columnMetadata != null) {
            final Object min = columnMetadata.getMinValue();
            final Object max = columnMetadata.getMaxValue();
            final Long numNull = columnMetadata.getNulls();
            primitiveType = this.parquetTableMetadata.getPrimitiveType(columnMetadata.getName());
            originalType = this.parquetTableMetadata.getOriginalType(columnMetadata.getName());
            int precision = 0;
            int scale = 0;
            // ColumnTypeMetadata_v3 stores information about scale and precision
            if (parquetTableMetadata instanceof Metadata.ParquetTableMetadata_v3) {
                Metadata.ColumnTypeMetadata_v3 columnTypeInfo = ((Metadata.ParquetTableMetadata_v3) parquetTableMetadata).getColumnTypeInfo(columnMetadata.getName());
                scale = columnTypeInfo.scale;
                precision = columnTypeInfo.precision;
            }
            statMap.put(field, getStat(min, max, numNull, primitiveType, originalType, scale, precision));
        } else {
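            // Field not found in the Parquet metadata: it may be an implicit column
            // (e.g. filename or a partition directory) whose constant value is known up front.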
            final String columnName = field.getRootSegment().getPath();
            if (implicitColValues.containsKey(columnName)) {
                TypeProtos.MajorType type = Types.required(TypeProtos.MinorType.VARCHAR);
                Statistics stat = new BinaryStatistics();
                stat.setNumNulls(0);
                byte[] val = implicitColValues.get(columnName).getBytes();
                stat.setMinMaxFromBytes(val, val);
                statMap.put(field, new ColumnStatistics(stat, type));
            }
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Took {} ms to collect column statistics for row group", timer.elapsed(TimeUnit.MILLISECONDS));
    }
    return statMap;
}
Also used : HashMap(java.util.HashMap) Stopwatch(com.google.common.base.Stopwatch) Metadata(org.apache.drill.exec.store.parquet.Metadata) TypeProtos(org.apache.drill.common.types.TypeProtos) BinaryStatistics(org.apache.parquet.column.statistics.BinaryStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) Statistics(org.apache.parquet.column.statistics.Statistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) LongStatistics(org.apache.parquet.column.statistics.LongStatistics) BooleanStatistics(org.apache.parquet.column.statistics.BooleanStatistics) OriginalType(org.apache.parquet.schema.OriginalType) SchemaPath(org.apache.drill.common.expression.SchemaPath) PrimitiveType(org.apache.parquet.schema.PrimitiveType)
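
The else branch above synthesizes statistics for implicit columns that are not physically stored in the Parquet file. A minimal standalone sketch of that idea (not Drill code), using only the Parquet statistics calls that already appear above; the constant value is hypothetical:

import org.apache.parquet.column.statistics.BinaryStatistics;

public class ImplicitColumnStatSketch {
    public static void main(String[] args) {
        // Hypothetical constant value of an implicit column such as dir0.
        byte[] val = "2018-01-01".getBytes();
        BinaryStatistics stat = new BinaryStatistics();
        // An implicit column carries the same value in every row, so there are
        // no nulls and min == max == the constant value.
        stat.setNumNulls(0);
        stat.setMinMaxFromBytes(val, val);
        System.out.println("nulls=" + stat.getNumNulls()
            + ", min==max: " + java.util.Arrays.equals(stat.getMinBytes(), stat.getMaxBytes()));
    }
}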

Example 67 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.

the class ParquetSchemaMerge method main.

public static void main(String[] args) {
    MessageType message1;
    MessageType message2;
    PrimitiveType c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "c");
    GroupType b = new GroupType(Repetition.REQUIRED, "b");
    GroupType a = new GroupType(Repetition.OPTIONAL, "a", b);
    message1 = new MessageType("root", a);
    PrimitiveType c2 = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "d");
    GroupType b2 = new GroupType(Repetition.OPTIONAL, "b", c2);
    GroupType a2 = new GroupType(Repetition.OPTIONAL, "a", b2);
    message2 = new MessageType("root", a2);
    MessageType message3 = message1.union(message2);
    StringBuilder builder = new StringBuilder();
    message3.writeToStringBuilder(builder, "");
    System.out.println(builder);
}
Also used : GroupType(org.apache.parquet.schema.GroupType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType)
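
After the union, the merged schema can be inspected with the standard GroupType accessors. A short sketch, assuming it is appended to the end of main() above:

// Assumes message3 has already been built by the union call above.
// Both inputs declare the group "a", so the merged root contains it as well.
GroupType mergedA = message3.getType("a").asGroupType();
System.out.println("a contains b: " + mergedA.containsField("b"));
// Type.toString() produces the same text as writeToStringBuilder with an empty indent.
System.out.println(message3);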

Example 68 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project hive by apache.

the class TestETypeConverter method testGetIntConverterForFloat.

@Test
public void testGetIntConverterForFloat() throws Exception {
    PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
    Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("float"), primitiveType, 22225);
    FloatWritable floatWritable = (FloatWritable) writable;
    assertEquals((float) 22225, (float) floatWritable.get(), 0);
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.junit.Test)
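
getWritableFromPrimitiveConverter and createHiveTypeInfo appear to be helpers defined in TestETypeConverter itself rather than public Hive API. The Parquet side of the test, however, uses only the public org.apache.parquet.schema.Types builder; a small self-contained sketch of that construction:

import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class PrimitiveTypeSketch {
    public static void main(String[] args) {
        // Same construction as in the test: an optional INT32 column named "value".
        PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
        System.out.println(primitiveType);                        // optional int32 value
        System.out.println(primitiveType.getPrimitiveTypeName()); // INT32
        System.out.println(primitiveType.getRepetition());        // OPTIONAL
    }
}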

Example 69 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project hive by apache.

the class TestETypeConverter method testGetSmallBigIntConverter.

@Test
public void testGetSmallBigIntConverter() {
    Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231");
    long msTime = timestamp.toEpochMilli();
    ByteBuffer buf = ByteBuffer.allocate(12);
    buf.order(ByteOrder.LITTLE_ENDIAN);
    buf.putLong(msTime);
    buf.flip();
    // Need TimeStamp logicalType annotation here
    PrimitiveType primitiveType = createInt64TimestampType(false, TimeUnit.MILLIS);
    Writable writable = getWritableFromBinaryConverter(createHiveTypeInfo("bigint"), primitiveType, Binary.fromByteBuffer(buf));
    // Retrieve as BigInt
    LongWritable longWritable = (LongWritable) writable;
    assertEquals(msTime, longWritable.get());
}
Also used : Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)
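
createInt64TimestampType is likewise a helper defined in TestETypeConverter. A hedged sketch of how an equivalent INT64 timestamp(MILLIS) type can be built with Parquet's public LogicalTypeAnnotation builder, assuming the helper does essentially this (the field name is arbitrary):

import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class Int64TimestampTypeSketch {
    public static void main(String[] args) {
        // false = not adjusted to UTC, MILLIS = millisecond precision.
        PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT64)
                .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MILLIS))
                .named("int64_ts");
        System.out.println(primitiveType); // e.g. optional int64 int64_ts (TIMESTAMP(MILLIS,false))
    }
}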

Example 70 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project hive by apache.

the class TestETypeConverter method testGetInt64MillisTimestampConverter.

@Test
public void testGetInt64MillisTimestampConverter() throws Exception {
    Timestamp timestamp = Timestamp.valueOf("2018-07-15 15:12:20.112");
    PrimitiveType primitiveType = createInt64TimestampType(false, TimeUnit.MILLIS);
    Writable writable = getWritableFromPrimitiveConverter(null, primitiveType, timestamp.toEpochMilli());
    TimestampWritableV2 timestampWritable = (TimestampWritableV2) writable;
    assertEquals(timestamp.toEpochMilli(), timestampWritable.getTimestamp().toEpochMilli());
}
Also used : Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Test(org.junit.Test)
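
The assertion compares epoch milliseconds on both sides of the conversion. A tiny sketch of that round trip with the same Hive Timestamp class; the ofEpochMilli factory is assumed to be available on it:

import org.apache.hadoop.hive.common.type.Timestamp;

public class TimestampMillisSketch {
    public static void main(String[] args) {
        Timestamp timestamp = Timestamp.valueOf("2018-07-15 15:12:20.112");
        long millis = timestamp.toEpochMilli();
        // Rebuilding the timestamp from epoch millis keeps millisecond precision,
        // which is what the MILLIS converter test relies on.
        Timestamp roundTripped = Timestamp.ofEpochMilli(millis);
        System.out.println(millis == roundTripped.toEpochMilli()); // true
    }
}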

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType)123 Test (org.junit.Test)66 MessageType (org.apache.parquet.schema.MessageType)41 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)28 BooleanWritable (org.apache.hadoop.io.BooleanWritable)28 BytesWritable (org.apache.hadoop.io.BytesWritable)28 DoubleWritable (org.apache.hadoop.io.DoubleWritable)28 FloatWritable (org.apache.hadoop.io.FloatWritable)28 IntWritable (org.apache.hadoop.io.IntWritable)28 LongWritable (org.apache.hadoop.io.LongWritable)28 Writable (org.apache.hadoop.io.Writable)28 GroupType (org.apache.parquet.schema.GroupType)25 Test (org.testng.annotations.Test)20 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)14 OriginalType (org.apache.parquet.schema.OriginalType)14 Type (org.apache.parquet.schema.Type)12 List (java.util.List)11 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)11 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)11 ArrayList (java.util.ArrayList)10