use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.
the class ParquetMetaStatCollector method collectColStat.
/**
 * Collects statistics for the requested columns.
 *
 * <p>Each field is looked up in the column metadata by its un-indexed path. When metadata is
 * found, the statistic is built by {@code getStat} from the column's min value, max value and
 * null count, together with its primitive type, original type and (for v3 table metadata only)
 * scale and precision. When no metadata is found but the field's root segment is a key of
 * {@code implicitColValues}, a required VARCHAR statistic is produced with zero nulls and
 * min == max == the implicit value. Fields that match neither case are left out of the result.
 *
 * @param fields columns for which statistics are requested
 * @return map from each resolvable field to its column statistics
 */
@Override
public Map<SchemaPath, ColumnStatistics> collectColStat(Set<SchemaPath> fields) {
  Stopwatch timer = Stopwatch.createStarted();

  // map from column to ColumnMetadata
  final Map<SchemaPath, Metadata.ColumnMetadata> columnMetadataMap = new HashMap<>();
  for (final Metadata.ColumnMetadata columnMetadata : columnMetadataList) {
    SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
    columnMetadataMap.put(schemaPath, columnMetadata);
  }

  // map from column name to column statistics.
  final Map<SchemaPath, ColumnStatistics> statMap = new HashMap<>();
  for (final SchemaPath field : fields) {
    final Metadata.ColumnMetadata columnMetadata = columnMetadataMap.get(field.getUnIndexed());
    if (columnMetadata != null) {
      final Object min = columnMetadata.getMinValue();
      final Object max = columnMetadata.getMaxValue();
      final Long numNull = columnMetadata.getNulls();
      final PrimitiveType.PrimitiveTypeName primitiveType =
          this.parquetTableMetadata.getPrimitiveType(columnMetadata.getName());
      final OriginalType originalType =
          this.parquetTableMetadata.getOriginalType(columnMetadata.getName());

      // Scale and precision default to 0 unless the table metadata provides them.
      int precision = 0;
      int scale = 0;
      // ColumnTypeMetadata_v3 stores information about scale and precision
      if (parquetTableMetadata instanceof Metadata.ParquetTableMetadata_v3) {
        Metadata.ColumnTypeMetadata_v3 columnTypeInfo =
            ((Metadata.ParquetTableMetadata_v3) parquetTableMetadata).getColumnTypeInfo(columnMetadata.getName());
        scale = columnTypeInfo.scale;
        precision = columnTypeInfo.precision;
      }
      statMap.put(field, getStat(min, max, numNull, primitiveType, originalType, scale, precision));
    } else {
      final String columnName = field.getRootSegment().getPath();
      if (implicitColValues.containsKey(columnName)) {
        TypeProtos.MajorType type = Types.required(TypeProtos.MinorType.VARCHAR);
        Statistics stat = new BinaryStatistics();
        stat.setNumNulls(0);
        // Encode explicitly as UTF-8: the no-arg getBytes() uses the platform default charset,
        // which would make the min/max bytes depend on the JVM's configuration.
        byte[] val = implicitColValues.get(columnName).getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // A single constant value is both the minimum and the maximum.
        stat.setMinMaxFromBytes(val, val);
        statMap.put(field, new ColumnStatistics(stat, type));
      }
    }
  }

  if (logger.isDebugEnabled()) {
    logger.debug("Took {} ms to column statistics for row group", timer.elapsed(TimeUnit.MILLISECONDS));
  }
  return statMap;
}
use of org.apache.parquet.schema.PrimitiveType in project drill by axbaretto.
the class ParquetSchemaMerge method main.
/**
 * Builds two small Parquet message schemas that share the path {@code root.a.b}, unions them,
 * and prints the merged schema to standard output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  // NOTE(review): this leaf is constructed but never attached to any group below -
  // confirm whether group "b" of the first schema was meant to contain it.
  final PrimitiveType detachedLeaf = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "c");

  // First schema: root -> optional a -> required b (no children).
  final MessageType firstSchema = new MessageType("root",
      new GroupType(Repetition.OPTIONAL, "a",
          new GroupType(Repetition.REQUIRED, "b")));

  // Second schema: root -> optional a -> optional b -> optional int32 d.
  final MessageType secondSchema = new MessageType("root",
      new GroupType(Repetition.OPTIONAL, "a",
          new GroupType(Repetition.OPTIONAL, "b",
              new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "d"))));

  final MessageType merged = firstSchema.union(secondSchema);

  final StringBuilder rendered = new StringBuilder();
  merged.writeToStringBuilder(rendered, "");
  System.out.println(rendered);
}
use of org.apache.parquet.schema.PrimitiveType in project hive by apache.
the class TestETypeConverter method testGetIntConverterForFloat.
/**
 * Checks that the writable obtained for an optional INT32 Parquet column, when the Hive type
 * is {@code float}, is a {@link FloatWritable} holding the same numeric value that was fed in.
 */
@Test
public void testGetIntConverterForFloat() throws Exception {
  final PrimitiveType int32Column = Types.optional(PrimitiveTypeName.INT32).named("value");

  final FloatWritable converted = (FloatWritable)
      getWritableFromPrimitiveConverter(createHiveTypeInfo("float"), int32Column, 22225);

  // Delta of 0: the value must round-trip exactly.
  assertEquals(22225f, (float) converted.get(), 0);
}
use of org.apache.parquet.schema.PrimitiveType in project hive by apache.
the class TestETypeConverter method testGetSmallBigIntConverter.
/**
 * Checks that an INT64 millisecond-timestamp Parquet column, supplied as a little-endian binary
 * value and requested as Hive {@code bigint}, yields a {@link LongWritable} with the original
 * epoch-millisecond value.
 */
@Test
public void testGetSmallBigIntConverter() {
  final long expectedMillis = Timestamp.valueOf("1998-10-03 09:58:31.231").toEpochMilli();

  // 12 bytes are allocated, but after flip() only the 8 bytes of the long are readable.
  final ByteBuffer payload = ByteBuffer.allocate(12);
  payload.order(ByteOrder.LITTLE_ENDIAN);
  payload.putLong(expectedMillis);
  payload.flip();

  // The Parquet type needs the timestamp logical-type annotation here.
  final PrimitiveType timestampColumn = createInt64TimestampType(false, TimeUnit.MILLIS);

  // Read the value back as a bigint.
  final LongWritable asBigInt = (LongWritable) getWritableFromBinaryConverter(
      createHiveTypeInfo("bigint"), timestampColumn, Binary.fromByteBuffer(payload));

  assertEquals(expectedMillis, asBigInt.get());
}
use of org.apache.parquet.schema.PrimitiveType in project hive by apache.
the class TestETypeConverter method testGetInt64MillisTimestampConverter.
/**
 * Checks that an INT64 millisecond-timestamp Parquet column, converted with no Hive type info
 * ({@code null}), yields a {@link TimestampWritableV2} whose timestamp has the same
 * epoch-millisecond value as the input.
 */
@Test
public void testGetInt64MillisTimestampConverter() throws Exception {
  final Timestamp expected = Timestamp.valueOf("2018-07-15 15:12:20.112");
  final PrimitiveType millisTimestampColumn = createInt64TimestampType(false, TimeUnit.MILLIS);

  final TimestampWritableV2 converted = (TimestampWritableV2)
      getWritableFromPrimitiveConverter(null, millisTimestampColumn, expected.toEpochMilli());

  assertEquals(expected.toEpochMilli(), converted.getTimestamp().toEpochMilli());
}
Aggregations