Use of org.apache.parquet.schema.PrimitiveType in project Drill by Apache.
From the class Metadata, method getColTypeInfo:
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  if (type.isPrimitive()) {
    PrimitiveType primitiveType = (PrimitiveType) type;
    int precision = 0;
    int scale = 0;
    if (primitiveType.getDecimalMetadata() != null) {
      precision = primitiveType.getDecimalMetadata().getPrecision();
      scale = primitiveType.getDecimalMetadata().getScale();
    }
    int repetitionLevel = schema.getMaxRepetitionLevel(path);
    int definitionLevel = schema.getMaxDefinitionLevel(path);
    return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
  }
  // Group type: descend one level along the column path and recurse until a primitive is reached.
  Type t = ((GroupType) type).getType(path[depth]);
  return getColTypeInfo(schema, t, path, depth + 1);
}
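For context, a minimal, self-contained sketch (the schema and field names below are hypothetical, built with parquet-mr's Types builder) showing how such a walk bottoms out at a PrimitiveType and how repetition/definition levels are read for a column path:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;

public class SchemaWalkExample {
  public static void main(String[] args) {
    // Hypothetical schema: a required group "name" with one primitive field, plus a primitive "id".
    MessageType schema = Types.buildMessage()
        .requiredGroup()
          .required(BINARY).named("first")
          .named("name")
        .required(INT32).named("id")
        .named("root");
    String[] path = {"name", "first"};
    // Descend one field at a time, as getColTypeInfo does, until a primitive is reached.
    PrimitiveType leaf = schema.getType("name").asGroupType().getType("first").asPrimitiveType();
    System.out.println(leaf.getPrimitiveTypeName());         // BINARY
    System.out.println(schema.getMaxRepetitionLevel(path));  // 0: nothing on the path repeats
    System.out.println(schema.getMaxDefinitionLevel(path));  // 0: every level is required
  }
}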
Use of org.apache.parquet.schema.PrimitiveType in project Drill by Apache.
From the class ParquetMetaStatCollector, method collectColStat:
@Override
public Map<SchemaPath, ColumnStatistics> collectColStat(Set<SchemaPath> fields) {
  Stopwatch timer = Stopwatch.createStarted();
  // map from column to ColumnMetadata
  final Map<SchemaPath, Metadata.ColumnMetadata> columnMetadataMap = new HashMap<>();
  // map from column name to column statistics
  final Map<SchemaPath, ColumnStatistics> statMap = new HashMap<>();
  for (final Metadata.ColumnMetadata columnMetadata : columnMetadataList) {
    SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
    columnMetadataMap.put(schemaPath, columnMetadata);
  }
  for (final SchemaPath schemaPath : fields) {
    final PrimitiveType.PrimitiveTypeName primitiveType;
    final OriginalType originalType;
    final Metadata.ColumnMetadata columnMetadata = columnMetadataMap.get(schemaPath);
    if (columnMetadata != null) {
      final Object min = columnMetadata.getMinValue();
      final Object max = columnMetadata.getMaxValue();
      final Long numNull = columnMetadata.getNulls();
      primitiveType = this.parquetTableMetadata.getPrimitiveType(columnMetadata.getName());
      originalType = this.parquetTableMetadata.getOriginalType(columnMetadata.getName());
      final Integer repetitionLevel = this.parquetTableMetadata.getRepetitionLevel(columnMetadata.getName());
      statMap.put(schemaPath, getStat(min, max, numNull, primitiveType, originalType, repetitionLevel));
    } else {
      final String columnName = schemaPath.getRootSegment().getPath();
      if (implicitColValues.containsKey(columnName)) {
        TypeProtos.MajorType type = Types.required(TypeProtos.MinorType.VARCHAR);
        Statistics stat = new BinaryStatistics();
        stat.setNumNulls(0);
        byte[] val = implicitColValues.get(columnName).getBytes();
        stat.setMinMaxFromBytes(val, val);
        statMap.put(schemaPath, new ColumnStatistics(stat, type));
      }
    }
  }
  if (logger.isDebugEnabled()) {
    logger.debug("Took {} ms to collect column statistics for row group", timer.elapsed(TimeUnit.MILLISECONDS));
  }
  return statMap;
}
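Outside Drill, a minimal sketch of reading the same kind of per-column information (statistics plus the Parquet primitive type) directly from a file footer with parquet-mr; the file path and Hadoop Configuration here are assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class FooterStatsExample {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args[0]); // path to an existing Parquet file (assumption)
    Configuration conf = new Configuration();
    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
      for (BlockMetaData block : reader.getFooter().getBlocks()) { // one entry per row group
        for (ColumnChunkMetaData column : block.getColumns()) {
          Statistics<?> stats = column.getStatistics();
          System.out.printf("%s type=%s nulls=%d min=%s max=%s%n",
              column.getPath(), column.getType(), stats.getNumNulls(),
              stats.minAsString(), stats.maxAsString());
        }
      }
    }
  }
}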
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeMetadata:
@Test
public void testMergeMetadata() {
  FileMetaData md1 = new FileMetaData(
      new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")),
      new HashMap<String, String>(), "test");
  FileMetaData md2 = new FileMetaData(
      new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c")),
      new HashMap<String, String>(), "test2");
  GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
  assertEquals(merged.getSchema(),
      new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"),
          new PrimitiveType(REQUIRED, BINARY, "c")));
}
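A freestanding sketch of the same schema-merge behavior, using MessageType.union directly rather than ParquetFileWriter.mergeInto (the schema names and fields are illustrative only):

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;

public class SchemaUnionExample {
  public static void main(String[] args) {
    MessageType left = new MessageType("root1",
        new PrimitiveType(REPEATED, BINARY, "a"),
        new PrimitiveType(OPTIONAL, BINARY, "b"));
    MessageType right = new MessageType("root2",
        new PrimitiveType(REQUIRED, BINARY, "c"));
    // Fields that exist only in the right schema are appended to the left one's field list.
    MessageType merged = left.union(right);
    System.out.println(merged);
  }
}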
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeFooters:
@Test
public void testMergeFooters() {
  List<BlockMetaData> oneBlocks = new ArrayList<BlockMetaData>();
  oneBlocks.add(new BlockMetaData());
  oneBlocks.add(new BlockMetaData());
  List<BlockMetaData> twoBlocks = new ArrayList<BlockMetaData>();
  twoBlocks.add(new BlockMetaData());
  List<BlockMetaData> expected = new ArrayList<BlockMetaData>();
  expected.addAll(oneBlocks);
  expected.addAll(twoBlocks);
  Footer one = new Footer(new Path("file:/tmp/output/one.parquet"),
      new ParquetMetadata(new FileMetaData(
          new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")),
          new HashMap<String, String>(), "test"), oneBlocks));
  Footer two = new Footer(new Path("/tmp/output/two.parquet"),
      new ParquetMetadata(new FileMetaData(
          new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c")),
          new HashMap<String, String>(), "test2"), twoBlocks));
  List<Footer> footers = new ArrayList<Footer>();
  footers.add(one);
  footers.add(two);
  ParquetMetadata merged = ParquetFileWriter.mergeFooters(new Path("/tmp"), footers);
  assertEquals(new MessageType("root1",
      new PrimitiveType(REPEATED, BINARY, "a"),
      new PrimitiveType(OPTIONAL, BINARY, "b"),
      new PrimitiveType(REQUIRED, BINARY, "c")),
      merged.getFileMetaData().getSchema());
  assertEquals("Should have all blocks", expected, merged.getBlocks());
}
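In practice, merged footers like these usually end up in a summary file. A hedged sketch follows (the output directory is an assumption, and writeMetadataFile is deprecated in recent parquet-mr releases):

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.Footer;
import org.apache.parquet.hadoop.ParquetFileWriter;

public class WriteSummaryExample {
  // Writes the _metadata/_common_metadata summary files for the given footers under outputDir.
  public static void writeSummary(Configuration conf, Path outputDir, List<Footer> footers) throws Exception {
    ParquetFileWriter.writeMetadataFile(conf, outputDir, footers);
  }

  public static void main(String[] args) throws Exception {
    // Usage sketch only: 'one' and 'two' would be footers like those built in the test above.
    // writeSummary(new Configuration(), new Path("/tmp/output"), Arrays.asList(one, two));
  }
}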
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class AvroSchemaConverter, method convertField:
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();
  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    builder = Types.primitive(INT64, repetition);
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }
  // schema translation can only be done for known logical types because this
  // creates an equivalence
  LogicalType logicalType = schema.getLogicalType();
  if (logicalType != null) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      builder = builder.as(DECIMAL)
          .precision(((LogicalTypes.Decimal) logicalType).getPrecision())
          .scale(((LogicalTypes.Decimal) logicalType).getScale());
    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }
  return builder.named(fieldName);
}
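As a freestanding illustration of the Types primitive builder this converter relies on, here is a hedged sketch of annotating a FIXED_LEN_BYTE_ARRAY with a decimal logical type (the field name, length, precision, and scale are made up for the example):

import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;

public class PrimitiveBuilderExample {
  public static void main(String[] args) {
    // Mirrors the FIXED + decimal branch above: a 16-byte fixed field annotated as decimal(38, 9).
    PrimitiveType decimalField = Types.primitive(FIXED_LEN_BYTE_ARRAY, Type.Repetition.REQUIRED)
        .length(16)
        .as(OriginalType.DECIMAL)
        .precision(38)
        .scale(9)
        .named("amount");
    System.out.println(decimalField);
  }
}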