Use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.
From the class ParquetTestUtils, method convertToRequiredType.
/**
 * Rebuilds a Parquet type tree, recreating every primitive leaf with the REQUIRED repetition.
 *
 * <p>A {@code GroupType} is recreated under its original name, using the REPEATED repetition,
 * with each of its fields converted recursively. A {@code PrimitiveType} is recreated from its
 * primitive type name, type length and name; scale and precision are copied over only when the
 * source type carries decimal metadata.
 *
 * @param type the Parquet type to convert
 * @return the rebuilt type
 * @throws UnsupportedOperationException if {@code type} is neither a group nor a primitive type
 */
private static org.apache.parquet.schema.Type convertToRequiredType(org.apache.parquet.schema.Type type) {
    if (type instanceof GroupType) {
        GroupType group = (GroupType) type;
        List<org.apache.parquet.schema.Type> converted = new ArrayList<>();
        for (org.apache.parquet.schema.Type child : group.getFields()) {
            converted.add(convertToRequiredType(child));
        }
        return new GroupType(REPEATED, group.getName(), converted);
    }

    if (!(type instanceof PrimitiveType)) {
        throw new UnsupportedOperationException();
    }

    PrimitiveType primitive = (PrimitiveType) type;
    Types.PrimitiveBuilder<PrimitiveType> requiredBuilder = Types.primitive(primitive.getPrimitiveTypeName(), REQUIRED);
    if (primitive.getDecimalMetadata() != null) {
        // Carry the decimal scale and precision across to the rebuilt primitive.
        requiredBuilder = (Types.PrimitiveBuilder<PrimitiveType>) requiredBuilder
                .scale(primitive.getDecimalMetadata().getScale())
                .precision(primitive.getDecimalMetadata().getPrecision());
    }
    return requiredBuilder
            .length(primitive.getTypeLength())
            .named(primitive.getName())
            .asPrimitiveType();
}
Use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testVarcharMatchesWithDictionaryDescriptor.
/**
 * Verifies that a predicate built for a varchar(255) column from {@code EMPTY_SLICE} matches a
 * dictionary descriptor whose PLAIN_DICTIONARY page wraps four zero bytes.
 */
@Test
public void testVarcharMatchesWithDictionaryDescriptor() {
    // Column under test: an OPTIONAL BINARY primitive at path ["path"].
    String[] columnPath = new String[] { "path" };
    ColumnDescriptor baseDescriptor = new ColumnDescriptor(columnPath, new PrimitiveType(OPTIONAL, BINARY, 0, ""), 0, 0);
    RichColumnDescriptor richColumn = new RichColumnDescriptor(baseDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));

    TupleDomain<ColumnDescriptor> domain = getEffectivePredicate(richColumn, createVarcharType(255), EMPTY_SLICE);
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(domain, singletonList(richColumn));

    // Dictionary page backed by four zero bytes, created with a size argument of 1.
    byte[] dictionaryBytes = new byte[] { 0, 0, 0, 0 };
    DictionaryPage dictionaryPage = new DictionaryPage(Slices.wrappedBuffer(dictionaryBytes), 1, PLAIN_DICTIONARY);
    DictionaryDescriptor dictionary = new DictionaryDescriptor(richColumn, Optional.of(dictionaryPage));

    assertTrue(predicate.matches(dictionary));
}
Use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testBigintMatchesWithStatistics.
/**
 * Verifies statistics-based matching for a bigint column whose allowed values are
 * 42, 43, 44 and 404 (nulls not allowed).
 *
 * @throws ParquetCorruptionException declared because {@code matches} is invoked with column statistics
 */
@Test
public void testBigintMatchesWithStatistics() throws ParquetCorruptionException {
    ColumnDescriptor descriptor = new ColumnDescriptor(new String[] { "path" }, INT64, 0, 0);
    RichColumnDescriptor bigintColumn = new RichColumnDescriptor(descriptor, new PrimitiveType(OPTIONAL, INT64, "Test column"));

    Domain allowedValues = Domain.create(ValueSet.of(BIGINT, 42L, 43L, 44L, 404L), false);
    TupleDomain<ColumnDescriptor> columnDomains = TupleDomain.withColumnDomains(ImmutableMap.of(bigintColumn, allowedValues));
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(columnDomains, singletonList(bigintColumn));

    // Statistics built from (32, 42) are expected to match.
    assertTrue(predicate.matches(2, ImmutableMap.of(bigintColumn, longColumnStats(32, 42)), ID));
    // Statistics built from (30, 40) are expected not to match.
    assertFalse(predicate.matches(2, ImmutableMap.of(bigintColumn, longColumnStats(30, 40)), ID));
    // Statistics built from (1024, 0x10000 + 42) are expected not to match.
    assertFalse(predicate.matches(2, ImmutableMap.of(bigintColumn, longColumnStats(1024, 0x10000 + 42)), ID));
}
Use of org.apache.parquet.schema.PrimitiveType in project flink by apache.
From the class ParquetWriterUtil, method writeParquetFile.
/**
 * Writes the given rows to a Parquet file at {@code path} using {@code schema}.
 *
 * <p>Field {@code i} of each row is written as the Parquet field named {@code "f" + i}, using the
 * primitive type of the {@code i}-th column of the schema. Null fields are skipped entirely.
 *
 * <p>The writer is opened in a try-with-resources statement so that it is closed even when
 * writing a record fails; previously a failure in {@code write} left the writer open.
 *
 * @param path         destination file
 * @param schema       Parquet message schema describing the columns
 * @param records      rows to write, in order
 * @param rowGroupSize row group size handed to the writer builder
 * @throws IOException if building, writing to, or closing the writer fails
 */
public static void writeParquetFile(Path path, MessageType schema, List<Row> records, int rowGroupSize) throws IOException {
    WriteSupport<Row> support = new WriteSupport<Row>() {
        private RecordConsumer consumer;

        @Override
        public WriteContext init(Configuration configuration) {
            return new WriteContext(schema, new HashMap<>());
        }

        @Override
        public void prepareForWrite(RecordConsumer consumer) {
            this.consumer = consumer;
        }

        @Override
        public void write(Row row) {
            consumer.startMessage();
            for (int i = 0; i < row.getArity(); i++) {
                PrimitiveType type = schema.getColumns().get(i).getPrimitiveType();
                Object field = row.getField(i);
                // Null values are represented by omitting the field from the message.
                if (field != null) {
                    consumer.startField("f" + i, i);
                    switch (type.getPrimitiveTypeName()) {
                        case INT64:
                            consumer.addLong(((Number) field).longValue());
                            break;
                        case INT32:
                            consumer.addInteger(((Number) field).intValue());
                            break;
                        case BOOLEAN:
                            consumer.addBoolean((Boolean) field);
                            break;
                        case BINARY:
                            // Strings are UTF-8 encoded and decimals are written as their unscaled
                            // bytes; anything else must already be a byte[].
                            if (field instanceof String) {
                                field = ((String) field).getBytes(StandardCharsets.UTF_8);
                            } else if (field instanceof BigDecimal) {
                                field = ((BigDecimal) field).unscaledValue().toByteArray();
                            }
                            consumer.addBinary(Binary.fromConstantByteArray((byte[]) field));
                            break;
                        case FLOAT:
                            consumer.addFloat(((Number) field).floatValue());
                            break;
                        case DOUBLE:
                            consumer.addDouble(((Number) field).doubleValue());
                            break;
                        case INT96:
                            consumer.addBinary(timestampToInt96((LocalDateTime) field));
                            break;
                        case FIXED_LEN_BYTE_ARRAY:
                            // Sign-extend the decimal's unscaled bytes on the left to a fixed
                            // width of 16 bytes.
                            byte[] bytes = ((BigDecimal) field).unscaledValue().toByteArray();
                            byte signByte = (byte) (bytes[0] < 0 ? -1 : 0);
                            int numBytes = 16;
                            byte[] newBytes = new byte[numBytes];
                            Arrays.fill(newBytes, 0, numBytes - bytes.length, signByte);
                            System.arraycopy(bytes, 0, newBytes, numBytes - bytes.length, bytes.length);
                            consumer.addBinary(Binary.fromConstantByteArray(newBytes));
                            break;
                    }
                    consumer.endField("f" + i, i);
                }
            }
            consumer.endMessage();
        }
    };

    // try-with-resources guarantees close() runs even if a write throws.
    try (ParquetWriter<Row> writer = new ParquetWriterBuilder(new org.apache.hadoop.fs.Path(path.getPath()), support).withRowGroupSize(rowGroupSize).build()) {
        for (Row record : records) {
            writer.write(record);
        }
    }
}
Use of org.apache.parquet.schema.PrimitiveType in project hive by apache.
From the class TestETypeConverter, method testGetTimestampConverter.
/**
 * Verifies that converting the INT96 binary form of a timestamp yields a
 * {@code TimestampWritableV2} whose nanos equal those of the original timestamp.
 */
@Test
public void testGetTimestampConverter() throws Exception {
    Timestamp expected = Timestamp.valueOf("2018-06-15 15:12:20.0");
    PrimitiveType int96Type = Types.optional(PrimitiveTypeName.INT96).named("value");

    // Encode the timestamp as a NanoTime (UTC) and feed its binary form to the converter.
    NanoTime encoded = NanoTimeUtils.getNanoTime(expected, ZoneOffset.UTC, false);
    TimestampWritableV2 converted =
            (TimestampWritableV2) getWritableFromBinaryConverter(null, int96Type, encoded.toBinary());

    assertEquals(expected.getNanos(), converted.getNanos());
}
Aggregations