Search in sources :

Example 41 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class ParquetTestUtils method convertToRequiredType.

/**
 * Recursively rebuilds a Parquet schema node.
 *
 * <p>A primitive node is recreated with {@code REQUIRED} repetition, keeping its primitive
 * type name, type length, name and (when present) decimal scale/precision. A group node is
 * recreated with {@code REPEATED} repetition around its recursively converted children.
 *
 * @param type the Parquet schema node to rebuild
 * @return the rebuilt node
 * @throws UnsupportedOperationException if {@code type} is neither a {@code GroupType}
 *         nor a {@code PrimitiveType}
 */
private static org.apache.parquet.schema.Type convertToRequiredType(org.apache.parquet.schema.Type type) {
    if (type instanceof PrimitiveType) {
        PrimitiveType leaf = (PrimitiveType) type;
        Types.PrimitiveBuilder<PrimitiveType> leafBuilder = Types.primitive(leaf.getPrimitiveTypeName(), REQUIRED);
        // Decimal metadata is optional; carry scale and precision over only when it exists.
        if (leaf.getDecimalMetadata() != null) {
            leafBuilder = (Types.PrimitiveBuilder<PrimitiveType>) leafBuilder
                    .scale(leaf.getDecimalMetadata().getScale())
                    .precision(leaf.getDecimalMetadata().getPrecision());
        }
        PrimitiveType rebuilt = leafBuilder.length(leaf.getTypeLength()).named(leaf.getName());
        return rebuilt.asPrimitiveType();
    }

    if (type instanceof GroupType) {
        GroupType group = (GroupType) type;
        List<org.apache.parquet.schema.Type> children = group.getFields();
        List<org.apache.parquet.schema.Type> converted = new ArrayList<>(children.size());
        for (int i = 0; i < children.size(); i++) {
            converted.add(convertToRequiredType(children.get(i)));
        }
        // NOTE(review): groups are rebuilt as REPEATED, not REQUIRED, despite the method
        // name -- confirm this is intended.
        return new GroupType(REPEATED, group.getName(), converted);
    }

    throw new UnsupportedOperationException();
}
Also used : DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) RowType(com.facebook.presto.common.type.RowType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) TinyintType(com.facebook.presto.common.type.TinyintType) GroupType(org.apache.parquet.schema.GroupType) VarcharType(com.facebook.presto.common.type.VarcharType) MessageType(org.apache.parquet.schema.MessageType) SmallintType(com.facebook.presto.common.type.SmallintType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) Type(com.facebook.presto.common.type.Type) DateType(com.facebook.presto.common.type.DateType) GroupType(org.apache.parquet.schema.GroupType) ArrayList(java.util.ArrayList) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Example 42 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testVarcharMatchesWithDictionaryDescriptor.

/**
 * Verifies that a predicate built for a varchar(255) column from {@code EMPTY_SLICE}
 * matches a dictionary descriptor whose dictionary page wraps four zero bytes and
 * declares one entry.
 */
@Test
public void testVarcharMatchesWithDictionaryDescriptor() {
    PrimitiveType pathType = new PrimitiveType(OPTIONAL, BINARY, 0, "");
    PrimitiveType columnType = new PrimitiveType(OPTIONAL, BINARY, "Test column");
    RichColumnDescriptor column = new RichColumnDescriptor(
            new ColumnDescriptor(new String[] { "path" }, pathType, 0, 0),
            columnType);

    TupleDomain<ColumnDescriptor> predicateDomain = getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE);
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(predicateDomain, singletonList(column));

    // Presumably a single zero-length binary value (4-byte length prefix of 0) -- confirm.
    byte[] dictionaryBytes = new byte[] { 0, 0, 0, 0 };
    DictionaryPage dictionaryPage = new DictionaryPage(Slices.wrappedBuffer(dictionaryBytes), 1, PLAIN_DICTIONARY);
    DictionaryDescriptor dictionary = new DictionaryDescriptor(column, Optional.of(dictionaryPage));

    assertTrue(predicate.matches(dictionary));
}
Also used : DictionaryDescriptor(com.facebook.presto.parquet.predicate.DictionaryDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) TupleDomainParquetPredicate(com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.testng.annotations.Test)

Example 43 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testBigintMatchesWithStatistics.

/**
 * Verifies statistics-based matching for a BIGINT column restricted to the values
 * 42, 43, 44 and 404 (nulls not allowed): statistics built from (32, 42) must match,
 * while statistics built from (30, 40) and from (1024, 0x10000 + 42) must not.
 *
 * @throws ParquetCorruptionException declared by the predicate's {@code matches} call
 */
@Test
public void testBigintMatchesWithStatistics() throws ParquetCorruptionException {
    ColumnDescriptor rawDescriptor = new ColumnDescriptor(new String[] { "path" }, INT64, 0, 0);
    PrimitiveType columnType = new PrimitiveType(OPTIONAL, INT64, "Test column");
    RichColumnDescriptor column = new RichColumnDescriptor(rawDescriptor, columnType);

    Domain allowedValues = Domain.create(ValueSet.of(BIGINT, 42L, 43L, 44L, 404L), false);
    TupleDomain<ColumnDescriptor> predicateDomain = TupleDomain.withColumnDomains(ImmutableMap.of(column, allowedValues));
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(predicateDomain, singletonList(column));

    // Arguments (32, 42): expected to match.
    assertTrue(predicate.matches(2, ImmutableMap.of(column, longColumnStats(32, 42)), ID));
    // Arguments (30, 40): expected not to match.
    assertFalse(predicate.matches(2, ImmutableMap.of(column, longColumnStats(30, 40)), ID));
    // Arguments (1024, 0x10000 + 42): expected not to match.
    assertFalse(predicate.matches(2, ImmutableMap.of(column, longColumnStats(1024, 0x10000 + 42)), ID));
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) TupleDomainParquetPredicate(com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.testng.annotations.Test)

Example 44 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project flink by apache.

the class ParquetWriterUtil method writeParquetFile.

/**
 * Writes {@code records} to a Parquet file at {@code path} using {@code schema}.
 *
 * <p>Each row is emitted as one message. Field {@code i} of the row is written under the
 * name {@code "f" + i} at index {@code i}, using the primitive type of the i-th column of
 * {@code schema}. Null fields are skipped entirely (no field is started for them).
 *
 * <p>The writer is opened in a try-with-resources block, so it is closed even when writing
 * a record fails.
 *
 * @param path         destination file
 * @param schema       Parquet message schema describing the columns
 * @param records      rows to write, in order
 * @param rowGroupSize row group size passed to the writer builder
 * @throws IOException if building, writing to, or closing the writer fails
 */
public static void writeParquetFile(Path path, MessageType schema, List<Row> records, int rowGroupSize) throws IOException {
    WriteSupport<Row> support = new WriteSupport<Row>() {

        private RecordConsumer consumer;

        @Override
        public WriteContext init(Configuration configuration) {
            return new WriteContext(schema, new HashMap<>());
        }

        @Override
        public void prepareForWrite(RecordConsumer consumer) {
            this.consumer = consumer;
        }

        @Override
        public void write(Row row) {
            consumer.startMessage();
            for (int i = 0; i < row.getArity(); i++) {
                PrimitiveType type = schema.getColumns().get(i).getPrimitiveType();
                Object field = row.getField(i);
                if (field != null) {
                    String fieldName = "f" + i;
                    consumer.startField(fieldName, i);
                    switch(type.getPrimitiveTypeName()) {
                        case INT64:
                            consumer.addLong(((Number) field).longValue());
                            break;
                        case INT32:
                            consumer.addInteger(((Number) field).intValue());
                            break;
                        case BOOLEAN:
                            consumer.addBoolean((Boolean) field);
                            break;
                        case BINARY:
                            // Strings are stored as UTF-8 bytes, decimals as their unscaled
                            // two's-complement bytes; anything else must already be a byte[].
                            if (field instanceof String) {
                                field = ((String) field).getBytes(StandardCharsets.UTF_8);
                            } else if (field instanceof BigDecimal) {
                                field = ((BigDecimal) field).unscaledValue().toByteArray();
                            }
                            consumer.addBinary(Binary.fromConstantByteArray((byte[]) field));
                            break;
                        case FLOAT:
                            consumer.addFloat(((Number) field).floatValue());
                            break;
                        case DOUBLE:
                            consumer.addDouble(((Number) field).doubleValue());
                            break;
                        case INT96:
                            consumer.addBinary(timestampToInt96((LocalDateTime) field));
                            break;
                        case FIXED_LEN_BYTE_ARRAY:
                            // Sign-extend the unscaled decimal value to a fixed 16-byte array.
                            // Values whose unscaled form needs more than 16 bytes are not
                            // supported by this padding.
                            byte[] bytes = ((BigDecimal) field).unscaledValue().toByteArray();
                            byte signByte = (byte) (bytes[0] < 0 ? -1 : 0);
                            int numBytes = 16;
                            byte[] newBytes = new byte[numBytes];
                            Arrays.fill(newBytes, 0, numBytes - bytes.length, signByte);
                            System.arraycopy(bytes, 0, newBytes, numBytes - bytes.length, bytes.length);
                            consumer.addBinary(Binary.fromConstantByteArray(newBytes));
                            break;
                    }
                    consumer.endField(fieldName, i);
                }
            }
            consumer.endMessage();
        }
    };
    // Close the writer on every path; previously a failing write() leaked the open writer.
    try (ParquetWriter<Row> writer = new ParquetWriterBuilder(new org.apache.hadoop.fs.Path(path.getPath()), support).withRowGroupSize(rowGroupSize).build()) {
        for (Row record : records) {
            writer.write(record);
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) Configuration(org.apache.hadoop.conf.Configuration) WriteSupport(org.apache.parquet.hadoop.api.WriteSupport) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) BigDecimal(java.math.BigDecimal) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Row(org.apache.flink.types.Row)

Example 45 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project hive by apache.

the class TestETypeConverter method testGetTimestampConverter.

/**
 * Converts a fixed timestamp to a {@code NanoTime} binary, runs it through the binary
 * converter for an optional INT96 column named "value", and checks that the resulting
 * {@code TimestampWritableV2} reports the same nanos as the source timestamp.
 *
 * @throws Exception if the conversion helper fails
 */
@Test
public void testGetTimestampConverter() throws Exception {
    Timestamp expected = Timestamp.valueOf("2018-06-15 15:12:20.0");
    PrimitiveType int96Type = Types.optional(PrimitiveTypeName.INT96).named("value");

    NanoTime encoded = NanoTimeUtils.getNanoTime(expected, ZoneOffset.UTC, false);
    TimestampWritableV2 actual =
            (TimestampWritableV2) getWritableFromBinaryConverter(null, int96Type, encoded.toBinary());

    assertEquals(expected.getNanos(), actual.getNanos());
}
Also used : NanoTime(org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType)123 Test (org.junit.Test)66 MessageType (org.apache.parquet.schema.MessageType)41 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)28 BooleanWritable (org.apache.hadoop.io.BooleanWritable)28 BytesWritable (org.apache.hadoop.io.BytesWritable)28 DoubleWritable (org.apache.hadoop.io.DoubleWritable)28 FloatWritable (org.apache.hadoop.io.FloatWritable)28 IntWritable (org.apache.hadoop.io.IntWritable)28 LongWritable (org.apache.hadoop.io.LongWritable)28 Writable (org.apache.hadoop.io.Writable)28 GroupType (org.apache.parquet.schema.GroupType)25 Test (org.testng.annotations.Test)20 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)14 OriginalType (org.apache.parquet.schema.OriginalType)14 Type (org.apache.parquet.schema.Type)12 List (java.util.List)11 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)11 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)11 ArrayList (java.util.ArrayList)10