Search in sources :

Example 11 with StructType

use of org.apache.parquet.thrift.struct.ThriftType.StructType in project parquet-mr by apache.

the class ParquetWriteProtocol method getProtocol.

/**
 * Picks the write protocol implementation that matches a field's Thrift type.
 *
 * <p>Scalar types (BOOL, BYTE, DOUBLE, I16, I32, I64, STRING) share
 * {@code PrimitiveWriteProtocol}; ENUM gets {@code EnumWriteProtocol}. Both
 * cast {@code columnIO} to {@code PrimitiveColumnIO}. STRUCT, MAP, SET and
 * LIST cast {@code columnIO} to {@code GroupColumnIO}; SET and LIST are both
 * handled by {@code ListWriteProtocol}, which receives the result of
 * {@code getValues()} on the set/list type.
 *
 * @param field the Thrift field whose type drives the selection
 * @param columnIO the column IO handed to the chosen protocol (cast per type)
 * @param returnClause passed through unchanged to the chosen protocol
 * @return the protocol created for the field's type
 * @throws UnsupportedOperationException for STOP, VOID, or any type not listed above
 */
private TProtocol getProtocol(ThriftField field, ColumnIO columnIO, Events returnClause) {
    final ThriftType fieldType = field.getType();
    switch(fieldType.getType()) {
        case BOOL:
        case BYTE:
        case DOUBLE:
        case I16:
        case I32:
        case I64:
        case STRING:
            return new PrimitiveWriteProtocol((PrimitiveColumnIO) columnIO, returnClause);
        case ENUM:
            return new EnumWriteProtocol((PrimitiveColumnIO) columnIO, (EnumType) fieldType, returnClause);
        case STRUCT:
            return new StructWriteProtocol((GroupColumnIO) columnIO, (StructType) fieldType, returnClause);
        case MAP:
            return new MapWriteProtocol((GroupColumnIO) columnIO, (MapType) fieldType, returnClause);
        case SET:
            return new ListWriteProtocol((GroupColumnIO) columnIO, ((SetType) fieldType).getValues(), returnClause);
        case LIST:
            return new ListWriteProtocol((GroupColumnIO) columnIO, ((ListType) fieldType).getValues(), returnClause);
        case STOP:
        case VOID:
        default:
            // Every remaining type has no write protocol and is rejected here.
            throw new UnsupportedOperationException("can't convert type of " + field);
    }
}
Also used : ThriftType(org.apache.parquet.thrift.struct.ThriftType) StructType(org.apache.parquet.thrift.struct.ThriftType.StructType) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO) MapType(org.apache.parquet.thrift.struct.ThriftType.MapType) TProtocol(org.apache.thrift.protocol.TProtocol) GroupColumnIO(org.apache.parquet.io.GroupColumnIO) SetType(org.apache.parquet.thrift.struct.ThriftType.SetType) EnumType(org.apache.parquet.thrift.struct.ThriftType.EnumType) ListType(org.apache.parquet.thrift.struct.ThriftType.ListType)

Example 12 with StructType

use of org.apache.parquet.thrift.struct.ThriftType.StructType in project parquet-mr by apache.

the class BufferedProtocolReadToWrite method readOneValue.

/**
 * Reads one value of the given wire type from {@code in} and records it in
 * {@code buffer}.
 *
 * <p>When {@code expectedType} is non-null, its serialized Thrift type must
 * equal {@code type}; otherwise the read is rejected before anything is
 * consumed. LIST, MAP, SET and STRUCT are delegated to the matching
 * {@code readOneXxx} helper. Scalar values are read from {@code in} and
 * appended through the matching {@code writeXxxAction} helper. ENUM and I32
 * are both read as an i32 and passed to {@code checkEnum} before being
 * recorded. STOP and VOID read nothing.
 *
 * @param in protocol to read from
 * @param type wire type of the value to read (a {@code TType} constant)
 * @param buffer receives the actions produced by the read
 * @param expectedType expected Thrift type, or {@code null} to skip the type check
 * @return for LIST, MAP, SET and STRUCT, whatever the delegated helper
 *         returns; {@code false} for every other handled type.
 *         NOTE(review): this result was previously carried in a local named
 *         {@code hasFieldsIgnored}, which suggests {@code true} means some
 *         fields were skipped, while the earlier doc said {@code true} means
 *         everything was consumed. Confirm the polarity against readOneStruct.
 * @throws DecodingSchemaMismatchException if {@code expectedType} is non-null
 *         and its serialized type differs from {@code type}
 * @throws TException if {@code type} is not a handled wire type
 */
private boolean readOneValue(TProtocol in, byte type, List<Action> buffer, ThriftType expectedType) throws TException {
    if (expectedType != null) {
        if (expectedType.getType().getSerializedThriftType() != type) {
            throw new DecodingSchemaMismatchException("the data type does not match the expected thrift structure: expected " + expectedType + " got " + typeName(type));
        }
    }
    switch(type) {
        // Container types: the nested reader decides the result.
        case TType.LIST:
            return readOneList(in, buffer, (ListType) expectedType);
        case TType.MAP:
            return readOneMap(in, buffer, (MapType) expectedType);
        case TType.SET:
            return readOneSet(in, buffer, (SetType) expectedType);
        case TType.STRUCT:
            return readOneStruct(in, buffer, (StructType) expectedType);
        // Scalar types: read, record, and report false.
        case TType.BOOL:
            writeBoolAction(buffer, in.readBool());
            return false;
        case TType.BYTE:
            writeByteAction(buffer, in.readByte());
            return false;
        case TType.DOUBLE:
            writeDoubleAction(buffer, in.readDouble());
            return false;
        case TType.I16:
            writeShortAction(buffer, in.readI16());
            return false;
        // same as i32 => actually never seen in the protocol layer as enums are written as a i32 field
        case TType.ENUM:
        case TType.I32: {
            final int intValue = in.readI32();
            // The enum check runs before the value is recorded.
            checkEnum(expectedType, intValue);
            writeIntAction(buffer, intValue);
            return false;
        }
        case TType.I64:
            writeLongAction(buffer, in.readI64());
            return false;
        case TType.STRING:
            writeStringAction(buffer, in.readBinary());
            return false;
        // Nothing to read for these two.
        case TType.STOP:
        case TType.VOID:
            return false;
        default:
            throw new TException("Unknown type: " + type);
    }
}
Also used : TException(org.apache.thrift.TException) StructType(org.apache.parquet.thrift.struct.ThriftType.StructType) ByteBuffer(java.nio.ByteBuffer) MapType(org.apache.parquet.thrift.struct.ThriftType.MapType) SetType(org.apache.parquet.thrift.struct.ThriftType.SetType) ListType(org.apache.parquet.thrift.struct.ThriftType.ListType)

Example 13 with StructType

use of org.apache.parquet.thrift.struct.ThriftType.StructType in project parquet-mr by apache.

the class ThriftMetaData method fromThriftClass.

/**
 * Builds a {@code ThriftMetaData} for a Thrift-generated class.
 *
 * <p>The struct descriptor is obtained from a fresh
 * {@code ThriftSchemaConverter} via {@code toStructType}, and is paired with
 * the class's name.
 *
 * @param thriftClass the candidate class; may be {@code null}
 * @return the metadata for {@code thriftClass}, or {@code null} when
 *         {@code thriftClass} is {@code null} or is not assignable to {@code TBase}
 */
@SuppressWarnings("unchecked")
public static ThriftMetaData fromThriftClass(Class<?> thriftClass) {
    final boolean isThriftGenerated = thriftClass != null && TBase.class.isAssignableFrom(thriftClass);
    if (!isThriftGenerated) {
        return null;
    }
    // The cast is guarded by the isAssignableFrom check above.
    final Class<? extends TBase<?, ?>> tbaseClass = (Class<? extends TBase<?, ?>>) thriftClass;
    final StructType structDescriptor = new ThriftSchemaConverter().toStructType(tbaseClass);
    return new ThriftMetaData(thriftClass.getName(), structDescriptor);
}
Also used : StructType(org.apache.parquet.thrift.struct.ThriftType.StructType) TBase(org.apache.thrift.TBase)

Example 14 with StructType

use of org.apache.parquet.thrift.struct.ThriftType.StructType in project parquet-mr by apache.

the class TestParquetReadProtocol method validate.

/**
 * Round-trips a Thrift object: writes {@code expected} through
 * {@code ParquetWriteProtocol} into a {@code MemPageStore}, reads one record
 * back with a {@code TBaseRecordConverter}, and asserts that it equals the
 * input.
 *
 * <p>The column store is configured with a page size of 10000 and dictionary
 * encoding turned off.
 *
 * @param expected the Thrift object to write and then expect back
 * @throws TException if writing {@code expected} to the protocol fails
 */
private <T extends TBase<?, ?>> void validate(T expected) throws TException {
    @SuppressWarnings("unchecked")
    final Class<T> recordClass = (Class<T>) expected.getClass();
    final MemPageStore pageStore = new MemPageStore(1);

    // Derive the Parquet schema from the Thrift class.
    final ThriftSchemaConverter thriftConverter = new ThriftSchemaConverter();
    final MessageType parquetSchema = thriftConverter.convert(recordClass);
    LOG.info("{}", parquetSchema);

    // Write path: record consumer backed by the page store.
    final MessageColumnIO messageIO = new ColumnIOFactory(true).getColumnIO(parquetSchema);
    final ParquetProperties writeProperties = ParquetProperties.builder().withPageSize(10000).withDictionaryEncoding(false).build();
    final ColumnWriteStoreV1 writeStore = new ColumnWriteStoreV1(pageStore, writeProperties);
    final RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
    final StructType structType = thriftConverter.toStructType(recordClass);
    final ParquetWriteProtocol writeProtocol = new ParquetWriteProtocol(consumer, messageIO, structType);
    expected.write(writeProtocol);
    // Flush the consumer first, then the column store, before reading back.
    consumer.flush();
    writeStore.flush();

    // Read path: rebuild the record from the same page store and compare.
    final ThriftRecordConverter<T> recordConverter = new TBaseRecordConverter<T>(recordClass, parquetSchema, structType);
    final RecordReader<T> reader = messageIO.getRecordReader(pageStore, recordConverter);
    final T actual = reader.read();
    assertEquals(expected, actual);
}
Also used : StructType(org.apache.parquet.thrift.struct.ThriftType.StructType) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ColumnIOFactory(org.apache.parquet.io.ColumnIOFactory) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

StructType (org.apache.parquet.thrift.struct.ThriftType.StructType)14 Test (org.junit.Test)6 MessageType (org.apache.parquet.schema.MessageType)4 ColumnIOFactory (org.apache.parquet.io.ColumnIOFactory)3 MessageColumnIO (org.apache.parquet.io.MessageColumnIO)3 ThriftType (org.apache.parquet.thrift.struct.ThriftType)3 ListType (org.apache.parquet.thrift.struct.ThriftType.ListType)3 MapType (org.apache.parquet.thrift.struct.ThriftType.MapType)3 SetType (org.apache.parquet.thrift.struct.ThriftType.SetType)3 RecordConsumerLoggingWrapper (org.apache.parquet.io.RecordConsumerLoggingWrapper)2 RecordConsumer (org.apache.parquet.io.api.RecordConsumer)2 ThriftField (org.apache.parquet.thrift.struct.ThriftField)2 EnumType (org.apache.parquet.thrift.struct.ThriftType.EnumType)2 TBase (org.apache.thrift.TBase)2 TException (org.apache.thrift.TException)2 TProtocol (org.apache.thrift.protocol.TProtocol)2 ThriftToPig (com.twitter.elephantbird.pig.util.ThriftToPig)1 File (java.io.File)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1