Usage of org.apache.parquet.pig.convert.TupleRecordMaterializer in the parquet-mr project (Apache): class TupleReadSupport, method prepareForRead.
/**
 * Builds the record materializer that assembles Pig tuples from Parquet records.
 *
 * @param configuration the job configuration; must carry the requested Pig schema
 * @param keyValueMetaData the file's key/value metadata (not used here)
 * @param fileSchema the complete Parquet schema of the file
 * @param readContext the read context holding the projected (requested) schema
 * @return a {@link TupleRecordMaterializer} for the requested Parquet and Pig schemas
 * @throws ParquetDecodingException if no Pig schema was set in the configuration
 */
@Override
public RecordMaterializer<Tuple> prepareForRead(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
  final MessageType projection = readContext.getRequestedSchema();
  final Schema pigSchema = getPigSchema(configuration);
  if (pigSchema == null) {
    throw new ParquetDecodingException("Missing Pig schema: ParquetLoader sets the schema in the job conf");
  }
  final boolean ebCompatible = configuration.getBoolean(PARQUET_PIG_ELEPHANT_BIRD_COMPATIBLE, false);
  final boolean useColumnIndex = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
  if (ebCompatible) {
    // Elephant Bird compatibility mode changes null handling and boolean representation.
    LOG.info("Numbers will default to 0 instead of NULL; Boolean will be converted to Int");
  }
  return new TupleRecordMaterializer(projection, pigSchema, ebCompatible, useColumnIndex);
}
Usage of org.apache.parquet.pig.convert.TupleRecordMaterializer in the parquet-mr project (Apache): class TestThriftToPigCompatibility, method validateSameTupleAsEB.
/**
 * Round-trips a Thrift object through the Parquet/Pig mapping and checks the
 * result against Elephant Bird's direct Thrift-to-Pig conversion.
 *
 * Steps:
 * <ul>
 * <li>write the object using the Thrift mapping
 * <li>read it back using the Pig mapping
 * <li>convert the same object with Elephant Bird (Thrift to Pig)
 * <li>assert both paths produce the same tuple
 * </ul>
 *
 * @param o the Thrift object to convert
 * @throws TException if writing the object fails
 */
public static <T extends TBase<?, ?>> void validateSameTupleAsEB(T o) throws TException {
  @SuppressWarnings("unchecked") final Class<T> thriftClass = (Class<T>) o.getClass();
  final MessageType parquetSchema = new ThriftSchemaConverter().convert(thriftClass);
  final StructType thriftStruct = ThriftSchemaConverter.toStructType(thriftClass);
  final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(thriftClass);
  final Schema pigSchema = thriftToPig.toSchema();
  // Materializer that assembles the Pig tuple while the record is written below.
  final TupleRecordMaterializer materializer = new TupleRecordMaterializer(parquetSchema, pigSchema, true);
  final RecordConsumer consumer = new ConverterConsumer(materializer.getRootConverter(), parquetSchema);
  final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(parquetSchema);
  final ParquetWriteProtocol protocol = new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(consumer), columnIO, thriftStruct);
  o.write(protocol);
  final Tuple viaParquet = materializer.getCurrentRecord();
  final Tuple viaElephantBird = thriftToPig.getPigTuple(o);
  assertEquals(viaElephantBird.toString(), viaParquet.toString());
  // Filtering the Parquet schema by the Pig schema must keep every column.
  final MessageType filtered = new PigSchemaConverter().filter(parquetSchema, pigSchema);
  assertEquals(parquetSchema.toString(), filtered.toString());
}
Aggregations