Use of org.apache.parquet.io.ConverterConsumer in project parquet-mr by apache.
Class TestTupleRecordConsumer, method newTupleWriter:
private <T> TupleWriteSupport newTupleWriter(String pigSchemaString, RecordMaterializer<T> recordConsumer) throws ParserException {
  TupleWriteSupport tupleWriter = TupleWriteSupport.fromPigSchema(pigSchemaString);
  tupleWriter.init(null);
  // Instead of writing to a Parquet file, route the write-side events into the
  // materializer's converters so the record can be read straight back in memory.
  tupleWriter.prepareForWrite(new ConverterConsumer(recordConsumer.getRootConverter(), tupleWriter.getParquetSchema()));
  return tupleWriter;
}
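A hedged usage sketch for this helper: have the returned TupleWriteSupport write a Tuple straight into a TupleRecordMaterializer (the converter used in the other examples on this page) and read the record back. The method name roundTripOneTuple, the schema string, and the field values are illustrative, not part of the test class; Pig's Utils.getSchemaFromString and TupleFactory are assumed to be available, and the TupleRecordMaterializer constructor is taken from the Thrift example below.

private void roundTripOneTuple() throws Exception {
  String pigSchemaString = "a:chararray, b:int";
  Schema pigSchema = Utils.getSchemaFromString(pigSchemaString);
  MessageType parquetSchema = TupleWriteSupport.fromPigSchema(pigSchemaString).getParquetSchema();
  // Read side: materializes converter events back into a Pig Tuple.
  TupleRecordMaterializer materializer = new TupleRecordMaterializer(parquetSchema, pigSchema, true);
  // Write side: prepared by the helper above to feed the materializer's converters.
  TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, materializer);
  Tuple in = TupleFactory.getInstance().newTuple(Arrays.<Object>asList("foo", 7));
  tupleWriter.write(in);
  assertEquals(in.toString(), materializer.getCurrentRecord().toString());
}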
Use of org.apache.parquet.io.ConverterConsumer in project parquet-mr by apache.
Class TestThriftToPigCompatibility, method validateSameTupleAsEB:
/**
 * Steps:
 * <ul>
 * <li>write the object using the Thrift mapping
 * <li>read it back using the Pig mapping
 * <li>use Elephant Bird to convert from Thrift to Pig
 * <li>check that both transformations give the same result
 * </ul>
 * @param o the object to convert
 * @throws TException if writing the Thrift object fails
 */
public static <T extends TBase<?, ?>> void validateSameTupleAsEB(T o) throws TException {
  final ThriftSchemaConverter thriftSchemaConverter = new ThriftSchemaConverter();
  @SuppressWarnings("unchecked")
  final Class<T> class1 = (Class<T>) o.getClass();
  final MessageType schema = thriftSchemaConverter.convert(class1);
  final StructType structType = ThriftSchemaConverter.toStructType(class1);
  final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(class1);
  final Schema pigSchema = thriftToPig.toSchema();
  final TupleRecordMaterializer tupleRecordConverter = new TupleRecordMaterializer(schema, pigSchema, true);
  // Adapt the write-side RecordConsumer API onto the Pig materializer's converters.
  RecordConsumer recordConsumer = new ConverterConsumer(tupleRecordConverter.getRootConverter(), schema);
  final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
  ParquetWriteProtocol p = new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType);
  // Writing the Thrift object through the Parquet write protocol materializes it as a Pig Tuple.
  o.write(p);
  final Tuple t = tupleRecordConverter.getCurrentRecord();
  final Tuple expected = thriftToPig.getPigTuple(o);
  assertEquals(expected.toString(), t.toString());
  final MessageType filtered = new PigSchemaConverter().filter(schema, pigSchema);
  assertEquals(schema.toString(), filtered.toString());
}
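A hedged usage sketch: call the method with any Thrift-generated struct. MyThriftStruct and its setters are hypothetical placeholders for whatever TBase class is generated from your .thrift definition, and the JUnit @Test annotation assumes the surrounding test class uses JUnit.

@Test
public void testRoundTrip() throws TException {
  // MyThriftStruct is a hypothetical Thrift-generated class; substitute a real one.
  MyThriftStruct o = new MyThriftStruct();
  o.setId(1);            // hypothetical setters on the generated class
  o.setName("alice");
  validateSameTupleAsEB(o);
}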
Use of org.apache.parquet.io.ConverterConsumer in project parquet-mr by apache.
Class TestTupleRecordConsumer, method testFromGroups:
private void testFromGroups(String pigSchemaString, List<Group> input) throws ParserException {
  List<Tuple> tuples = new ArrayList<Tuple>();
  MessageType schema = getMessageType(pigSchemaString);
  RecordMaterializer<Tuple> pigRecordConsumer = newPigRecordConsumer(pigSchemaString);
  GroupWriter groupWriter = new GroupWriter(
      new RecordConsumerLoggingWrapper(new ConverterConsumer(pigRecordConsumer.getRootConverter(), schema)), schema);
  // Groups -> Tuples: writing each Group through the ConverterConsumer materializes it as a Tuple.
  for (Group group : input) {
    groupWriter.write(group);
    final Tuple tuple = pigRecordConsumer.getCurrentRecord();
    tuples.add(tuple);
    LOG.debug("in: {}\nout:{}", group, tuple);
  }
  // Tuples -> Groups: write each Tuple back and materialize it as a Group.
  List<Group> groups = new ArrayList<Group>();
  GroupRecordConverter recordConsumer = new GroupRecordConverter(schema);
  TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
  for (Tuple t : tuples) {
    LOG.debug("{}", t);
    tupleWriter.write(t);
    groups.add(recordConsumer.getCurrentRecord());
  }
  // The round trip should reproduce the original groups.
  assertEquals(input.size(), groups.size());
  for (int i = 0; i < input.size(); i++) {
    Group in = input.get(i);
    LOG.debug("{}", in);
    Group out = groups.get(i);
    assertEquals(in.toString(), out.toString());
  }
}
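Taken together, these examples use the same pattern: ConverterConsumer adapts the write-side RecordConsumer API onto a RecordMaterializer's converter tree, so "writing" a record materializes it directly in memory instead of producing a file. A minimal, self-contained sketch of that round trip using the example Group data model follows; the schema string and field values are illustrative, the class name is hypothetical, and the package names are assumed to match parquet-mr's example data model and ConverterConsumer sources.

import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.GroupWriter;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.example.data.simple.convert.GroupRecordConverter;
import org.apache.parquet.io.ConverterConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ConverterConsumerRoundTrip {
  public static void main(String[] args) {
    // Illustrative schema; any message type supported by the example data model works.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; required binary name; }");

    // Read side: assembles Groups from converter events.
    GroupRecordConverter materializer = new GroupRecordConverter(schema);

    // ConverterConsumer turns RecordConsumer calls into converter events,
    // so the record written below is materialized directly.
    GroupWriter groupWriter =
        new GroupWriter(new ConverterConsumer(materializer.getRootConverter(), schema), schema);

    Group in = new SimpleGroupFactory(schema).newGroup()
        .append("id", 42)
        .append("name", "alice");
    groupWriter.write(in);

    Group out = materializer.getCurrentRecord();
    System.out.println(out); // same content as the group that was written
  }
}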