Use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.
The class DictionaryFilterTest, method prepareFile:
@BeforeClass
public static void prepareFile() throws IOException {
  cleanup();
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
      .withWriterVersion(PARQUET_1_0)
      .withCompressionCodec(GZIP)
      .withRowGroupSize(1024 * 1024)
      .withPageSize(1024)
      .enableDictionaryEncoding()
      .withDictionaryPageSize(2 * 1024)
      .withConf(conf)
      .build();
  writeData(f, writer);
}
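The writeData helper invoked above is defined elsewhere in DictionaryFilterTest. A minimal sketch of what such a helper could look like, assuming a schema with a single binary column named "name"; the column name, value pattern, and row count are illustrative assumptions, not the test's actual data:

// Hypothetical sketch; the real test writes values crafted to
// exercise dictionary-based row-group filtering.
private static void writeData(SimpleGroupFactory f, ParquetWriter<Group> writer) throws IOException {
  try {
    for (int i = 0; i < 10000; i++) {
      // Repeating a small value set keeps each column chunk dictionary-encodable.
      Group group = f.newGroup().append("name", "value-" + (i % 100));
      writer.write(group);
    }
  } finally {
    writer.close();
  }
}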
Use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project drill by axbaretto.
The class ParquetSimpleTestFileGenerator, method main:
public static void main(String[] args) throws IOException {
  SimpleGroupFactory sgf = new SimpleGroupFactory(simpleSchema);
  GroupFactory gf = new SimpleGroupFactory(complexSchema);
  SimpleGroupFactory sngf = new SimpleGroupFactory(simpleNullableSchema);
  GroupFactory ngf = new SimpleGroupFactory(complexNullableSchema);
  ParquetWriter<Group> simpleWriter = initWriter(simpleSchema, "drill/parquet_test_file_simple");
  ParquetWriter<Group> complexWriter = initWriter(complexSchema, "drill/parquet_test_file_complex");
  ParquetWriter<Group> simpleNullableWriter = initWriter(simpleNullableSchema, "drill/parquet_test_file_simple_nullable");
  ParquetWriter<Group> complexNullableWriter = initWriter(complexNullableSchema, "drill/parquet_test_file_complex_nullable");
  ParquetSimpleTestFileGenerator.writeSimpleValues(sgf, simpleWriter, false);
  ParquetSimpleTestFileGenerator.writeSimpleValues(sngf, simpleNullableWriter, true);
  ParquetSimpleTestFileGenerator.writeComplexValues(gf, complexWriter, false);
  ParquetSimpleTestFileGenerator.writeComplexValues(ngf, complexNullableWriter, true);
  simpleWriter.close();
  complexWriter.close();
  simpleNullableWriter.close();
  complexNullableWriter.close();
}
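initWriter is a private helper of ParquetSimpleTestFileGenerator not shown here. A plausible sketch, assuming it registers the schema on a fresh Configuration and builds an ExampleParquetWriter at the given path; the codec choice and path handling are assumptions:

// Hypothetical sketch of initWriter; the real helper may use
// different codec, page-size, and output-path settings.
private static ParquetWriter<Group> initWriter(MessageType schema, String fileName) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);
  return ExampleParquetWriter.builder(new Path(fileName))
      .withConf(conf)
      .withCompressionCodec(CompressionCodecName.SNAPPY)
      .build();
}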
Use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project hive by apache.
The class TestVectorizedMapColumnReader, method writeMapData:
protected static void writeMapData(ParquetWriter<Group> writer, boolean isDictionaryEncoding, int elementNum) throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int mapMaxSize = 4;
  int mapElementIndex = 0;
  for (int i = 0; i < elementNum; i++) {
    boolean isNull = isNull(i);
    Group group = f.newGroup();
    int mapSize = i % mapMaxSize + 1;
    if (!isNull) {
      // map_field exercises a multi-level map definition
      Group multipleLevelGroup = group.addGroup("map_field");
      for (int j = 0; j < mapSize; j++) {
        int intValForMap = getIntValue(isDictionaryEncoding, mapElementIndex);
        long longValForMap = getLongValue(isDictionaryEncoding, mapElementIndex);
        double doubleValForMap = getDoubleValue(isDictionaryEncoding, mapElementIndex);
        float floatValForMap = getFloatValue(isDictionaryEncoding, mapElementIndex);
        Binary binaryValForMap = getBinaryValue(isDictionaryEncoding, mapElementIndex);
        HiveDecimal hd = getDecimal(isDictionaryEncoding, mapElementIndex).setScale(2);
        HiveDecimalWritable hdw = new HiveDecimalWritable(hd);
        Binary decimalValForMap = Binary.fromConstantByteArray(hdw.getInternalStorage());
        group.addGroup("map_int32").append("key", intValForMap).append("value", intValForMap);
        group.addGroup("map_int64").append("key", longValForMap).append("value", longValForMap);
        group.addGroup("map_double").append("key", doubleValForMap).append("value", doubleValForMap);
        group.addGroup("map_float").append("key", floatValForMap).append("value", floatValForMap);
        group.addGroup("map_binary").append("key", binaryValForMap).append("value", binaryValForMap);
        group.addGroup("map_decimal").append("key", decimalValForMap).append("value", decimalValForMap);
        multipleLevelGroup.addGroup("map").append("key", binaryValForMap).append("value", binaryValForMap);
        mapElementIndex++;
      }
    }
    writer.write(group);
  }
  writer.close();
}
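The addGroup/append calls above imply that each map_* field is a repeated key/value group and that map_field wraps a second map level. A hypothetical reconstruction of the schema in Parquet's message syntax, inferred from those calls rather than copied from the test; the real schema may differ in repetition and logical-type annotations:

// Inferred, not copied, from the writer code above
// (uses org.apache.parquet.schema.MessageTypeParser).
MessageType schema = MessageTypeParser.parseMessageType(
    "message hive_map_schema {"
  + "  repeated group map_int32 (MAP_KEY_VALUE) { required int32 key; optional int32 value; }"
  + "  repeated group map_int64 (MAP_KEY_VALUE) { required int64 key; optional int64 value; }"
  + "  repeated group map_double (MAP_KEY_VALUE) { required double key; optional double value; }"
  + "  repeated group map_float (MAP_KEY_VALUE) { required float key; optional float value; }"
  + "  repeated group map_binary (MAP_KEY_VALUE) { required binary key; optional binary value; }"
  + "  repeated group map_decimal (MAP_KEY_VALUE) { required binary key; optional binary value; }"
  + "  optional group map_field (MAP) {"
  + "    repeated group map (MAP_KEY_VALUE) { required binary key; optional binary value; }"
  + "  }"
  + "}");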
Use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project hive by apache.
The class TestVectorizedMapColumnReader, method writeRepeateMapData:
protected static void writeRepeateMapData(ParquetWriter<Group> writer, int elementNum, boolean isNull) throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  int mapMaxSize = 4;
  for (int i = 0; i < elementNum; i++) {
    Group group = f.newGroup();
    if (!isNull) {
      for (int j = 0; j < mapMaxSize; j++) {
        group.addGroup("map_int32_for_repeat_test").append("key", j).append("value", j);
      }
    }
    writer.write(group);
  }
  writer.close();
}
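A hedged usage sketch for this helper; createTestWriter is a hypothetical stand-in for however the test actually builds its ParquetWriter, and the row count and path are illustrative:

// Hypothetical invocation; writeRepeateMapData closes the writer itself.
ParquetWriter<Group> writer = createTestWriter(schema, new Path("/tmp/vectorized_map_test.parquet"));
// 1025 rows, each non-null row carrying the same four-entry map.
writeRepeateMapData(writer, 1025, false);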
Use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project presto by prestodb.
The class ParquetTester, method nonHiveParquetWriter:
private static void nonHiveParquetWriter(JobConf jobConf, File outputFile,
    org.apache.parquet.hadoop.metadata.CompressionCodecName compressionCodecName,
    SettableStructObjectInspector objectInspector, Iterator<?>[] valuesByField,
    org.apache.parquet.schema.MessageType parquetSchema) throws Exception {
  GroupWriteSupport.setSchema(parquetSchema, jobConf);
  org.apache.parquet.hadoop.ParquetWriter writer = ExampleParquetWriter.builder(new Path(outputFile.toURI()))
      .withType(parquetSchema)
      .withCompressionCodec(compressionCodecName)
      .withConf(jobConf)
      .withDictionaryEncoding(true)
      .build();
  List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(parquetSchema);
  while (stream(valuesByField).allMatch(Iterator::hasNext)) {
    Group group = groupFactory.newGroup();
    for (int field = 0; field < fields.size(); field++) {
      Object value = valuesByField[field].next();
      if (value == null) {
        continue;
      }
      String fieldName = fields.get(field).getFieldName();
      String typeName = fields.get(field).getFieldObjectInspector().getTypeName();
      switch (typeName) {
        case "timestamp":
        case "bigint":
          group.add(fieldName, (long) value);
          break;
        default:
          throw new RuntimeException(String.format("unhandled type for column %s type %s", fieldName, typeName));
      }
    }
    writer.write(group);
  }
  writer.close();
}
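A minimal usage sketch with a single bigint column; the schema string, output path, and inspector wiring here are assumptions for illustration, not taken from ParquetTester:

// Hypothetical call site; the output file must not already exist.
org.apache.parquet.schema.MessageType parquetSchema = MessageTypeParser.parseMessageType(
    "message test { optional int64 id; }");
// StandardStructObjectInspector extends SettableStructObjectInspector;
// javaLongObjectInspector reports the type name "bigint".
SettableStructObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(
    ImmutableList.of("id"),
    ImmutableList.<ObjectInspector>of(PrimitiveObjectInspectorFactory.javaLongObjectInspector));
Iterator<?>[] valuesByField = new Iterator<?>[] { ImmutableList.of(1L, 2L, 3L).iterator() };
nonHiveParquetWriter(new JobConf(), new File("/tmp/non_hive_writer_test.parquet"),
    CompressionCodecName.GZIP, inspector, valuesByField, parquetSchema);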