Example usage of org.apache.parquet.example.data.Group in the Apache Hive project:
the writeData method of the VectorizedColumnReaderTestBase class.
/**
 * Writes {@code nElements} rows of test data to the given writer, covering primitive
 * columns (int32/int64/int96/double/float/boolean/fixed), nullable columns, a decimal
 * stored as binary, nested structs, a partially-null struct, a map and a repeated list.
 * The writer is closed when this method returns, even if a write fails.
 *
 * @param writer               destination writer; always closed by this method
 * @param isDictionaryEncoding when true, the value generators produce a small set of
 *                             repeated values so columns end up dictionary-encoded
 * @throws IOException if writing a row or closing the writer fails
 */
protected static void writeData(ParquetWriter<Group> writer, boolean isDictionaryEncoding) throws IOException {
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  try {
    for (int i = 0; i < nElements; i++) {
      boolean isNull = isNull(i);
      int intVal = getIntValue(isDictionaryEncoding, i);
      long longVal = getLongValue(isDictionaryEncoding, i);
      Binary timeStamp = getTimestamp(isDictionaryEncoding, i);
      HiveDecimal decimalVal = getDecimal(isDictionaryEncoding, i).setScale(2);
      double doubleVal = getDoubleValue(isDictionaryEncoding, i);
      float floatVal = getFloatValue(isDictionaryEncoding, i);
      boolean booleanVal = getBooleanValue(i);
      Binary binary = getBinaryValue(isDictionaryEncoding, i);
      Group group = f.newGroup()
          .append("int32_field", intVal)
          .append("int64_field", longVal)
          .append("int96_field", timeStamp)
          .append("double_field", doubleVal)
          .append("float_field", floatVal)
          .append("boolean_field", booleanVal)
          .append("flba_field", "abc");
      // Nullable columns: skip the append entirely on "null" rows.
      if (!isNull) {
        group.append("some_null_field", "x");
      }
      group.append("binary_field", binary);
      if (!isNull) {
        group.append("binary_field_some_null", binary);
      }
      // Decimal values are serialized as their unscaled two's-complement bytes.
      HiveDecimalWritable w = new HiveDecimalWritable(decimalVal);
      group.append("value", Binary.fromConstantByteArray(w.getInternalStorage()));
      // struct_field { a: int, b: double }
      group.addGroup("struct_field").append("a", intVal).append("b", doubleVal);
      // nested_struct_field { nsf { c: int, d: int }, e: double }
      Group g = group.addGroup("nested_struct_field");
      g.addGroup("nsf").append("c", intVal).append("d", intVal);
      g.append("e", doubleVal);
      // struct_field_some_null: each member is populated on a different cadence so
      // rows exist with either, both, or neither member present.
      Group some_null_g = group.addGroup("struct_field_some_null");
      if (i % 2 != 0) {
        some_null_g.append("f", intVal);
      }
      if (i % 3 != 0) {
        some_null_g.append("g", doubleVal);
      }
      // map_field: every 13th row (i % 13 == 1) omits the map value to exercise
      // null-value handling in map readers.
      Group mapGroup = group.addGroup("map_field");
      if (i % 13 != 1) {
        mapGroup.addGroup("map").append("key", binary).append("value", "abc");
      } else {
        mapGroup.addGroup("map").append("key", binary);
      }
      // array_list: list length cycles 0..3 so empty lists are also covered.
      Group arrayGroup = group.addGroup("array_list");
      for (int j = 0; j < i % 4; j++) {
        arrayGroup.addGroup("bag").append("array_element", intVal);
      }
      writer.write(group);
    }
  } finally {
    // Close in finally so a failed write does not leak the underlying file handle;
    // the other generators in this file already follow this try/finally convention.
    writer.close();
  }
}
Example usage of org.apache.parquet.example.data.Group in the h2o-3 project by h2oai:
the generateParquetFile method of the ParquetFileGenerator class.
/**
 * Generates a Parquet file with {@code nrows} rows of dense test data: int32, int64,
 * float, double and a millisecond-timestamp column derived from {@code date}.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the generated file
 * @param nrows     number of rows to write
 * @param date      base timestamp; row i gets {@code date.getTime() + i * 117}
 * @return the generated file
 * @throws IOException if the file cannot be written
 */
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType("message test { " + "required int32 int32_field; " + "required int64 int64_field; " + "required float float_field; " + "required double double_field; " + "required int64 timestamp_field (TIMESTAMP_MILLIS);" + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  // try-with-resources replaces the manual try/finally: ParquetWriter is Closeable,
  // so the writer is closed (and the footer flushed) even if a write fails.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(), UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf)) {
    for (int i = 0; i < nrows; i++) {
      writer.write(fact.newGroup()
          .append("int32_field", 32 + i)
          .append("int64_field", 64L + i)
          .append("float_field", 1.0f + i)
          .append("double_field", 2.0d + i)
          .append("timestamp_field", date.getTime() + (i * 117)));
    }
  }
  return f;
}
Example usage of org.apache.parquet.example.data.Group in the h2o-3 project by h2oai:
the generateSparseParquetFile method of the ParquetFileGenerator class.
/**
 * Generates a Parquet file with {@code nrows} rows where the optional columns
 * (int32_field, string_field, int32_field2) are only populated on every 10th row;
 * the required {@code row} column is always written.
 *
 * NOTE(review): all three optional fields share the {@code i % 10 == 0} condition,
 * and because of it the category label is always {@code "CAT_" + 0} — confirm this
 * sparsity pattern (rather than e.g. different moduli per column) is intended.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the generated file
 * @param nrows     number of rows to write
 * @return the generated file
 * @throws IOException if the file cannot be written
 */
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType("message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  // try-with-resources replaces the manual try/finally: ParquetWriter is Closeable,
  // so the writer is closed (and the footer flushed) even if a write fails.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(), UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf)) {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      if (i % 10 == 0) {
        g = g.append("int32_field", i);
      }
      if (i % 10 == 0) {
        g = g.append("string_field", "CAT_" + (i % 10));
      }
      if (i % 10 == 0) {
        g = g.append("int32_field2", i);
      }
      writer.write(g.append("row", i));
    }
  }
  return f;
}
Aggregations