Use of org.apache.parquet.hadoop.ParquetWriter in project hive by apache.
From the class AbstractTestParquetDirect, method writeDirect:
public Path writeDirect(String name, MessageType type, DirectWriter writer) throws IOException {
  File temp = tempDir.newFile(name + ".parquet");
  temp.deleteOnExit();
  // Delete the empty placeholder so ParquetWriter can create the file itself.
  temp.delete();
  Path path = new Path(temp.getPath());
  // DirectWriteSupport delegates record writing to the supplied DirectWriter callback.
  ParquetWriter<Void> parquetWriter =
      new ParquetWriter<Void>(path, new DirectWriteSupport(type, writer, new HashMap<String, String>()));
  // A single write(null) invokes DirectWriteSupport, which emits the records via the DirectWriter.
  parquetWriter.write(null);
  parquetWriter.close();
  return path;
}
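A hypothetical call site for the method above, assuming DirectWriter is the single-method callback interface defined alongside this test class, taking the low-level org.apache.parquet.io.api.RecordConsumer:

  // requires org.apache.parquet.io.api.RecordConsumer
  Path path = writeDirect("single-int",
      parseMessageType("message test { required int32 value; }"),
      new DirectWriter() {
        @Override
        public void write(RecordConsumer rc) {
          // Emit one record by hand through the RecordConsumer API.
          rc.startMessage();
          rc.startField("value", 0);
          rc.addInteger(42);
          rc.endField("value", 0);
          rc.endMessage();
        }
      });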
Use of org.apache.parquet.hadoop.ParquetWriter in project h2o-3 by h2oai.
From the class ParquetFileGenerator, method generateParquetFile:
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  // Deprecated long-form constructor: UNCOMPRESSED codec, 1024-byte row groups,
  // 1024-byte pages, 512-byte dictionary pages, dictionary encoding on, validation off.
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      writer.write(fact.newGroup()
          .append("int32_field", 32 + i)
          .append("int64_field", 64L + i)
          .append("float_field", 1.0f + i)
          .append("double_field", 2.0d + i)
          .append("timestamp_field", date.getTime() + (i * 117)));
    }
  } finally {
    writer.close();
  }
  return f;
}
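A minimal sketch of reading the generated file back with Parquet's example object model; the ParquetReader builder and GroupReadSupport are part of parquet-hadoop, but this read-back step is not in the original snippet:

  // requires org.apache.parquet.hadoop.ParquetReader and org.apache.parquet.hadoop.example.GroupReadSupport
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path(f.getPath())).build();
  try {
    Group g;
    while ((g = reader.read()) != null) {
      // Field access mirrors the writer's append() calls; index 0 is the first repetition.
      int v = g.getInteger("int32_field", 0);
      long ts = g.getLong("timestamp_field", 0);
    }
  } finally {
    reader.close();
  }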
Use of org.apache.parquet.hadoop.ParquetWriter in project h2o-3 by h2oai.
From the class ParquetFileGenerator, method generateSparseParquetFile:
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional int32 int32_field; optional binary string_field (UTF8); "
          + "required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      // Only every 10th row gets the optional fields, producing a sparse file.
      if (i % 10 == 0) {
        g = g.append("int32_field", i);
      }
      if (i % 10 == 0) {
        // Since i % 10 == 0 here, the appended value is always "CAT_0".
        g = g.append("string_field", "CAT_" + (i % 10));
      }
      if (i % 10 == 0) {
        g = g.append("int32_field2", i);
      }
      // The required "row" column is populated for every record.
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
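The ten-argument ParquetWriter constructor used in both h2o-3 snippets is deprecated. Here is a sketch of the equivalent configuration using the builder on ExampleParquetWriter (from parquet-hadoop's example module), assuming the same f, conf, and schema variables as above:

  // requires org.apache.parquet.hadoop.example.ExampleParquetWriter
  // and org.apache.parquet.hadoop.metadata.CompressionCodecName
  ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path(f.getPath()))
      .withConf(conf)
      .withType(schema) // replaces GroupWriteSupport.setSchema(schema, conf)
      .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
      .withRowGroupSize(1024)
      .withPageSize(1024)
      .withDictionaryPageSize(512)
      .withDictionaryEncoding(true)
      .withValidation(false)
      .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
      .build();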