Search in sources :

Example 1 with ParquetWriter

use of org.apache.parquet.hadoop.ParquetWriter in project hive by apache.

From the class AbstractTestParquetDirect, method writeDirect.

/**
 * Writes a Parquet file into the test temp directory using the supplied direct writer.
 *
 * @param name   base file name; ".parquet" is appended
 * @param type   Parquet schema for the file
 * @param writer callback that emits the record content
 * @return the Hadoop Path of the written file
 * @throws IOException if the temp file cannot be created or the write fails
 */
public Path writeDirect(String name, MessageType type, DirectWriter writer) throws IOException {
    // Reserve a unique temp file name, then delete the empty file so the
    // Parquet writer can create it fresh (ParquetWriter refuses to overwrite).
    File temp = tempDir.newFile(name + ".parquet");
    temp.deleteOnExit();
    temp.delete();
    Path path = new Path(temp.getPath());
    ParquetWriter<Void> parquetWriter = new ParquetWriter<Void>(path, new DirectWriteSupport(type, writer, new HashMap<String, String>()));
    // Close in finally so the underlying file handle is released even if
    // write() throws — consistent with the try/finally style of the other
    // generators in this collection.
    try {
        parquetWriter.write(null);
    } finally {
        parquetWriter.close();
    }
    return path;
}
Also used : Path(org.apache.hadoop.fs.Path) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) HashMap(java.util.HashMap) File(java.io.File)

Example 2 with ParquetWriter

use of org.apache.parquet.hadoop.ParquetWriter in project h2o-3 by h2oai.

From the class ParquetFileGenerator, method generateParquetFile.

/**
 * Generates a small Parquet (format v2) test file with one row group of
 * int32/int64/float/double/timestamp columns, nrows rows, uncompressed.
 *
 * @param parentDir directory to create the file in
 * @param filename  name of the generated file
 * @param nrows     number of rows to write
 * @param date      base timestamp; row i gets date.getTime() + i * 117 ms
 * @return the generated file
 * @throws IOException if writing fails
 */
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
    File f = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType("message test { " + "required int32 int32_field; " + "required int64 int64_field; " + "required float float_field; " + "required double double_field; " + "required int64 timestamp_field (TIMESTAMP_MILLIS);" + "} ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory fact = new SimpleGroupFactory(schema);
    // try-with-resources: ParquetWriter is Closeable, so this replaces the
    // manual try/finally and guarantees the file is finalized on all paths.
    try (ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(), UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf)) {
        for (int i = 0; i < nrows; i++) {
            // 117L: multiply in long so the timestamp offset cannot overflow
            // int arithmetic for very large nrows before widening.
            writer.write(fact.newGroup().append("int32_field", 32 + i).append("int64_field", 64L + i).append("float_field", 1.0f + i).append("double_field", 2.0d + i).append("timestamp_field", date.getTime() + (i * 117L)));
        }
    }
    return f;
}
Also used : Path(org.apache.hadoop.fs.Path) GroupWriteSupport(org.apache.parquet.hadoop.example.GroupWriteSupport) Group(org.apache.parquet.example.data.Group) Configuration(org.apache.hadoop.conf.Configuration) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) AvroParquetWriter(org.apache.parquet.avro.AvroParquetWriter) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) File(java.io.File) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) MessageType(org.apache.parquet.schema.MessageType)

Example 3 with ParquetWriter

use of org.apache.parquet.hadoop.ParquetWriter in project h2o-3 by h2oai.

From the class ParquetFileGenerator, method generateSparseParquetFile.

/**
 * Generates a sparse Parquet (format v2) test file: the three optional
 * columns are populated only on every 10th row; the required "row" column
 * is written for all rows.
 *
 * @param parentDir directory to create the file in
 * @param filename  name of the generated file
 * @param nrows     number of rows to write
 * @return the generated file
 * @throws IOException if writing fails
 */
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
    File f = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType("message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory fact = new SimpleGroupFactory(schema);
    // try-with-resources: ParquetWriter is Closeable, so this replaces the
    // manual try/finally and guarantees the file is finalized on all paths.
    try (ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(), UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf)) {
        for (int i = 0; i < nrows; i++) {
            Group g = fact.newGroup();
            // The three optional fields originally had three separate but
            // identical (i % 10 == 0) guards; merged into one chain with the
            // append order preserved (int32_field, string_field, int32_field2).
            // NOTE(review): "CAT_" + (i % 10) always yields "CAT_0" under this
            // guard — possibly the guards were meant to use different moduli;
            // confirm the intended sparsity pattern with the original authors.
            if (i % 10 == 0) {
                g = g.append("int32_field", i).append("string_field", "CAT_" + (i % 10)).append("int32_field2", i);
            }
            writer.write(g.append("row", i));
        }
    }
    return f;
}
Also used : Path(org.apache.hadoop.fs.Path) GroupWriteSupport(org.apache.parquet.hadoop.example.GroupWriteSupport) Group(org.apache.parquet.example.data.Group) Configuration(org.apache.hadoop.conf.Configuration) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) AvroParquetWriter(org.apache.parquet.avro.AvroParquetWriter) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) File(java.io.File) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

File (java.io.File)3 Path (org.apache.hadoop.fs.Path)3 ParquetWriter (org.apache.parquet.hadoop.ParquetWriter)3 Configuration (org.apache.hadoop.conf.Configuration)2 AvroParquetWriter (org.apache.parquet.avro.AvroParquetWriter)2 Group (org.apache.parquet.example.data.Group)2 SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory)2 GroupWriteSupport (org.apache.parquet.hadoop.example.GroupWriteSupport)2 MessageType (org.apache.parquet.schema.MessageType)2 MessageTypeParser.parseMessageType (org.apache.parquet.schema.MessageTypeParser.parseMessageType)2 HashMap (java.util.HashMap)1