Search in sources :

Example 41 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project hive by apache.

the class TestHBaseSerDe method getTestAvroBytesFromSchema.

/**
 * Builds one test record for the given schema and serializes it as an in-memory Avro data file.
 *
 * <p>The record's {@code aRecord} field is set to a nested record populated with {@code int1},
 * {@code boolean1} and {@code long1}; {@code string1} is additionally set when
 * {@code schemaToUse} equals {@code RECORD_SCHEMA_EVOLVED}.
 *
 * @param schemaToUse JSON text of the Avro schema; it must declare a record field named
 *     {@code aRecord}
 * @return the bytes of the Avro data file containing the single record
 * @throws IOException if the Avro writer fails while creating the file or appending the record
 */
private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
    // Schema.Parser replaces the deprecated static Schema.parse(String).
    Schema s = new Schema.Parser().parse(schemaToUse);
    GenericData.Record record = new GenericData.Record(s);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
    innerRecord.put("int1", 42);
    innerRecord.put("boolean1", true);
    innerRecord.put("long1", 42432234234L);
    if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
        innerRecord.put("string1", "new value");
    }
    record.put("aRecord", innerRecord);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        // The writer must be closed before the bytes are read so that its buffered block is
        // flushed into the stream; try-with-resources also closes it if create/append throws.
        try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
            dataFileWriter.create(s, out);
            dataFileWriter.append(record);
        }
        return out.toByteArray();
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericData(org.apache.avro.generic.GenericData)

Example 42 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project hive by apache.

the class AvroContainerOutputFormat method getHiveRecordWriter.

/**
 * Creates a record writer that stores rows in an Avro data file at {@code path}.
 *
 * <p>The schema is resolved through {@code AvroSerdeUtils} from the job configuration and table
 * properties. When {@code isCompressed} is set, the codec named by {@code OUTPUT_CODEC} is used
 * (deflate, at the configured level, by default). The writer's time zone, proleptic-Gregorian
 * flag and legacy-conversion flag are recorded in the file metadata before the file is created.
 *
 * @throws IOException if the schema cannot be determined or the file cannot be created
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    final Schema avroSchema;
    try {
        avroSchema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException schemaFailure) {
        // Surface schema resolution problems through the declared checked exception.
        throw new IOException(schemaFailure);
    }

    DataFileWriter<GenericRecord> fileWriter =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(avroSchema));

    if (isCompressed) {
        int deflateLevel = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String requestedCodec = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory codecFactory;
        if (requestedCodec.equals(DEFLATE_CODEC)) {
            // Only the deflate codec takes the configured compression level.
            codecFactory = CodecFactory.deflateCodec(deflateLevel);
        } else {
            codecFactory = CodecFactory.fromString(requestedCodec);
        }
        fileWriter.setCodec(codecFactory);
    }

    // Record the writer's time zone and calendar/conversion settings in the file metadata.
    String writerZone = TimeZone.getDefault().toZoneId().toString();
    fileWriter.setMeta(AvroSerDe.WRITER_TIME_ZONE, writerZone);
    boolean proleptic = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN);
    fileWriter.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf(proleptic));
    boolean legacyConversion = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_WRITE_LEGACY_CONVERSION_ENABLED);
    fileWriter.setMeta(AvroSerDe.WRITER_ZONE_CONVERSION_LEGACY, String.valueOf(legacyConversion));

    fileWriter.create(avroSchema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(fileWriter);
}
Also used : AvroSerdeException(org.apache.hadoop.hive.serde2.avro.AvroSerdeException) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) CodecFactory(org.apache.avro.file.CodecFactory) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 43 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project apex-malhar by apache.

the class AvroFileToPojoModuleTest method writeAvroFile.

/**
 * Writes every record in {@code recordList} to {@code outputFile} as an Avro data file and then
 * moves that file into the directory {@code testMeta.dir}.
 *
 * <p>An {@link IOException} raised while writing or moving is not propagated; only its stack
 * trace is printed.
 *
 * @param outputFile file the Avro data is written to before it is moved
 */
private void writeAvroFile(File outputFile) {
    // Parse once; the same schema is used for the datum writer and for the file header.
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try {
        try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
            dataFileWriter.create(schema, outputFile);
            for (GenericRecord record : recordList) {
                dataFileWriter.append(record);
            }
        }
        // Move only after the writer is closed, so all buffered data is on disk and the file
        // handle has been released before the file changes location.
        FileUtils.moveFileToDirectory(new File(outputFile.getAbsolutePath()), new File(testMeta.dir), true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 44 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project apex-malhar by apache.

the class AvroFileInputOperatorTest method writeAvroFile.

/**
 * Writes every record in {@code recordList} to {@code outputFile} as an Avro data file and then
 * moves that file into the directory {@code testMeta.dir}.
 *
 * @param outputFile file the Avro data is written to before it is moved
 * @throws IOException if writing the Avro file or moving it fails
 */
private void writeAvroFile(File outputFile) throws IOException {
    // Parse once; the same schema is used for the datum writer and for the file header.
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    // try-with-resources closes the writer even when create/append throws, and guarantees the
    // file is flushed and closed before it is moved below.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
        dataFileWriter.create(schema, outputFile);
        for (GenericRecord record : recordList) {
            dataFileWriter.append(record);
        }
    }
    FileUtils.moveFileToDirectory(new File(outputFile.getAbsolutePath()), new File(testMeta.dir), true);
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 45 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project spf4j by zolyfarkas.

the class AvroMeasurementStore method initWriter.

/**
 * Opens an Avro writer for records of type {@code clasz} on the file
 * {@code <fileNameBase>.<lower-cased simple class name>.avro} under {@code destinationPath}.
 *
 * <p>If that file is writable, its {@code timeRef} metadata is read back, its records are
 * counted block by block when {@code countEntries} is set (otherwise the count is reported as
 * {@code -1}), and the writer is reopened in append mode. Otherwise a new file is created with
 * the schema of a freshly constructed {@code clasz} instance, a {@code timeRef} of the current
 * time and a record count of {@code 0}.
 *
 * @param fileNameBase prefix of the file name
 * @param destinationPath directory the file name is resolved against
 * @param countEntries whether to count the records of an existing file
 * @param clasz record class written to the file
 * @return the file, its writer, the time reference and the initial record count
 * @throws IOException if reading the existing file or opening the writer fails
 */
private <T extends SpecificRecord> AvroFileInfo<T> initWriter(final String fileNameBase, final Path destinationPath, final boolean countEntries, final Class<T> clasz) throws IOException {
    DataFileWriter<T> avroWriter = new DataFileWriter<>(new SpecificDatumWriter<>(clasz));
    if (codecFact != null) {
        avroWriter.setCodec(codecFact);
    }
    long timeRef = System.currentTimeMillis();
    avroWriter.setMeta("timeRef", timeRef);

    String typeSuffix = clasz.getSimpleName().toLowerCase(Locale.US);
    Path target = destinationPath.resolve(fileNameBase + '.' + typeSuffix + ".avro");

    final long existingRecords;
    if (!Files.isWritable(target)) {
        // No writable file yet: start a new one using the schema reported by a new instance.
        try {
            avroWriter.create(clasz.getConstructor().newInstance().getSchema(), target.toFile());
        } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException ex) {
            throw new RuntimeException(ex);
        }
        existingRecords = 0L;
    } else {
        // -1 marks "not counted"; replaced by the real total only when counting is requested.
        long counted = -1L;
        try (DataFileStream<T> existing = new DataFileStream<>(Files.newInputStream(target), new SpecificDatumReader<>(clasz))) {
            if (countEntries) {
                counted = 0L;
                while (existing.hasNext()) {
                    counted += existing.getBlockCount();
                    existing.nextBlock();
                }
            }
            // Keep the time reference the file was originally written with.
            timeRef = existing.getMetaLong("timeRef");
        }
        existingRecords = counted;
        avroWriter = avroWriter.appendTo(target.toFile());
    }
    return new AvroFileInfo<>(target, avroWriter, timeRef, existingRecords);
}
Also used : Path(java.nio.file.Path) DataFileWriter(org.apache.avro.file.DataFileWriter) DataFileStream(org.apache.avro.file.DataFileStream) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter)102 GenericRecord (org.apache.avro.generic.GenericRecord)58 Schema (org.apache.avro.Schema)50 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)47 File (java.io.File)38 ByteArrayOutputStream (java.io.ByteArrayOutputStream)22 IOException (java.io.IOException)22 GenericData (org.apache.avro.generic.GenericData)17 FileOutputStream (java.io.FileOutputStream)15 Test (org.junit.Test)14 HashMap (java.util.HashMap)11 InputStream (java.io.InputStream)10 SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter)10 ArrayList (java.util.ArrayList)9 Path (org.apache.hadoop.fs.Path)9 ByteArrayInputStream (java.io.ByteArrayInputStream)8 OutputStream (java.io.OutputStream)8 ByteBuffer (java.nio.ByteBuffer)7 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)7 MockFlowFile (org.apache.nifi.util.MockFlowFile)7