Search in sources :

Example 36 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project beam by apache.

the class AvroSourceTest method generateTestFile.

/**
 * Generates an input Avro file containing the given records in the temporary directory and
 * returns the full path of the file.
 *
 * @param filename name of the file to create inside {@code tmpFolder}
 * @param elems records to append, in iteration order
 * @param syncBehavior when explicit {@code writer.sync()} calls are made between records
 * @param syncInterval for {@code SYNC_REGULAR}, the number of records between syncs; for
 *     {@code SYNC_RANDOM}, the exclusive upper bound passed to {@code Random.nextInt}
 * @param coder supplies the Avro schema and the element type used to pick the datum writer
 * @param codec codec name handed to {@code CodecFactory.fromString}
 * @return the string form of the created file
 * @throws IOException if the file cannot be created or written
 */
private <T> String generateTestFile(String filename, List<T> elems, SyncBehavior syncBehavior, int syncInterval, AvroCoder<T> coder, String codec) throws IOException {
    // Fixed seed: the sequence of random sync positions is the same on every run.
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();
    DatumWriter<T> datumWriter = coder.getType().equals(GenericRecord.class) ? new GenericDatumWriter<>(coder.getSchema()) : new ReflectDatumWriter<>(coder.getSchema());
    // The stream is declared as a resource so that it is closed even when setCodec() or create()
    // throws before the writer has been attached to it. Resources close in reverse order, so the
    // writer is closed first and the stream afterwards.
    try (FileOutputStream os = new FileOutputStream(tmpFile);
        DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
        writer.setCodec(CodecFactory.fromString(codec));
        writer.create(coder.getSchema(), os);
        // Number of records appended since the last explicit sync.
        int recordIndex = 0;
        int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;
        for (T elem : elems) {
            writer.append(elem);
            recordIndex++;
            switch(syncBehavior) {
                case SYNC_REGULAR:
                    if (recordIndex == syncInterval) {
                        recordIndex = 0;
                        writer.sync();
                    }
                    break;
                case SYNC_RANDOM:
                    // NOTE(review): recordIndex is at least 1 here, so whenever nextInt() yields 0
                    // this condition never matches again and no further explicit syncs are made.
                    // Left as-is because existing tests may depend on the generated layout.
                    if (recordIndex == syncIndex) {
                        recordIndex = 0;
                        writer.sync();
                        syncIndex = random.nextInt(syncInterval);
                    }
                    break;
                case SYNC_DEFAULT:
                default:
                    // No explicit sync calls are made for these behaviors.
                    break;
            }
        }
    }
    return path;
}
Also used : Random(java.util.Random) FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 37 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project beam by apache.

the class AvroPipelineTest method populateGenericFile.

/**
 * Writes the given records to {@code this.inputFile} as an Avro data file using the supplied
 * schema.
 *
 * @param genericRecords records to append, in iteration order
 * @param schema schema used both for the datum writer and for the file header
 * @throws IOException if the file cannot be opened or written
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    // Both the stream and the writer are managed resources, so the stream is released even when
    // create() or append() throws; previously it was only closed on the success path.
    try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 38 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project hazelcast by hazelcast.

the class FileUtil method createAvroPayload.

/**
 * Serializes {@code AVRO_RECORD} into an in-memory Avro data file and returns the resulting
 * bytes. Any {@link IOException} raised while writing is rethrown through {@code sneakyThrow}.
 */
static byte[] createAvroPayload() {
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>();
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataFileWriter<GenericRecord> avroWriter = new DataFileWriter<>(datumWriter)) {
        avroWriter.create(AVRO_RECORD.getSchema(), buffer);
        avroWriter.append(AVRO_RECORD);
        // Flush before snapshotting: the bytes are taken while the writer is still open.
        avroWriter.flush();
        byte[] payload = buffer.toByteArray();
        return payload;
    } catch (IOException ioe) {
        throw sneakyThrow(ioe);
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 39 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project haivvreo by jghoman.

the class AvroContainerOutputFormat method getHiveRecordWriter.

/**
 * Creates a record writer that writes generic Avro records to a new file at {@code path}.
 *
 * <p>The schema is resolved from the job configuration and table properties; a
 * {@code HaivvreoException} raised during resolution is wrapped in an {@link IOException}.
 * When {@code isCompressed} is set, the codec named by {@code OUTPUT_CODEC} is applied
 * (deflate, at the level configured under {@code DEFLATE_LEVEL_KEY}, when none is named).
 * {@code valueClass} and {@code progressable} are not used by this method.
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (HaivvreoException e) {
        throw new IOException(e);
    }

    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));

    if (isCompressed) {
        int deflateLevel = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory codec;
        if (codecName.equals(DEFLATE_CODEC)) {
            // Deflate is built explicitly so that the configured compression level is honored.
            codec = CodecFactory.deflateCodec(deflateLevel);
        } else {
            codec = CodecFactory.fromString(codecName);
        }
        fileWriter.setCodec(codec);
    }

    // The codec must be set before create(), which opens the target file and starts the output.
    fileWriter.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(fileWriter);
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) CodecFactory(org.apache.avro.file.CodecFactory) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 40 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project hive by apache.

the class TestHBaseSerDe method getTestAvroBytesFromClass1.

/**
 * Builds a sample {@code Employee} whose name and address strings are suffixed with {@code i},
 * and returns it serialized as the bytes of an Avro data file containing that single record.
 *
 * @param i suffix appended to the employee name and address strings
 * @return the serialized Avro data file
 * @throws IOException if writing the record fails
 */
private byte[] getTestAvroBytesFromClass1(int i) throws IOException {
    // Phone entries; the home phone is reused below as the address's union-typed field.
    HomePhone homePhone = new HomePhone();
    homePhone.setAreaCode(999L);
    homePhone.setNumber(1234567890L);
    OfficePhone officePhone = new OfficePhone();
    officePhone.setAreaCode(999L);
    officePhone.setNumber(1234455555L);

    Map<CharSequence, CharSequence> addressMetadata = new HashMap<CharSequence, CharSequence>();
    addressMetadata.put("testkey", "testvalue");

    Address sampleAddress = new Address();
    sampleAddress.setAddress1("Avro First Address" + i);
    sampleAddress.setAddress2("Avro Second Address" + i);
    sampleAddress.setCity("Avro City" + i);
    sampleAddress.setZipcode(123456L);
    sampleAddress.setMetadata(addressMetadata);
    // set value for the union type
    sampleAddress.setCounty(homePhone);

    // The same address instance is listed twice.
    List<Address> addressList = new ArrayList<Address>();
    addressList.add(sampleAddress);
    addressList.add(sampleAddress);

    ContactInfo contactInfo = new ContactInfo();
    contactInfo.setAddress(addressList);
    contactInfo.setHomePhone(homePhone);
    contactInfo.setOfficePhone(officePhone);

    Employee employee = new Employee();
    employee.setEmployeeName("Avro Employee" + i);
    employee.setEmployeeID(11111L);
    employee.setGender(Gender.FEMALE);
    employee.setAge(25L);
    employee.setContactInfo(contactInfo);

    // Serialize the single record into an in-memory Avro data file.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataFileWriter<Employee> fileWriter = new DataFileWriter<Employee>(new SpecificDatumWriter<Employee>(Employee.class));
    fileWriter.create(employee.getSchema(), buffer);
    fileWriter.append(employee);
    fileWriter.close();
    return buffer.toByteArray();
}
Also used : OfficePhone(org.apache.hadoop.hive.hbase.avro.OfficePhone) Address(org.apache.hadoop.hive.hbase.avro.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Employee(org.apache.hadoop.hive.hbase.avro.Employee) HomePhone(org.apache.hadoop.hive.hbase.avro.HomePhone) ContactInfo(org.apache.hadoop.hive.hbase.avro.ContactInfo)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 102; GenericRecord (org.apache.avro.generic.GenericRecord): 58; Schema (org.apache.avro.Schema): 50; GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 47; File (java.io.File): 38; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 22; IOException (java.io.IOException): 22; GenericData (org.apache.avro.generic.GenericData): 17; FileOutputStream (java.io.FileOutputStream): 15; Test (org.junit.Test): 14; HashMap (java.util.HashMap): 11; InputStream (java.io.InputStream): 10; SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 10; ArrayList (java.util.ArrayList): 9; Path (org.apache.hadoop.fs.Path): 9; ByteArrayInputStream (java.io.ByteArrayInputStream): 8; OutputStream (java.io.OutputStream): 8; ByteBuffer (java.nio.ByteBuffer): 7; GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 7; MockFlowFile (org.apache.nifi.util.MockFlowFile): 7