Use of org.apache.avro.file.DataFileWriter in project beam by apache.
From the class AvroSourceTest, method generateTestFile.
/**
* Generates an input Avro file containing the given records in the temporary directory and
* returns the full path of the file.
*/
private <T> String generateTestFile(
    String filename,
    List<T> elems,
    SyncBehavior syncBehavior,
    int syncInterval,
    AvroCoder<T> coder,
    String codec)
    throws IOException {
  Random random = new Random(0);
  File tmpFile = tmpFolder.newFile(filename);
  String path = tmpFile.toString();
  FileOutputStream os = new FileOutputStream(tmpFile);
  // Use a generic writer for GenericRecords; otherwise fall back to reflection.
  DatumWriter<T> datumWriter =
      coder.getType().equals(GenericRecord.class)
          ? new GenericDatumWriter<>(coder.getSchema())
          : new ReflectDatumWriter<>(coder.getSchema());
  try (DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
    writer.setCodec(CodecFactory.fromString(codec));
    writer.create(coder.getSchema(), os); // the writer now owns and will close the stream
    int recordIndex = 0;
    int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;
    for (T elem : elems) {
      writer.append(elem);
      recordIndex++;
      switch (syncBehavior) {
        case SYNC_REGULAR:
          // Force a sync marker after every syncInterval records.
          if (recordIndex == syncInterval) {
            recordIndex = 0;
            writer.sync();
          }
          break;
        case SYNC_RANDOM:
          // Force a sync marker after a random number of records, then pick a new interval.
          if (recordIndex == syncIndex) {
            recordIndex = 0;
            writer.sync();
            syncIndex = random.nextInt(syncInterval);
          }
          break;
        case SYNC_DEFAULT:
        default:
          // Let DataFileWriter place sync markers at its default block boundaries.
          break;
      }
    }
  }
  return path;
}
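A DataFileReader can read such a file back regardless of where sync() forced block boundaries, which is what makes the splitting tests meaningful. A minimal sketch of a read-back helper under the same coder convention; readAllRecords is a hypothetical name, not part of the Beam test:

// Hypothetical helper: read every record back from the file generateTestFile produced.
private static <T> List<T> readAllRecords(String path, AvroCoder<T> coder) throws IOException {
  DatumReader<T> datumReader =
      coder.getType().equals(GenericRecord.class)
          ? new GenericDatumReader<>(coder.getSchema())
          : new ReflectDatumReader<>(coder.getSchema());
  List<T> records = new ArrayList<>();
  try (DataFileReader<T> reader = new DataFileReader<>(new File(path), datumReader)) {
    for (T record : reader) { // DataFileReader is Iterable and spans all blocks
      records.add(record);
    }
  }
  return records;
}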
Use of org.apache.avro.file.DataFileWriter in project beam by apache.
From the class AvroPipelineTest, method populateGenericFile.
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
  FileOutputStream outputStream = new FileOutputStream(this.inputFile);
  GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
    dataFileWriter.create(schema, outputStream);
    for (GenericRecord record : genericRecords) {
      dataFileWriter.append(record);
    }
  }
  // Redundant but harmless: DataFileWriter.close() above already closed the stream
  // passed to create().
  outputStream.close();
}
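A minimal sketch of the matching read path; readGenericFile is a hypothetical counterpart, not part of AvroPipelineTest. DataFileReader picks up the writer schema embedded in the file header, so the records come back exactly as written:

// Hypothetical helper: read every GenericRecord back from an Avro container file.
private List<GenericRecord> readGenericFile(File inputFile, Schema schema) throws IOException {
  GenericDatumReader<GenericRecord> genericDatumReader = new GenericDatumReader<>(schema);
  List<GenericRecord> records = new ArrayList<>();
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(inputFile, genericDatumReader)) {
    for (GenericRecord record : dataFileReader) {
      records.add(record);
    }
  }
  return records;
}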
Use of org.apache.avro.file.DataFileWriter in project hazelcast by hazelcast.
From the class FileUtil, method createAvroPayload.
static byte[] createAvroPayload() {
  try (ByteArrayOutputStream output = new ByteArrayOutputStream();
       DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
    writer.create(AVRO_RECORD.getSchema(), output);
    writer.append(AVRO_RECORD);
    // Flush explicitly: toByteArray() runs before try-with-resources closes the writer.
    writer.flush();
    return output.toByteArray();
  } catch (IOException ioe) {
    throw sneakyThrow(ioe);
  }
}
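The matching read path for an in-memory payload is DataFileStream, which, unlike DataFileReader, does not require seekable input. A minimal sketch assuming a single-record payload as produced above; readAvroPayload is a hypothetical helper:

// Hypothetical round-trip check: parse the payload produced by createAvroPayload.
static GenericRecord readAvroPayload(byte[] payload) throws IOException {
  try (DataFileStream<GenericRecord> reader =
           new DataFileStream<>(new ByteArrayInputStream(payload), new GenericDatumReader<>())) {
    // The schema is read from the container-file header, so no schema argument is needed.
    return reader.next();
  }
}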
Use of org.apache.avro.file.DataFileWriter in project haivvreo by jghoman.
From the class AvroContainerOutputFormat, method getHiveRecordWriter.
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jobConf,
    Path path,
    Class<? extends Writable> valueClass,
    boolean isCompressed,
    Properties properties,
    Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (HaivvreoException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  if (isCompressed) {
    // Only the deflate codec takes an explicit level; other codecs are resolved by name.
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory =
        codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }
  dfw.create(schema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(dfw);
}
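The codec branch above follows the usual DataFileWriter pattern: only "deflate" accepts an explicit level, so it is special-cased, and every other name goes through CodecFactory.fromString. A standalone sketch of that selection, using the constants from org.apache.avro.file.DataFileConstants; codecFor is a hypothetical helper:

// Choose an Avro codec by name; "deflate" honors an explicit compression level.
static CodecFactory codecFor(String codecName, int deflateLevel) {
  if (DataFileConstants.DEFLATE_CODEC.equals(codecName)) {
    // Deflate levels typically range from 1 (fastest) to 9 (smallest output).
    return CodecFactory.deflateCodec(deflateLevel);
  }
  // e.g. "null" or "snappy" (the latter needs snappy-java on the classpath);
  // unrecognized names cause CodecFactory.fromString to throw AvroRuntimeException.
  return CodecFactory.fromString(codecName);
}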
Use of org.apache.avro.file.DataFileWriter in project hive by apache.
From the class TestHBaseSerDe, method getTestAvroBytesFromClass1.
private byte[] getTestAvroBytesFromClass1(int i) throws IOException {
  Employee employee = new Employee();
  employee.setEmployeeName("Avro Employee" + i);
  employee.setEmployeeID(11111L);
  employee.setGender(Gender.FEMALE);
  employee.setAge(25L);
  Address address = new Address();
  address.setAddress1("Avro First Address" + i);
  address.setAddress2("Avro Second Address" + i);
  address.setCity("Avro City" + i);
  address.setZipcode(123456L);
  Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();
  metadata.put("testkey", "testvalue");
  address.setMetadata(metadata);
  HomePhone hPhone = new HomePhone();
  hPhone.setAreaCode(999L);
  hPhone.setNumber(1234567890L);
  OfficePhone oPhone = new OfficePhone();
  oPhone.setAreaCode(999L);
  oPhone.setNumber(1234455555L);
  ContactInfo contact = new ContactInfo();
  List<Address> addresses = new ArrayList<Address>();
  // Set a value for the union-typed field.
  address.setCounty(hPhone);
  addresses.add(address);
  addresses.add(address);
  contact.setAddress(addresses);
  contact.setHomePhone(hPhone);
  contact.setOfficePhone(oPhone);
  employee.setContactInfo(contact);
  // Serialize the specific record into an in-memory Avro container file.
  DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
  DataFileWriter<Employee> dataFileWriter = new DataFileWriter<Employee>(datumWriter);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  dataFileWriter.create(employee.getSchema(), out);
  dataFileWriter.append(employee);
  dataFileWriter.close();
  return out.toByteArray();
}
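A minimal sketch of the reverse operation; getEmployeeFromAvroBytes is a hypothetical helper, not part of TestHBaseSerDe. SpecificDatumReader resolves the writer schema stored in the container bytes against the generated Employee class:

// Hypothetical inverse of the method above: deserialize the Employee from the bytes.
private Employee getEmployeeFromAvroBytes(byte[] avroBytes) throws IOException {
  DatumReader<Employee> datumReader = new SpecificDatumReader<Employee>(Employee.class);
  try (DataFileStream<Employee> stream =
           new DataFileStream<Employee>(new ByteArrayInputStream(avroBytes), datumReader)) {
    return stream.next(); // the bytes above contain exactly one record
  }
}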