Use of org.apache.avro.file.DataFileWriter in project hive by apache.
From the class TestHBaseSerDe, method getTestAvroBytesFromSchema:
private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
  Schema s = new Schema.Parser().parse(schemaToUse);
  GenericData.Record record = new GenericData.Record(s);
  GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
  innerRecord.put("int1", 42);
  innerRecord.put("boolean1", true);
  innerRecord.put("long1", 42432234234L);
  if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
    // The evolved schema adds a string field to the nested record.
    innerRecord.put("string1", "new value");
  }
  record.put("aRecord", innerRecord);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  dataFileWriter.create(s, out);
  dataFileWriter.append(record);
  // Closing the writer flushes the container header, data block, and sync marker.
  dataFileWriter.close();
  byte[] data = out.toByteArray();
  out.close();
  return data;
}
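The container bytes produced above are self-describing, so they can be decoded without passing the schema again. A minimal read-back sketch, assuming only the byte array returned by the method above (readFirstRecord is a hypothetical helper, not part of the Hive test):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

private GenericRecord readFirstRecord(byte[] avroBytes) throws IOException {
  try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
      new ByteArrayInputStream(avroBytes), new GenericDatumReader<GenericRecord>())) {
    // The writer schema is read from the container header, so no schema argument is needed.
    return stream.next();
  }
}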
Use of org.apache.avro.file.DataFileWriter in project hive by apache.
From the class AvroContainerOutputFormat, method getHiveRecordWriter:
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf,
    Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties,
    Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }
  // Record writer-side time-zone and calendar settings in the file metadata so
  // readers can reproduce the original timestamp semantics.
  dfw.setMeta(AvroSerDe.WRITER_TIME_ZONE, TimeZone.getDefault().toZoneId().toString());
  dfw.setMeta(AvroSerDe.WRITER_PROLEPTIC,
      String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN)));
  dfw.setMeta(AvroSerDe.WRITER_ZONE_CONVERSION_LEGACY,
      String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_WRITE_LEGACY_CONVERSION_ENABLED)));
  dfw.create(schema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(dfw);
}
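The compressed branch follows Avro's CodecFactory API: deflate is special-cased because it takes an explicit compression level, while every other codec is resolved by its registered name. A brief standalone sketch of the same entry points (availability of codecs such as snappy depends on what is on the classpath):

import org.apache.avro.file.CodecFactory;

CodecFactory deflate = CodecFactory.deflateCodec(6);      // deflate with an explicit level
CodecFactory snappy  = CodecFactory.fromString("snappy"); // any registered codec, by name
CodecFactory none    = CodecFactory.nullCodec();          // store blocks uncompressed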
Use of org.apache.avro.file.DataFileWriter in project apex-malhar by apache.
From the class AvroFileToPojoModuleTest, method writeAvroFile:
private void writeAvroFile(File outputFile) {
  DatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<>(new Schema.Parser().parse(AVRO_SCHEMA));
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(new Schema.Parser().parse(AVRO_SCHEMA), outputFile);
    for (GenericRecord record : recordList) {
      dataFileWriter.append(record);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  // Move only after try-with-resources has closed the writer, so the container
  // is fully flushed before the file changes location.
  try {
    FileUtils.moveFileToDirectory(new File(outputFile.getAbsolutePath()), new File(testMeta.dir), true);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
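The test assumes recordList was populated elsewhere; for context, this is one way such records could be built with Avro's GenericRecordBuilder (the field names below are hypothetical placeholders, not the fields of the test's actual AVRO_SCHEMA):

import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
List<GenericRecord> recordList = new ArrayList<>();
recordList.add(new GenericRecordBuilder(schema)
    .set("id", 1)            // hypothetical field
    .set("name", "example")  // hypothetical field
    .build());               // build() validates that every field is set or has a default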
Use of org.apache.avro.file.DataFileWriter in project apex-malhar by apache.
From the class AvroFileInputOperatorTest, method writeAvroFile:
private void writeAvroFile(File outputFile) throws IOException {
  DatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<GenericRecord>(new Schema.Parser().parse(AVRO_SCHEMA));
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  dataFileWriter.create(new Schema.Parser().parse(AVRO_SCHEMA), outputFile);
  for (GenericRecord record : recordList) {
    dataFileWriter.append(record);
  }
  // Close before moving so the container footer is flushed to disk.
  dataFileWriter.close();
  FileUtils.moveFileToDirectory(new File(outputFile.getAbsolutePath()), new File(testMeta.dir), true);
}
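A file written this way can be verified with DataFileReader, the seekable counterpart of DataFileStream. A minimal sketch, assuming the file has been moved into testMeta.dir as above:

import java.io.File;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

File avroFile = new File(testMeta.dir, outputFile.getName());
try (DataFileReader<GenericRecord> reader =
    new DataFileReader<>(avroFile, new GenericDatumReader<GenericRecord>())) {
  for (GenericRecord record : reader) {
    System.out.println(record); // decoded with the schema stored in the file header
  }
}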
Use of org.apache.avro.file.DataFileWriter in project spf4j by zolyfarkas.
From the class AvroMeasurementStore, method initWriter:
private <T extends SpecificRecord> AvroFileInfo<T> initWriter(final String fileNameBase,
    final Path destinationPath, final boolean countEntries, final Class<T> clasz) throws IOException {
  DataFileWriter<T> writer = new DataFileWriter<>(new SpecificDatumWriter<>(clasz));
  if (codecFact != null) {
    writer.setCodec(codecFact);
  }
  long epoch = System.currentTimeMillis();
  writer.setMeta("timeRef", epoch);
  String fileName = fileNameBase + '.' + clasz.getSimpleName().toLowerCase(Locale.US) + ".avro";
  Path file = destinationPath.resolve(fileName);
  long initNrRecords;
  if (Files.isWritable(file)) {
    // The file already exists and is writable: count its records block by block
    // and recover the original time reference from the container metadata.
    try (DataFileStream<T> streamReader =
        new DataFileStream<>(Files.newInputStream(file), new SpecificDatumReader<>(clasz))) {
      if (countEntries) {
        long count = 0L;
        while (streamReader.hasNext()) {
          // getBlockCount() reports how many records the current block holds,
          // so whole blocks are skipped without deserializing individual records.
          count += streamReader.getBlockCount();
          streamReader.nextBlock();
        }
        initNrRecords = count;
      } else {
        initNrRecords = -1L;
      }
      epoch = streamReader.getMetaLong("timeRef");
    }
    writer = writer.appendTo(file.toFile());
  } else {
    try {
      // New file: obtain the schema from a throwaway instance of the generated class.
      writer.create(clasz.getConstructor().newInstance().getSchema(), file.toFile());
    } catch (InstantiationException | IllegalAccessException | NoSuchMethodException
        | InvocationTargetException ex) {
      throw new RuntimeException(ex);
    }
    initNrRecords = 0L;
  }
  return new AvroFileInfo<>(file, writer, epoch, initNrRecords);
}
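One detail worth noting: the create branch instantiates the generated class only to call getSchema(). Avro's SpecificData can resolve the same schema without reflective construction; a hedged alternative sketch, reusing the writer, file, and clasz variables from above:

import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificData;

// Resolves the schema of a generated SpecificRecord class (its SCHEMA$ constant)
// without the getConstructor().newInstance() round trip and its checked exceptions.
Schema schema = SpecificData.get().getSchema(clasz);
writer.create(schema, file.toFile());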