Use of org.apache.avro.file.DataFileWriter in project Drill by Apache.
From the class AvroDataGenerator, method generateMultiDimensionalArray.
public String generateMultiDimensionalArray(int numRecords, int arraySize) throws Exception {
  File file = File.createTempFile("avro-multi-dimensional-array-test", ".avro", dirTestWatcher.getRootDir());
  String colTwoDimsName = "col_array_two_dims";
  // Schema: a record with a single field that is an array of arrays of strings.
  Schema schema = SchemaBuilder.record("rec")
      .fields()
      .name(colTwoDimsName).type().array().items().array().items().stringType().noDefault()
      .endRecord();
  try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    writer.create(schema, file);
    for (int i = 0; i < numRecords; i++) {
      GenericRecord record = new GenericData.Record(schema);
      Schema twoDimsSchema = schema.getField(colTwoDimsName).schema();
      // The first constructor argument of GenericData.Array is only a capacity hint.
      GenericArray<GenericArray<String>> arrayTwoDims = new GenericData.Array<>(numRecords, twoDimsSchema);
      for (int a = 0; a < arraySize; a++) {
        // Each inner array holds two values tagged with the row index and the outer index.
        GenericArray<String> nestedArray = new GenericData.Array<>(2, twoDimsSchema.getElementType());
        nestedArray.add(String.format("val_%s_%s_0", i, a));
        nestedArray.add(String.format("val_%s_%s_1", i, a));
        arrayTwoDims.add(nestedArray);
      }
      record.put(colTwoDimsName, arrayTwoDims);
      writer.append(record);
    }
  }
  return file.getName();
}
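A file produced this way can be read back with DataFileReader, which recovers the writer schema from the file header. The following is a minimal sketch for inspecting the output, using org.apache.avro.file.DataFileReader and GenericDatumReader; the helper name is illustrative and not part of AvroDataGenerator:

// Illustrative reader for the file generated above.
static void printMultiDimensionalArrays(File avroFile) throws Exception {
  try (DataFileReader<GenericRecord> reader = new DataFileReader<>(avroFile, new GenericDatumReader<>())) {
    for (GenericRecord record : reader) {
      // col_array_two_dims deserializes as a List of Lists of Utf8 values.
      System.out.println(record.get("col_array_two_dims"));
    }
  }
}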
Use of org.apache.avro.file.DataFileWriter in project Drill by Apache.
From the class AvroDataGenerator, method generateDuration.
public String generateDuration(int numRows) throws Exception {
  File file = File.createTempFile("avro-duration-test", ".avro", dirTestWatcher.getRootDir());
  // LogicalTypes has no factory method for duration, so the logical type is attached
  // by name to a 12-byte fixed, as the Avro specification requires.
  Schema durationSchema = new LogicalType("duration").addToSchema(SchemaBuilder.builder().fixed("duration_fixed").size(12));
  Schema schema = SchemaBuilder.record("record")
      .fields()
      .name("col_duration").type(durationSchema).noDefault()
      .endRecord();
  try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    writer.create(schema, file);
    for (int i = 0; i < numRows; i++) {
      GenericRecord record = new GenericData.Record(schema);
      // A duration packs three unsigned little-endian 32-bit integers into 12 bytes.
      ByteBuffer bb = ByteBuffer.allocate(12);
      bb.order(ByteOrder.LITTLE_ENDIAN);
      // months
      bb.putInt(10 + i);
      // days
      bb.putInt(100 + i);
      // milliseconds
      bb.putInt(1000 + i);
      GenericData.Fixed fixed = new GenericData.Fixed(durationSchema, bb.array());
      record.put("col_duration", fixed);
      writer.append(record);
    }
  }
  return file.getName();
}
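Decoding simply reverses the layout: read three little-endian ints back out of the 12-byte fixed. A minimal sketch; the helper name is illustrative:

// Illustrative decoder for the 12-byte duration fixed written above.
static int[] decodeDuration(byte[] fixedBytes) {
  ByteBuffer bb = ByteBuffer.wrap(fixedBytes).order(ByteOrder.LITTLE_ENDIAN);
  // Same order as written: months, days, milliseconds.
  return new int[] { bb.getInt(), bb.getInt(), bb.getInt() };
}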
Use of org.apache.avro.file.DataFileWriter in project Drill by Apache.
From the class AvroDataGenerator, method generateDecimalData.
public String generateDecimalData(int numRecords) throws Exception {
  File file = File.createTempFile("avro-decimal-test", ".avro", dirTestWatcher.getRootDir());
  Schema decBytes = LogicalTypes.decimal(10, 2).addToSchema(SchemaBuilder.builder().bytesType());
  Schema decFixed = LogicalTypes.decimal(5, 2).addToSchema(SchemaBuilder.builder().fixed("dec_fixed").size(5));
  Schema schema = SchemaBuilder.record("rec")
      .fields()
      .name("col_dec_pos_bytes").type(decBytes).noDefault()
      .name("col_dec_neg_bytes").type(decBytes).noDefault()
      .name("col_dec_pos_fixed").type(decFixed).noDefault()
      .name("col_dec_neg_fixed").type(decFixed).noDefault()
      .endRecord();
  try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    writer.create(schema, file);
    for (int i = 0; i < numRecords; i++) {
      GenericRecord record = new GenericData.Record(schema);
      // For decimal-over-bytes, Avro stores the two's-complement unscaled value directly.
      ByteBuffer posBytes = ByteBuffer.wrap(BigInteger.valueOf(100 + i).toByteArray());
      record.put("col_dec_pos_bytes", posBytes);
      ByteBuffer negBytes = ByteBuffer.wrap(BigInteger.valueOf(-200 + i).toByteArray());
      record.put("col_dec_neg_bytes", negBytes);
      // For decimal-over-fixed, the unscaled value must be right-aligned in the fixed-size
      // buffer, with the leading bytes padded: 0x00 for positive values...
      byte[] posFixedBytes = new byte[5];
      byte[] posValueBytes = BigInteger.valueOf(300 + i).toByteArray();
      int posDiff = posFixedBytes.length - posValueBytes.length;
      assert posDiff >= 0;
      System.arraycopy(posValueBytes, 0, posFixedBytes, posDiff, posValueBytes.length);
      Arrays.fill(posFixedBytes, 0, posDiff, (byte) 0);
      GenericData.Fixed posFixed = new GenericData.Fixed(decFixed, posFixedBytes);
      record.put("col_dec_pos_fixed", posFixed);
      // ...and 0xFF (sign extension) for negative values.
      byte[] negFixedBytes = new byte[5];
      byte[] negValueBytes = BigInteger.valueOf(-400 + i).toByteArray();
      int negDiff = negFixedBytes.length - negValueBytes.length;
      assert negDiff >= 0;
      System.arraycopy(negValueBytes, 0, negFixedBytes, negDiff, negValueBytes.length);
      Arrays.fill(negFixedBytes, 0, negDiff, (byte) -1);
      GenericData.Fixed negFixed = new GenericData.Fixed(decFixed, negFixedBytes);
      record.put("col_dec_neg_fixed", negFixed);
      writer.append(record);
    }
  }
  return file.getName();
}
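The manual padding can also be delegated to Avro's built-in org.apache.avro.Conversions.DecimalConversion, which handles alignment and sign extension. A hedged sketch against the same decBytes and decFixed schemas; the helper is illustrative, not part of the Drill test:

// Illustrative alternative using Avro's built-in decimal conversion.
static void putDecimalsViaConversion(GenericRecord record, Schema decBytes, Schema decFixed, int i) {
  Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
  // BigDecimal.valueOf(100 + i, 2) is the unscaled value 100 + i at scale 2, i.e. 1.00, 1.01, ...
  record.put("col_dec_pos_bytes",
      conversion.toBytes(BigDecimal.valueOf(100 + i, 2), decBytes, decBytes.getLogicalType()));
  // toFixed right-aligns the value in the 5-byte fixed and sign-extends negatives with 0xFF.
  record.put("col_dec_neg_fixed",
      conversion.toFixed(BigDecimal.valueOf(-400 + i, 2), decFixed, decFixed.getLogicalType()));
}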
Use of org.apache.avro.file.DataFileWriter in project incubator-gobblin by Apache.
From the class FsSpecProducer, method writeAvroJobSpec.
private void writeAvroJobSpec(AvroJobSpec jobSpec) throws IOException {
  DatumWriter<AvroJobSpec> datumWriter = new SpecificDatumWriter<>(AvroJobSpec.SCHEMA$);
  Path jobSpecPath = new Path(this.specConsumerPath, annotateSpecFileName(jobSpec.getUri()));
  // Write the new JobSpec to a temporary path first.
  Path tmpDir = new Path(this.specConsumerPath, UUID.randomUUID().toString());
  if (!fs.exists(tmpDir)) {
    fs.mkdirs(tmpDir);
  }
  Path tmpJobSpecPath = new Path(tmpDir, jobSpec.getUri());
  OutputStream out = fs.create(tmpJobSpecPath);
  // try-with-resources closes the writer (and its underlying stream) even if a write fails.
  try (DataFileWriter<AvroJobSpec> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(AvroJobSpec.SCHEMA$, out);
    dataFileWriter.append(jobSpec);
  }
  // Rename the JobSpec from the temporary to the final location.
  HadoopUtils.renamePath(fs, tmpJobSpecPath, jobSpecPath, true);
  // Delete the temporary directory once the JobSpec has been moved to its final publish location.
  log.info("Deleting {}", tmpJobSpecPath.getParent().toString());
  fs.delete(tmpJobSpecPath.getParent(), true);
}
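On the consumer side, such a spec file can be read back with DataFileReader over a Hadoop SeekableInput. A hedged sketch using org.apache.avro.mapred.FsInput; the method itself is illustrative and not quoted from Gobblin:

// Illustrative consumer-side read of a spec file written by writeAvroJobSpec.
private AvroJobSpec readAvroJobSpec(Path jobSpecPath, Configuration conf) throws IOException {
  try (DataFileReader<AvroJobSpec> reader = new DataFileReader<>(
      new FsInput(jobSpecPath, conf), new SpecificDatumReader<>(AvroJobSpec.class))) {
    return reader.next();
  }
}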
Use of org.apache.avro.file.DataFileWriter in project CarbonData by Apache.
From the class AvroCarbonWriter, method write.
/**
 * Writes a single row of data. The input row is an Avro record, or a JSON string
 * that is decoded into one.
 */
@Override
public void write(Object object) throws IOException {
  try {
    GenericData.Record record = null;
    if (object instanceof GenericData.Record) {
      record = (GenericData.Record) object;
    } else if (object instanceof String) {
      // Decode a JSON string into an Avro record using the writer's schema.
      String json = (String) object;
      InputStream input = null;
      DataFileWriter<GenericRecord> writer = null;
      ByteArrayOutputStream output = null;
      try {
        GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(this.avroSchema);
        input = new ByteArrayInputStream(json.getBytes(CarbonCommonConstants.DEFAULT_CHARSET));
        output = new ByteArrayOutputStream();
        DataInputStream din = new DataInputStream(input);
        writer = new DataFileWriter<>(new GenericDatumWriter<>());
        writer.create(this.avroSchema, output);
        JsonDecoder decoder = DecoderFactory.get().jsonDecoder(this.avroSchema, din);
        record = reader.read(null, decoder);
      } finally {
        if (input != null) {
          input.close();
        }
        if (writer != null) {
          writer.close();
        }
      }
    } else {
      throw new UnsupportedOperationException("carbon does not support " + object + ", only GenericData.Record and String are supported for " + this.getClass().getName());
    }
    // Convert the Avro record to a CSV String[] row and hand it to the record writer.
    Object[] csvRecord = avroToCsv(record);
    writable.set(csvRecord);
    recordWriter.write(NullWritable.get(), writable);
  } catch (Exception e) {
    close();
    throw new IOException(e);
  }
}
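Stripped of the stream plumbing and the DataFileWriter detour, the JSON-to-record step reduces to a GenericDatumReader plus a JsonDecoder. A minimal standalone sketch; the helper name is illustrative:

// Minimal JSON-to-Avro-record conversion; jsonDecoder also accepts a String directly.
static GenericData.Record jsonToAvroRecord(String json, Schema schema) throws IOException {
  JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
  GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(schema);
  return reader.read(null, decoder);
}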