Use of org.apache.avro.file.DataFileWriter in the parquet-mr project (Apache):
class ToAvroCommand, method run().
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
// Exactly one source data file must be supplied on the command line.
Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
String sourcePath = targets.get(0);
CodecFactory codec = Codecs.avroCodec(compressionCodecName);
// Resolve the writer schema: an explicit .avsc file wins over the schema read from the source.
final Schema schema = (avroSchemaFile != null)
    ? Schemas.fromAvsc(open(avroSchemaFile))
    : getAvroSchema(sourcePath);
// Narrow the output schema to the requested columns, if any were given.
final Schema projection = filterSchema(schema, columns);
Iterable<Record> records = openDataFile(sourcePath, projection);
boolean failed = true;
long written = 0;
DatumWriter<Record> recordWriter = new GenericDatumWriter<>(schema);
try (DataFileWriter<Record> avroWriter = new DataFileWriter<>(recordWriter)) {
avroWriter.setCodec(codec);
// The writer returned by create() must be closed to flush the final block.
try (OutputStream sink = overwrite ? create(outputPath) : createWithNoOverwrite(outputPath);
DataFileWriter<Record> openWriter = avroWriter.create(projection, sink)) {
for (Record record : records) {
openWriter.append(record);
written++;
}
}
failed = false;
} catch (RuntimeException e) {
// Report how far the copy got before failing; keep the original cause attached.
throw new RuntimeException("Failed on record " + written, e);
} finally {
// The record iterable may hold an open file handle; close it, suppressing
// secondary errors if we are already unwinding from a failure.
if (records instanceof Closeable) {
Closeables.close((Closeable) records, failed);
}
}
return 0;
}
Use of org.apache.avro.file.DataFileWriter in the carbondata project (Apache):
class TestUtil, method jsonToAvro().
/**
 * Decodes a JSON-encoded record into an Avro {@link GenericData.Record} using the given schema.
 *
 * <p>Note: the original version also created a {@code DataFileWriter} whose output was written
 * to a buffer that was never read — that was dead code and has been removed; decoding only
 * needs a {@link JsonDecoder} and a {@link GenericDatumReader}.
 *
 * @param json the record serialized as Avro JSON encoding
 * @param avroSchema the Avro schema (JSON form) describing the record
 * @return the decoded record
 * @throws IOException if the JSON cannot be parsed or does not match the schema
 */
public static GenericData.Record jsonToAvro(String json, String avroSchema) throws IOException {
  org.apache.avro.Schema schema = new org.apache.avro.Schema.Parser().parse(avroSchema);
  GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(schema);
  // try-with-resources guarantees the streams are closed even if decoding fails.
  try (InputStream input =
          new ByteArrayInputStream(json.getBytes(CarbonCommonConstants.DEFAULT_CHARSET));
      DataInputStream din = new DataInputStream(input)) {
    JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
    return reader.read(null, decoder);
  }
}
Use of org.apache.avro.file.DataFileWriter in the ksql project (Confluent):
class AvroSerializer, method serialize().
/**
 * Serializes the record as an in-memory representation of a standard Avro file.
 *
 * <p>That is, the returned bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail: This method uses Avro's {@code DataFileWriter}. The writer is
 * managed with try-with-resources so it is closed (flushing the final data block) even if
 * {@code append} throws — the previous version leaked the writer on failure.
 *
 * @param record the record to serialize; may be {@code null}
 * @return Avro-encoded record (bytes) that includes the Avro schema, or {@code null} if
 *     {@code record} is {@code null}
 * @throws IOException if writing the in-memory Avro container fails
 */
public byte[] serialize(final GenericContainer record) throws IOException {
  if (record == null) {
    return null;
  }
  final DatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(record.getSchema());
  final ByteArrayOutputStream out = new ByteArrayOutputStream();
  try (DataFileWriter<GenericContainer> writer = new DataFileWriter<>(datumWriter)) {
    writer.create(record.getSchema(), out);
    writer.append(record);
    // writer.close() (implicit here) flushes the final block into 'out'.
  }
  return out.toByteArray();
}
Use of org.apache.avro.file.DataFileWriter in the mist project (SNU SPL):
class AvroExecutionVertexStore, method saveAvroPhysicalOperatorChain().
/**
 * Saves the AvroPhysicalOperatorChain as operatorChainId.chain to disk.
 *
 * <p>The writer is closed via try-with-resources so the file handle is released even when
 * writing fails, and the original {@link IOException} is preserved as the cause of the
 * rethrown exception (it was previously dropped).
 *
 * @param tuple pair of (operator chain id, chain to persist)
 * @throws RuntimeException wrapping the underlying {@link IOException} on write failure
 */
public void saveAvroPhysicalOperatorChain(final Tuple<String, AvroPhysicalOperatorChain> tuple) {
  final AvroPhysicalOperatorChain avroPhysicalOperatorChain = tuple.getValue();
  // Create file with the name of the PhysicalOperatorChain Id.
  final File avroPhysicalOperatorChainFile = getAvroPhysicalOperatorChainFile(tuple.getKey());
  try (DataFileWriter<AvroPhysicalOperatorChain> dataFileWriter =
          new DataFileWriter<>(operatorChainDatumWriter)) {
    dataFileWriter.create(avroPhysicalOperatorChain.getSchema(), avroPhysicalOperatorChainFile);
    dataFileWriter.append(avroPhysicalOperatorChain);
  } catch (IOException e) {
    // Keep the cause so the failure is diagnosable from the stack trace.
    throw new RuntimeException("Writing AvroPhysicalOperatorChain has failed.", e);
  }
}
Use of org.apache.avro.file.DataFileWriter in the mist project (SNU SPL):
class AvroExecutionVertexStore, method saveAvroPhysicalSourceOutgoingEdgesInfo().
/**
 * Saves the AvroPhysicalSourceOutgoingEdgesInfo of the source to be reactivated.
 *
 * <p>The writer is closed via try-with-resources so the file handle is released even when
 * writing fails, and the original {@link IOException} is preserved as the cause of the
 * rethrown exception (it was previously dropped).
 *
 * @param tuple pair of (source id, outgoing-edges info to persist)
 * @throws RuntimeException wrapping the underlying {@link IOException} on write failure
 */
public void saveAvroPhysicalSourceOutgoingEdgesInfo(final Tuple<String, AvroPhysicalSourceOutgoingEdgesInfo> tuple) {
  final AvroPhysicalSourceOutgoingEdgesInfo avroPhysicalSourceOutgoingEdgesInfo = tuple.getValue();
  // Create file with the name of the source Id.
  final File avroPhysicalSourceOutgoingEdgesInfoFile = getAvroPhysicalSourceOutgoingEdgesInfoFile(tuple.getKey());
  try (DataFileWriter<AvroPhysicalSourceOutgoingEdgesInfo> dataFileWriter =
          new DataFileWriter<>(sourceDatumWriter)) {
    dataFileWriter.create(avroPhysicalSourceOutgoingEdgesInfo.getSchema(), avroPhysicalSourceOutgoingEdgesInfoFile);
    dataFileWriter.append(avroPhysicalSourceOutgoingEdgesInfo);
  } catch (IOException e) {
    // Keep the cause so the failure is diagnosable from the stack trace.
    throw new RuntimeException("Writing AvroPhysicalSourceOutgoingEdgesInfo has failed.", e);
  }
}
Aggregations