Search in sources :

Example 51 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project parquet-mr by apache.

the class ToAvroCommand method run.

/**
 * Copies the records of the single data file named in {@code targets} into an Avro
 * container file at {@code outputPath}.
 *
 * <p>The schema is read from {@code avroSchemaFile} when one is set, otherwise it is
 * obtained from the source via {@code getAvroSchema}. It is then filtered by
 * {@code columns} and the filtered schema is used both to open the source and to create
 * the output file.
 *
 * @return always {@code 0}
 * @throws IOException if reading the source or writing the output fails
 * @throws RuntimeException wrapping any runtime failure, with the number of records
 *     appended so far in the message
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    final String source = targets.get(0);
    final CodecFactory codecFactory = Codecs.avroCodec(compressionCodecName);
    // An explicitly supplied schema file wins over the schema obtained from the source.
    final Schema schema = (avroSchemaFile == null)
        ? getAvroSchema(source)
        : Schemas.fromAvsc(open(avroSchemaFile));
    final Schema projection = filterSchema(schema, columns);
    final Iterable<Record> records = openDataFile(source, projection);
    final DatumWriter<Record> datumWriter = new GenericDatumWriter<>(schema);
    long written = 0;
    // Stays true unless the whole copy completes; passed to Closeables.close below.
    boolean failed = true;
    try (DataFileWriter<Record> fileWriter = new DataFileWriter<>(datumWriter)) {
        fileWriter.setCodec(codecFactory);
        try (OutputStream os = overwrite ? create(outputPath) : createWithNoOverwrite(outputPath);
            DataFileWriter<Record> writer = fileWriter.create(projection, os)) {
            for (Record next : records) {
                writer.append(next);
                written++;
            }
        }
        failed = false;
    } catch (RuntimeException e) {
        throw new RuntimeException("Failed on record " + written, e);
    } finally {
        if (records instanceof Closeable) {
            Closeables.close((Closeable) records, failed);
        }
    }
    return 0;
}
Also used : Schema(org.apache.avro.Schema) Expressions.filterSchema(org.apache.parquet.cli.util.Expressions.filterSchema) DataFileWriter(org.apache.avro.file.DataFileWriter) OutputStream(java.io.OutputStream) Closeable(java.io.Closeable) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Record(org.apache.avro.generic.GenericData.Record) CodecFactory(org.apache.avro.file.CodecFactory)

Example 52 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project carbondata by apache.

the class TestUtil method jsonToAvro.

/**
 * Decodes a JSON-encoded datum into an Avro generic record using the given schema.
 *
 * @param json JSON text of the datum; converted to bytes with
 *     {@code CarbonCommonConstants.DEFAULT_CHARSET}
 * @param avroSchema Avro schema definition, parsed with {@code Schema.Parser}
 * @return the record read from the JSON input
 * @throws IOException if decoding fails or a resource cannot be closed
 */
public static GenericData.Record jsonToAvro(String json, String avroSchema) throws IOException {
    org.apache.avro.Schema schema = new org.apache.avro.Schema.Parser().parse(avroSchema);
    GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(schema);
    // try-with-resources closes both resources even if one close() throws, and keeps the
    // primary exception instead of letting a close failure replace it.
    try (InputStream input = new ByteArrayInputStream(json.getBytes(CarbonCommonConstants.DEFAULT_CHARSET));
        DataFileWriter<GenericData.Record> writer =
            new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>())) {
        // NOTE(review): the writer only emits a file header into a buffer that is never
        // read; kept to preserve existing behavior - confirm whether it can be removed.
        writer.create(schema, new ByteArrayOutputStream());
        JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, new DataInputStream(input));
        return reader.read(null, decoder);
    }
}
Also used : DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) JsonDecoder(org.apache.avro.io.JsonDecoder)

Example 53 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project ksql by confluentinc.

the class AvroSerializer method serialize.

/**
 * Serializes the record as an in-memory representation of a standard Avro file.
 *
 * <p>That is, the returned bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail:  This method uses Avro's {@code DataFileWriter}.
 *
 * @param record the record to serialize; may be {@code null}
 * @return Avro-encoded record (bytes) that includes the Avro schema, or {@code null} if
 *     {@code record} is {@code null}
 * @throws IOException if writing the record fails
 */
public byte[] serialize(final GenericContainer record) throws IOException {
    if (record == null) {
        return null;
    }
    final DatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(record.getSchema());
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    // try-with-resources closes the writer on the failure path as well; closing the
    // writer flushes the buffered block into `out` before the bytes are read below.
    try (DataFileWriter<GenericContainer> writer = new DataFileWriter<>(datumWriter)) {
        writer.create(record.getSchema(), out);
        writer.append(record);
    }
    return out.toByteArray();
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericContainer(org.apache.avro.generic.GenericContainer)

Example 54 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project mist by snuspl.

the class AvroExecutionVertexStore method saveAvroPhysicalOperatorChain.

/**
 * Saves the AvroPhysicalOperatorChain as operatorChainId.chain to disk.
 *
 * @param tuple pair whose key is used to look up the target file and whose value is the
 *     chain to write
 * @throws RuntimeException if writing fails; the underlying {@link IOException} is the cause
 */
public void saveAvroPhysicalOperatorChain(final Tuple<String, AvroPhysicalOperatorChain> tuple) {
    // try-with-resources releases the writer (and its file handle) even when
    // create/append fails part-way through.
    try (DataFileWriter<AvroPhysicalOperatorChain> dataFileWriter = new DataFileWriter<>(operatorChainDatumWriter)) {
        final AvroPhysicalOperatorChain avroPhysicalOperatorChain = tuple.getValue();
        // Create file with the name of the PhysicalOperatorChain Id.
        final File avroPhysicalOperatorChainFile = getAvroPhysicalOperatorChainFile(tuple.getKey());
        dataFileWriter.create(avroPhysicalOperatorChain.getSchema(), avroPhysicalOperatorChainFile);
        dataFileWriter.append(avroPhysicalOperatorChain);
    } catch (IOException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new RuntimeException("Writing AvroPhysicalOperatorChain has failed.", e);
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) AvroPhysicalOperatorChain(edu.snu.mist.formats.avro.AvroPhysicalOperatorChain) IOException(java.io.IOException) File(java.io.File)

Example 55 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project mist by snuspl.

the class AvroExecutionVertexStore method saveAvroPhysicalSourceOutgoingEdgesInfo.

/**
 * Saves the AvroPhysicalSourceOutgoingEdgesInfo of the source to be reactivated.
 *
 * @param tuple pair whose key is used to look up the target file and whose value is the
 *     outgoing-edges info to write
 * @throws RuntimeException if writing fails; the underlying {@link IOException} is the cause
 */
public void saveAvroPhysicalSourceOutgoingEdgesInfo(final Tuple<String, AvroPhysicalSourceOutgoingEdgesInfo> tuple) {
    // try-with-resources releases the writer (and its file handle) even when
    // create/append fails part-way through.
    try (DataFileWriter<AvroPhysicalSourceOutgoingEdgesInfo> dataFileWriter = new DataFileWriter<>(sourceDatumWriter)) {
        final AvroPhysicalSourceOutgoingEdgesInfo avroPhysicalSourceOutgoingEdgesInfo = tuple.getValue();
        // Resolve the target file from the tuple's key.
        final File avroPhysicalSourceOutgoingEdgesInfoFile = getAvroPhysicalSourceOutgoingEdgesInfoFile(tuple.getKey());
        dataFileWriter.create(avroPhysicalSourceOutgoingEdgesInfo.getSchema(), avroPhysicalSourceOutgoingEdgesInfoFile);
        dataFileWriter.append(avroPhysicalSourceOutgoingEdgesInfo);
    } catch (IOException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new RuntimeException("Writing AvroPhysicalSourceOutgoingEdgesInfo has failed.", e);
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) File(java.io.File) AvroPhysicalSourceOutgoingEdgesInfo(edu.snu.mist.formats.avro.AvroPhysicalSourceOutgoingEdgesInfo)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 102, GenericRecord (org.apache.avro.generic.GenericRecord): 58, Schema (org.apache.avro.Schema): 50, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 47, File (java.io.File): 38, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 22, IOException (java.io.IOException): 22, GenericData (org.apache.avro.generic.GenericData): 17, FileOutputStream (java.io.FileOutputStream): 15, Test (org.junit.Test): 14, HashMap (java.util.HashMap): 11, InputStream (java.io.InputStream): 10, SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 10, ArrayList (java.util.ArrayList): 9, Path (org.apache.hadoop.fs.Path): 9, ByteArrayInputStream (java.io.ByteArrayInputStream): 8, OutputStream (java.io.OutputStream): 8, ByteBuffer (java.nio.ByteBuffer): 7, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 7, MockFlowFile (org.apache.nifi.util.MockFlowFile): 7