Search in sources:

Example 96 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project drill by apache.

From the class AvroDataGenerator, method generateMultiDimensionalArray:

/**
 * Generates an Avro file containing a single column that is a two-dimensional string array.
 * Each record holds {@code arraySize} inner arrays of two strings each, with values of the
 * form {@code val_<record>_<outer>_<inner>}.
 *
 * @param numRecords number of records to write
 * @param arraySize  number of inner arrays per record
 * @return name of the generated file (created under the test watcher's root dir)
 * @throws Exception if the file cannot be created or written
 */
public String generateMultiDimensionalArray(int numRecords, int arraySize) throws Exception {
    File file = File.createTempFile("avro-multi-dimensional-array-test", ".avro", dirTestWatcher.getRootDir());
    String colTwoDimsName = "col_array_two_dims";
    Schema schema = SchemaBuilder.record("rec").fields()
        .name(colTwoDimsName).type().array().items().array().items().stringType().noDefault()
        .endRecord();
    // The field schema is loop-invariant; resolve it once instead of once per record.
    Schema twoDimsSchema = schema.getField(colTwoDimsName).schema();
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        writer.create(schema, file);
        for (int i = 0; i < numRecords; i++) {
            GenericRecord record = new GenericData.Record(schema);
            // Capacity hint is the outer array length (the original passed numRecords by mistake).
            GenericArray<GenericArray<String>> arrayTwoDims = new GenericData.Array<>(arraySize, twoDimsSchema);
            for (int a = 0; a < arraySize; a++) {
                GenericArray<String> nestedArray = new GenericData.Array<>(2, twoDimsSchema.getElementType());
                nestedArray.add(String.format("val_%s_%s_0", i, a));
                nestedArray.add(String.format("val_%s_%s_1", i, a));
                arrayTwoDims.add(nestedArray);
            }
            record.put(colTwoDimsName, arrayTwoDims);
            writer.append(record);
        }
    }
    return file.getName();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericArray(org.apache.avro.generic.GenericArray) GenericArray(org.apache.avro.generic.GenericArray) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 97 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project drill by apache.

From the class AvroDataGenerator, method generateDuration:

/**
 * Generates an Avro file with a single {@code duration} logical type column backed by a
 * 12-byte fixed schema (three little-endian unsigned ints: months, days, milliseconds).
 *
 * @param numRows number of records to write
 * @return name of the generated file (created under the test watcher's root dir)
 * @throws Exception if the file cannot be created or written
 */
public String generateDuration(int numRows) throws Exception {
    File file = File.createTempFile("avro-duration-test", ".avro", dirTestWatcher.getRootDir());
    Schema fixedSchema = SchemaBuilder.builder().fixed("duration_fixed").size(12);
    Schema durationSchema = new LogicalType("duration").addToSchema(fixedSchema);
    Schema schema = SchemaBuilder.record("record")
        .fields()
        .name("col_duration").type(durationSchema).noDefault()
        .endRecord();
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        writer.create(schema, file);
        for (int row = 0; row < numRows; row++) {
            // Duration layout per the Avro spec: months, days, milliseconds — each LE uint32.
            ByteBuffer buffer = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
            buffer.putInt(10 + row);   // months
            buffer.putInt(100 + row);  // days
            buffer.putInt(1000 + row); // milliseconds
            GenericRecord record = new GenericData.Record(schema);
            record.put("col_duration", new GenericData.Fixed(durationSchema, buffer.array()));
            writer.append(record);
        }
    }
    return file.getName();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) LogicalType(org.apache.avro.LogicalType) ByteBuffer(java.nio.ByteBuffer) GenericData(org.apache.avro.generic.GenericData) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 98 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project drill by apache.

From the class AvroDataGenerator, method generateDecimalData:

/**
 * Generates an Avro file with four decimal logical type columns: positive and negative
 * values backed by both a bytes schema (precision 10, scale 2) and a fixed(5) schema
 * (precision 5, scale 2).
 *
 * @param numRecords number of records to write
 * @return name of the generated file (created under the test watcher's root dir)
 * @throws Exception if the file cannot be created or written
 */
public String generateDecimalData(int numRecords) throws Exception {
    File file = File.createTempFile("avro-decimal-test", ".avro", dirTestWatcher.getRootDir());
    Schema decBytes = LogicalTypes.decimal(10, 2).addToSchema(SchemaBuilder.builder().bytesType());
    Schema decFixed = LogicalTypes.decimal(5, 2).addToSchema(SchemaBuilder.builder().fixed("dec_fixed").size(5));
    Schema schema = SchemaBuilder.record("rec").fields()
        .name("col_dec_pos_bytes").type(decBytes).noDefault()
        .name("col_dec_neg_bytes").type(decBytes).noDefault()
        .name("col_dec_pos_fixed").type(decFixed).noDefault()
        .name("col_dec_neg_fixed").type(decFixed).noDefault()
        .endRecord();
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        writer.create(schema, file);
        for (int i = 0; i < numRecords; i++) {
            GenericRecord record = new GenericData.Record(schema);
            // bytes-backed decimals: unscaled two's-complement value, any length
            record.put("col_dec_pos_bytes", ByteBuffer.wrap(BigInteger.valueOf(100 + i).toByteArray()));
            record.put("col_dec_neg_bytes", ByteBuffer.wrap(BigInteger.valueOf(-200 + i).toByteArray()));
            // fixed-backed decimals: unscaled value sign-extended to exactly 5 bytes
            record.put("col_dec_pos_fixed", new GenericData.Fixed(decFixed, toSignExtendedBytes(300 + i, 5)));
            record.put("col_dec_neg_fixed", new GenericData.Fixed(decFixed, toSignExtendedBytes(-400 + i, 5)));
            writer.append(record);
        }
    }
    return file.getName();
}

/**
 * Encodes a value as a big-endian two's-complement array of exactly {@code size} bytes,
 * left-padding with the sign byte (0x00 for non-negative, 0xFF for negative values).
 *
 * @param value the value to encode
 * @param size  required output length in bytes
 * @return the sign-extended encoding
 * @throws IllegalArgumentException if the value does not fit into {@code size} bytes
 */
private static byte[] toSignExtendedBytes(long value, int size) {
    byte[] valueBytes = BigInteger.valueOf(value).toByteArray();
    if (valueBytes.length > size) {
        throw new IllegalArgumentException("Value " + value + " does not fit into " + size + " bytes");
    }
    byte[] result = new byte[size];
    int offset = size - valueBytes.length;
    // Non-negative values need 0x00 padding, which a fresh byte[] already provides;
    // negative values must be sign-extended with 0xFF.
    if (value < 0) {
        Arrays.fill(result, 0, offset, (byte) -1);
    }
    System.arraycopy(valueBytes, 0, result, offset, valueBytes.length);
    return result;
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) ByteBuffer(java.nio.ByteBuffer) GenericData(org.apache.avro.generic.GenericData) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 99 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project incubator-gobblin by apache.

From the class FsSpecProducer, method writeAvroJobSpec:

/**
 * Persists a job spec as an Avro data file under the spec consumer path.
 *
 * The spec is first written to a uniquely-named temporary directory and then renamed into
 * place, so consumers never observe a partially written file. The temporary directory is
 * deleted afterwards.
 *
 * @param jobSpec the spec to persist
 * @throws IOException if any filesystem or serialization step fails
 */
private void writeAvroJobSpec(AvroJobSpec jobSpec) throws IOException {
    Path jobSpecPath = new Path(this.specConsumerPath, annotateSpecFileName(jobSpec.getUri()));
    // Write the new JobSpec to a temporary path first.
    Path tmpDir = new Path(this.specConsumerPath, UUID.randomUUID().toString());
    if (!fs.exists(tmpDir)) {
        fs.mkdirs(tmpDir);
    }
    Path tmpJobSpecPath = new Path(tmpDir, jobSpec.getUri());
    DatumWriter<AvroJobSpec> datumWriter = new SpecificDatumWriter<>(AvroJobSpec.SCHEMA$);
    // try-with-resources: the original leaked the writer and output stream if
    // create()/append() threw before close() was reached.
    try (OutputStream out = fs.create(tmpJobSpecPath);
         DataFileWriter<AvroJobSpec> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(AvroJobSpec.SCHEMA$, out);
        dataFileWriter.append(jobSpec);
    }
    // Rename the JobSpec from temporary to final location.
    HadoopUtils.renamePath(fs, tmpJobSpecPath, jobSpecPath, true);
    // Delete the temporary path once the jobspec has been moved to its final publish location.
    log.info("Deleting {}", tmpJobSpecPath.getParent().toString());
    fs.delete(tmpJobSpecPath.getParent(), true);
}
Also used : Path(org.apache.hadoop.fs.Path) DataFileWriter(org.apache.avro.file.DataFileWriter) OutputStream(java.io.OutputStream) AvroJobSpec(org.apache.gobblin.runtime.job_spec.AvroJobSpec) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 100 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project carbondata by apache.

From the class AvroCarbonWriter, method write:

/**
 * Write single row data, input row is Avro Record.
 *
 * Accepts either a {@link GenericData.Record} directly, or a JSON {@link String} that is
 * decoded into a record against {@code this.avroSchema}. The record is then flattened to a
 * CSV row and handed to the record writer. Any failure closes this writer and is rethrown
 * wrapped in an IOException.
 *
 * @param object a GenericData.Record or a JSON String matching the configured schema
 * @throws IOException if decoding or writing fails
 */
@Override
public void write(Object object) throws IOException {
    try {
        GenericData.Record record;
        if (object instanceof GenericData.Record) {
            record = (GenericData.Record) object;
        } else if (object instanceof String) {
            String json = (String) object;
            // try-with-resources replaces the manual finally block: the original never
            // closed 'output' or 'din', used raw types, and a failing writer.close() in
            // the finally could mask the original exception. All streams are in-memory.
            try (InputStream input = new ByteArrayInputStream(json.getBytes(CarbonCommonConstants.DEFAULT_CHARSET));
                 DataInputStream din = new DataInputStream(input);
                 ByteArrayOutputStream output = new ByteArrayOutputStream();
                 DataFileWriter<GenericData.Record> writer =
                     new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>())) {
                // NOTE(review): this writer only emits an Avro header into a discarded
                // in-memory buffer; it looks redundant to the JSON decode below — confirm
                // before removing.
                writer.create(this.avroSchema, output);
                GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(this.avroSchema);
                JsonDecoder decoder = DecoderFactory.get().jsonDecoder(this.avroSchema, din);
                record = reader.read(null, decoder);
            }
        } else {
            throw new UnsupportedOperationException("carbon not support " + object + ", only support GenericData.Record " + "and String for " + this.getClass().getName());
        }
        // convert Avro record to CSV String[]
        Object[] csvRecord = avroToCsv(record);
        writable.set(csvRecord);
        recordWriter.write(NullWritable.get(), writable);
    } catch (Exception e) {
        close();
        throw new IOException(e);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) GenericData(org.apache.avro.generic.GenericData) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) CarbonDataLoadingException(org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException) JsonDecoder(org.apache.avro.io.JsonDecoder) ByteArrayInputStream(java.io.ByteArrayInputStream) StructObject(org.apache.carbondata.processing.loading.complexobjects.StructObject) ArrayObject(org.apache.carbondata.processing.loading.complexobjects.ArrayObject)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter)102 GenericRecord (org.apache.avro.generic.GenericRecord)58 Schema (org.apache.avro.Schema)50 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)47 File (java.io.File)38 ByteArrayOutputStream (java.io.ByteArrayOutputStream)22 IOException (java.io.IOException)22 GenericData (org.apache.avro.generic.GenericData)17 FileOutputStream (java.io.FileOutputStream)15 Test (org.junit.Test)14 HashMap (java.util.HashMap)11 InputStream (java.io.InputStream)10 SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter)10 ArrayList (java.util.ArrayList)9 Path (org.apache.hadoop.fs.Path)9 ByteArrayInputStream (java.io.ByteArrayInputStream)8 OutputStream (java.io.OutputStream)8 ByteBuffer (java.nio.ByteBuffer)7 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)7 MockFlowFile (org.apache.nifi.util.MockFlowFile)7