Search in sources :

Example 56 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.

the class AvroTypeSortTest method writeAvroFile.

/**
 * Writes the given {@code Person} records to {@code avroFile} as an Avro data file
 * using {@code Person.SCHEMA$}.
 *
 * @param people   records to append, in iteration order
 * @param avroFile destination file; opened with {@link FileOutputStream}
 * @throws IOException if the file cannot be opened or a write/close fails
 */
private void writeAvroFile(List<Person> people, File avroFile) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(avroFile);
    try {
        SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
        DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
        try {
            dataFileWriter.create(Person.SCHEMA$, outputStream);
            for (Person person : people) {
                dataFileWriter.append(person);
            }
        } finally {
            // Close the writer even when create/append throws so buffered data is not left dangling.
            dataFileWriter.close();
        }
    } finally {
        // Always release the file handle, including when the writer was never successfully opened.
        outputStream.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) Person(org.apache.crunch.test.Person) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 57 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.

the class AvroFileReaderFactoryTest method populateGenericFile.

/**
 * Writes the given generic records to {@code this.avroFile} as an Avro data file
 * using {@code outputSchema} for both the datum writer and the file header.
 *
 * @param genericRecords records to append, in iteration order
 * @param outputSchema   schema passed to the datum writer and to {@code create}
 * @throws IOException if the file cannot be opened or a write/close fails
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
    try {
        GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(outputSchema);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
        try {
            dataFileWriter.create(outputSchema, outputStream);
            for (GenericRecord record : genericRecords) {
                dataFileWriter.append(record);
            }
        } finally {
            // Close the writer even when create/append throws.
            dataFileWriter.close();
        }
    } finally {
        // Always release the file handle, including when the writer was never successfully opened.
        outputStream.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 58 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project h2o-3 by h2oai.

the class AvroFileGenerator method generateUnionTypes.

/**
 * Generates an Avro file named {@code filename} inside a directory obtained from
 * {@code Files.createTempDir()}, containing {@code nrows} records of the
 * {@code test_union_types} schema. Every column is declared optional; row 0 holds
 * {@code null} in every column, later rows hold values derived from the row index.
 *
 * @param filename name of the file to create inside the temp directory
 * @param nrows    number of records to write
 * @return the written file
 * @throws IOException if creating, writing or closing the Avro file fails
 */
public static File generateUnionTypes(String filename, int nrows) throws IOException {
    File target = new File(Files.createTempDir(), filename);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>();
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    // Per the SchemaBuilder javadoc, ".type().optional().longType()" is shorthand for
    // ".type().unionOf().nullType().and().longType().endUnion().nullDefault()".
    Schema schema = SchemaBuilder.builder().record("test_union_types").fields()
            .name("CUString").type().optional().stringType()
            .name("CUBytes").type().optional().bytesType()
            .name("CUInt").type().optional().intType()
            .name("CULong").type().optional().longType()
            .name("CUFloat").type().optional().floatType()
            .name("CUDouble").type().optional().doubleType()
            .name("CUBoolean").type().optional().booleanType()
            .endRecord();
    try {
        fileWriter.create(schema, target);
        for (int row = 0; row < nrows; row++) {
            // The first row is written with null in every column.
            final boolean nullRow = (row == 0);
            GenericRecord record = new GenericData.Record(schema);
            record.put("CUString", nullRow ? null : String.valueOf(row));
            record.put("CUBytes", nullRow ? null : ByteBuffer.wrap(StringUtils.toBytes(row)));
            record.put("CUInt", nullRow ? null : Integer.valueOf(row));
            record.put("CULong", nullRow ? null : Long.valueOf(row));
            record.put("CUFloat", nullRow ? null : Float.valueOf(row));
            record.put("CUDouble", nullRow ? null : Double.valueOf(row));
            record.put("CUBoolean", nullRow ? null : Boolean.valueOf((row & 1) == 1));
            fileWriter.append(record);
        }
        return target;
    } finally {
        fileWriter.close();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)

Example 59 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project h2o-3 by h2oai.

the class AvroFileGenerator method generatePrimitiveTypes.

/**
 * Generates an Avro file named {@code filename} inside a directory obtained from
 * {@code Files.createTempDir()}, containing {@code nrows} records of the
 * {@code test_primitive_types} schema (string, bytes, int, long, float, double,
 * boolean and null columns, none with a default). Column values are derived from
 * the row index; the {@code CNull} column is always {@code null}.
 *
 * @param filename name of the file to create inside the temp directory
 * @param nrows    number of records to write
 * @return the written file
 * @throws IOException if creating, writing or closing the Avro file fails
 */
public static File generatePrimitiveTypes(String filename, int nrows) throws IOException {
    File target = new File(Files.createTempDir(), filename);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>();
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    Schema schema = SchemaBuilder.builder().record("test_primitive_types").fields()
            .name("CString").type("string").noDefault()
            .name("CBytes").type("bytes").noDefault()
            .name("CInt").type("int").noDefault()
            .name("CLong").type("long").noDefault()
            .name("CFloat").type("float").noDefault()
            .name("CDouble").type("double").noDefault()
            .name("CBoolean").type("boolean").noDefault()
            .name("CNull").type("null").noDefault()
            .endRecord();
    try {
        fileWriter.create(schema, target);
        int row = 0;
        while (row < nrows) {
            GenericRecord record = new GenericData.Record(schema);
            record.put("CString", String.valueOf(row));
            record.put("CBytes", ByteBuffer.wrap(StringUtils.toBytes(row)));
            record.put("CInt", Integer.valueOf(row));
            record.put("CLong", Long.valueOf(row));
            record.put("CFloat", Float.valueOf(row));
            record.put("CDouble", Double.valueOf(row));
            // Odd row indices are written as true, even ones as false.
            record.put("CBoolean", Boolean.valueOf((row & 1) == 1));
            record.put("CNull", null);
            fileWriter.append(record);
            row++;
        }
        return target;
    } finally {
        fileWriter.close();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)

Example 60 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project h2o-3 by h2oai.

the class AvroFileGenerator method generateEnumTypes.

/**
 * Generates an Avro file named {@code filename} inside a directory obtained from
 * {@code Files.createTempDir()}, containing {@code nrows} records of the
 * {@code test_enum_types} schema: a required enum column {@code CEnum} built from
 * {@code categories[0]} and an optional enum column {@code CUEnum} built from
 * {@code categories[1]}. The schema is printed to standard output.
 *
 * @param filename   name of the file to create inside the temp directory
 * @param nrows      number of records to write
 * @param categories exactly two symbol arrays, one per enum column (checked by assertion)
 * @return the written file
 * @throws IOException if creating, writing or closing the Avro file fails
 */
public static File generateEnumTypes(String filename, int nrows, String[][] categories) throws IOException {
    assert categories.length == 2 : "Needs only 2 columns";
    File target = new File(Files.createTempDir(), filename);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>();
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    Schema requiredEnum = SchemaBuilder.enumeration("CEnum1").symbols(categories[0]);
    Schema optionalEnum = SchemaBuilder.enumeration("CEnum2").symbols(categories[1]);
    Schema schema = SchemaBuilder.builder().record("test_enum_types").fields()
            .name("CEnum").type(requiredEnum).noDefault()
            .name("CUEnum").type().optional().type(optionalEnum)
            .endRecord();
    System.out.println(schema);
    final int requiredCount = categories[0].length;
    final int optionalCount = categories[1].length;
    try {
        fileWriter.create(schema, target);
        for (int row = 0; row < nrows; row++) {
            GenericRecord record = new GenericData.Record(schema);
            // Required column cycles through its symbols by row index.
            record.put("CEnum", new GenericData.EnumSymbol(requiredEnum, categories[0][row % requiredCount]));
            // Optional column is null whenever the row index is a multiple of (symbol count + 1).
            if (row % (optionalCount + 1) == 0) {
                record.put("CUEnum", null);
            } else {
                record.put("CUEnum", new GenericData.EnumSymbol(optionalEnum, categories[1][row % optionalCount]));
            }
            fileWriter.append(record);
        }
        return target;
    } finally {
        fileWriter.close();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File) GenericData(org.apache.avro.generic.GenericData)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 102; GenericRecord (org.apache.avro.generic.GenericRecord): 58; Schema (org.apache.avro.Schema): 50; GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 47; File (java.io.File): 38; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 22; IOException (java.io.IOException): 22; GenericData (org.apache.avro.generic.GenericData): 17; FileOutputStream (java.io.FileOutputStream): 15; Test (org.junit.Test): 14; HashMap (java.util.HashMap): 11; InputStream (java.io.InputStream): 10; SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 10; ArrayList (java.util.ArrayList): 9; Path (org.apache.hadoop.fs.Path): 9; ByteArrayInputStream (java.io.ByteArrayInputStream): 8; OutputStream (java.io.OutputStream): 8; ByteBuffer (java.nio.ByteBuffer): 7; GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 7; MockFlowFile (org.apache.nifi.util.MockFlowFile): 7