Search in sources :

Example 11 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project flink by apache.

the class AvroRecordInputFormatTest method writeTestFile.

/**
 * Writes an Avro data file containing two {@link User} records to {@code testFile}.
 *
 * <p>The first record is populated through setters using the shared {@code TEST_*}
 * constants; the second is assembled with the generated builder and explicitly sets
 * several fields to {@code null}.
 *
 * @param testFile destination file handed to {@link DataFileWriter#create}
 * @throws IOException if creating, appending to, or closing the data file fails
 */
public static void writeTestFile(File testFile) throws IOException {
    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);

    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);

    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);

    // Record 1: populated field by field through setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);

    // Record 2: constructed via the builder, with empty collections and null optionals.
    User user2 = User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(null)
            .setTypeUnion(null)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();

    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), testFile);
    try {
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);
    } finally {
        // Close even when an append fails so the file handle is not leaked.
        dataFileWriter.close();
    }
}
Also used : User(org.apache.flink.api.io.avro.generated.User) Address(org.apache.flink.api.io.avro.generated.Address) DataFileWriter(org.apache.avro.file.DataFileWriter) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 12 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project flink by apache.

the class AvroSplittableInputFormatTest method createFiles.

/**
 * Creates a temporary Avro data file in {@code testFile} before each test.
 *
 * <p>The file starts with two hand-built {@link User} records (one populated via
 * setters, one via the builder) followed by {@code NUM_RECORDS - 2} records whose name,
 * favorite number and double field come from a {@link Random} seeded with a fixed value,
 * so the generated content is the same on every run.
 *
 * @throws IOException if the temp file cannot be created or writing to it fails
 */
@Before
public void createFiles() throws IOException {
    testFile = File.createTempFile("AvroSplittableInputFormatTest", null);

    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);

    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);

    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

    Address addr = new Address();
    // Integer.valueOf replaces the deprecated boxing constructor new Integer(...).
    addr.setNum(Integer.valueOf(TEST_NUM));
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);

    // Record 1: populated field by field through setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);

    // Record 2: constructed via the builder, including fixed and union fields.
    User user2 = User.newBuilder()
            .setName(TEST_NAME)
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(new Fixed16())
            .setTypeUnion(123L)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();

    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    dataFileWriter.create(user1.getSchema(), testFile);
    try {
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);

        // Fixed seed keeps the generated records identical across runs.
        Random rnd = new Random(1337);
        for (int i = 0; i < NUM_RECORDS - 2; i++) {
            User user = new User();
            user.setName(TEST_NAME + rnd.nextInt());
            user.setFavoriteNumber(rnd.nextInt());
            user.setTypeDoubleTest(rnd.nextDouble());
            user.setTypeBoolTest(true);
            user.setTypeArrayString(stringArray);
            user.setTypeArrayBoolean(booleanArray);
            user.setTypeEnum(TEST_ENUM_COLOR);
            user.setTypeMap(longMap);

            Address address = new Address();
            address.setNum(Integer.valueOf(TEST_NUM));
            address.setStreet(TEST_STREET);
            address.setCity(TEST_CITY);
            address.setState(TEST_STATE);
            address.setZip(TEST_ZIP);
            user.setTypeNested(address);

            dataFileWriter.append(user);
        }
    } finally {
        // Close even when an append fails so the temp file handle is not leaked.
        dataFileWriter.close();
    }
}
Also used : User(org.apache.flink.api.io.avro.generated.User) Address(org.apache.flink.api.io.avro.generated.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Fixed16(org.apache.flink.api.io.avro.generated.Fixed16) Random(java.util.Random) Before(org.junit.Before)

Example 13 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project Plume by tdunning.

the class WriteAvroFile method writeSomething.

/**
 * Round-trips ten doubles through an Avro data file.
 *
 * <p>Writes the values {@code 3.0 * i} for {@code i} in {@code [0, 10)} to the file
 * {@code "x"}, then reads them back twice, once with a {@link DataFileReader} and once
 * with a {@link DataFileStream}, asserting each value in order.
 *
 * @throws IOException if writing or reading the file fails
 */
@Test
public void writeSomething() throws IOException {
    Schema s = Schema.create(Schema.Type.DOUBLE);
    File f = new File("x");
    f.deleteOnExit();

    DataFileWriter<Double> x = new DataFileWriter<Double>(new SpecificDatumWriter<Double>(s));
    DataFileWriter<Double> z = x.create(s, f);
    try {
        for (int i = 0; i < 10; i++) {
            z.append(3.0 * i);
        }
    } finally {
        // Release the file handle even if an append fails.
        z.close();
    }

    // First pass: random-access reader over the same file.
    DataFileReader<Double> in = new DataFileReader<Double>(f, new SpecificDatumReader<Double>(s));
    try {
        int k = 0;
        while (in.hasNext()) {
            assertEquals(3.0 * k++, in.next(), 0);
        }
    } finally {
        // A failed assertion must not leave the reader open.
        in.close();
    }

    // Second pass: streaming reader over a plain input stream.
    BufferedInputStream raw = new BufferedInputStream(new FileInputStream(f));
    try {
        DataFileStream<Double> data = new DataFileStream<Double>(raw, new SpecificDatumReader<Double>(s));
        int k = 0;
        while (data.hasNext()) {
            assertEquals(3.0 * k++, data.next(), 0);
        }
        data.close();
    } finally {
        // Closing the raw stream releases the file handle on every path, including
        // a failure while constructing the DataFileStream; a repeated close is harmless.
        raw.close();
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) DataFileStream(org.apache.avro.file.DataFileStream) FileInputStream(java.io.FileInputStream) DataFileReader(org.apache.avro.file.DataFileReader) BufferedInputStream(java.io.BufferedInputStream) File(java.io.File) Test(org.junit.Test)

Example 14 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project Plume by tdunning.

the class LocalPlume method writeAvroFile.

/**
 * Writes every element of {@code data} to an Avro data file at path {@code name}.
 *
 * @param name path of the file to create
 * @param data elements to append, in iteration order
 * @param type type descriptor from which the Avro schema is obtained
 * @throws IOException if creating, appending to, or closing the file fails
 */
@Override
public <T> void writeAvroFile(String name, PCollection<T> data, PType<T> type) throws IOException {
    Schema schema = AvroTypes.getSchema(type);
    DataFileWriter<T> factory = new DataFileWriter<T>(new SpecificDatumWriter<T>(schema));
    DataFileWriter<T> out = factory.create(schema, new File(name));
    try {
        for (T t : data) {
            out.append(t);
        }
    } finally {
        // Close even when iteration or an append fails so the file handle is not leaked.
        out.close();
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) File(java.io.File)

Example 15 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project beam by apache.

the class AvroSourceTest method generateTestFile.

/**
 * Generates an input Avro file containing the given records in the temporary directory and
 * returns the full path of the file.
 *
 * @param filename name of the file to create inside {@code tmpFolder}
 * @param elems records to append, in order
 * @param syncBehavior when to force a sync marker: after every {@code syncInterval} records
 *     ({@code SYNC_REGULAR}), after a randomly drawn number of records ({@code SYNC_RANDOM}),
 *     or never explicitly ({@code SYNC_DEFAULT})
 * @param syncInterval record count between syncs for {@code SYNC_REGULAR}; exclusive upper
 *     bound of the random draw for {@code SYNC_RANDOM}
 * @param coder supplies the record type and Avro schema
 * @param codec codec name passed to {@link CodecFactory#fromString}
 * @return the path of the generated file
 * @throws IOException if the file cannot be created or written
 */
private <T> String generateTestFile(String filename, List<T> elems, SyncBehavior syncBehavior, int syncInterval, AvroCoder<T> coder, String codec) throws IOException {
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();
    DatumWriter<T> datumWriter = coder.getType().equals(GenericRecord.class)
            ? new GenericDatumWriter<T>(coder.getSchema())
            : new ReflectDatumWriter<T>(coder.getSchema());
    // The output stream is managed by the same try-with-resources as the writer so it is
    // closed even if setCodec/create throws before the writer takes ownership of it.
    try (FileOutputStream os = new FileOutputStream(tmpFile);
            DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
        writer.setCodec(CodecFactory.fromString(codec));
        writer.create(coder.getSchema(), os);
        int recordIndex = 0;
        int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;
        for (T elem : elems) {
            writer.append(elem);
            recordIndex++;
            switch (syncBehavior) {
                case SYNC_REGULAR:
                    if (recordIndex == syncInterval) {
                        recordIndex = 0;
                        writer.sync();
                    }
                    break;
                case SYNC_RANDOM:
                    // NOTE(review): recordIndex is at least 1 here, so a drawn syncIndex of 0
                    // never matches and no further explicit syncs happen - confirm intended.
                    if (recordIndex == syncIndex) {
                        recordIndex = 0;
                        writer.sync();
                        syncIndex = random.nextInt(syncInterval);
                    }
                    break;
                case SYNC_DEFAULT:
                default:
                    // No explicit sync calls are made in this mode.
                    break;
            }
        }
    }
    return path;
}
Also used : Random(java.util.Random) FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter)34 Schema (org.apache.avro.Schema)21 GenericRecord (org.apache.avro.generic.GenericRecord)21 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)17 File (java.io.File)14 FileOutputStream (java.io.FileOutputStream)7 SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 IOException (java.io.IOException)4 GenericData (org.apache.avro.generic.GenericData)4 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 DataFileStream (org.apache.avro.file.DataFileStream)3 Person (org.apache.crunch.test.Person)3 Test (org.junit.Test)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 Random (java.util.Random)2 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)2 CodecFactory (org.apache.avro.file.CodecFactory)2