use of org.apache.avro.file.DataFileWriter in project flink by apache.
the class AvroRecordInputFormatTest method writeTestFile.
/**
 * Writes an Avro data file containing two {@code User} records to {@code testFile}.
 *
 * <p>The first record is populated through setters from the {@code TEST_*} constants. The
 * second one is assembled with {@code User.newBuilder()} and explicitly sets several of its
 * fields to {@code null}.
 *
 * @param testFile destination passed to {@code DataFileWriter#create}
 * @throws IOException if the file cannot be created or a record cannot be appended
 */
public static void writeTestFile(File testFile) throws IOException {
    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);

    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);

    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);

    // Record 1: constructed via setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);

    // Record 2: constructed via builder.
    User user2 = User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(null)
            .setTypeUnion(null)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();

    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    // try-with-resources: the writer (and the file it opened) is closed even when
    // create() or append() throws, instead of only on the success path.
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
        dataFileWriter.create(user1.getSchema(), testFile);
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);
    }
}
use of org.apache.avro.file.DataFileWriter in project flink by apache.
the class AvroSplittableInputFormatTest method createFiles.
/**
 * Creates a fresh temporary Avro file and stores it in {@code testFile}.
 *
 * <p>The file receives two fixed {@code User} records (one built through setters, one through
 * {@code User.newBuilder()}) followed by {@code NUM_RECORDS - 2} records whose name, favorite
 * number and double value come from a {@link Random} seeded with 1337.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
@Before
public void createFiles() throws IOException {
    testFile = File.createTempFile("AvroSplittableInputFormatTest", null);

    ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);

    ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);

    HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

    Address addr = new Address();
    // Autoboxing instead of the deprecated Integer(int) constructor.
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);

    // Record 1: constructed via setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);

    // Record 2: constructed via builder.
    User user2 = User.newBuilder()
            .setName(TEST_NAME)
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<CharSequence>())
            .setTypeArrayBoolean(new ArrayList<Boolean>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<CharSequence, Long>())
            .setTypeFixed(new Fixed16())
            .setTypeUnion(123L)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .build();

    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    // try-with-resources: the writer is closed even if writing one of the records fails,
    // so a failing setup does not leave the temp file handle open.
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
        dataFileWriter.create(user1.getSchema(), testFile);
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);

        // Fixed seed keeps the generated records identical from run to run.
        Random rnd = new Random(1337);
        for (int i = 0; i < NUM_RECORDS - 2; i++) {
            User user = new User();
            user.setName(TEST_NAME + rnd.nextInt());
            user.setFavoriteNumber(rnd.nextInt());
            user.setTypeDoubleTest(rnd.nextDouble());
            user.setTypeBoolTest(true);
            user.setTypeArrayString(stringArray);
            user.setTypeArrayBoolean(booleanArray);
            user.setTypeEnum(TEST_ENUM_COLOR);
            user.setTypeMap(longMap);

            Address address = new Address();
            address.setNum(TEST_NUM);
            address.setStreet(TEST_STREET);
            address.setCity(TEST_CITY);
            address.setState(TEST_STATE);
            address.setZip(TEST_ZIP);
            user.setTypeNested(address);

            dataFileWriter.append(user);
        }
    }
}
use of org.apache.avro.file.DataFileWriter in project Plume by tdunning.
the class WriteAvroFile method writeSomething.
/**
 * Round-trips ten doubles ({@code 0.0, 3.0, ..., 27.0}) through an Avro data file named
 * {@code "x"} and reads them back twice: once with a {@code DataFileReader} and once with a
 * {@code DataFileStream} over a buffered file input stream.
 *
 * @throws IOException if writing or reading the file fails
 */
@Test
public void writeSomething() throws IOException {
    final int recordCount = 10;
    Schema s = Schema.create(Schema.Type.DOUBLE);
    File f = new File("x");
    // Registered up front so the file is cleaned up even if writing fails part-way.
    f.deleteOnExit();

    // Each reader/writer is scoped with try-with-resources so a failing assertion or
    // I/O error cannot leave the file handle open.
    try (DataFileWriter<Double> writer = new DataFileWriter<Double>(new SpecificDatumWriter<Double>(s))) {
        writer.create(s, f);
        for (int i = 0; i < recordCount; i++) {
            writer.append(3.0 * i);
        }
    }

    int k = 0;
    try (DataFileReader<Double> in = new DataFileReader<Double>(f, new SpecificDatumReader<Double>(s))) {
        while (in.hasNext()) {
            assertEquals(3.0 * k++, in.next(), 0);
        }
    }
    // Without this check an empty or truncated file would pass the loop above vacuously.
    assertEquals(recordCount, k);

    k = 0;
    try (BufferedInputStream raw = new BufferedInputStream(new FileInputStream(f));
            DataFileStream<Double> data = new DataFileStream<Double>(raw, new SpecificDatumReader<Double>(s))) {
        while (data.hasNext()) {
            assertEquals(3.0 * k++, data.next(), 0);
        }
    }
    assertEquals(recordCount, k);
}
use of org.apache.avro.file.DataFileWriter in project Plume by tdunning.
the class LocalPlume method writeAvroFile.
/**
 * Writes every element of {@code data} to an Avro data file at path {@code name}, using the
 * schema that {@code AvroTypes.getSchema(type)} returns for {@code type}.
 *
 * @param name path of the file to create
 * @param data elements to append, in iteration order
 * @param type type descriptor from which the Avro schema is derived
 * @throws IOException if the file cannot be created or an element cannot be written
 */
@Override
public <T> void writeAvroFile(String name, PCollection<T> data, PType<T> type) throws IOException {
    Schema schema = AvroTypes.getSchema(type);
    // try-with-resources closes the writer (and the underlying file) even when create(),
    // append() or iterating over the collection throws.
    try (DataFileWriter<T> out = new DataFileWriter<T>(new SpecificDatumWriter<T>(schema))) {
        out.create(schema, new File(name));
        for (T t : data) {
            out.append(t);
        }
    }
}
use of org.apache.avro.file.DataFileWriter in project beam by apache.
the class AvroSourceTest method generateTestFile.
/**
 * Generates an input Avro file containing the given records in the temporary directory and
 * returns the full path of the file.
 *
 * <p>Sync markers are written explicitly depending on {@code syncBehavior}:
 * <ul>
 *   <li>{@code SYNC_REGULAR}: {@code writer.sync()} after every {@code syncInterval} records;
 *   <li>{@code SYNC_RANDOM}: {@code writer.sync()} once the number of records since the last
 *       sync equals a value drawn from {@code random.nextInt(syncInterval)} (seed 0);
 *   <li>{@code SYNC_DEFAULT} or anything else: no explicit sync calls.
 * </ul>
 *
 * @param filename name of the file to create inside {@code tmpFolder}
 * @param elems records to append, in order
 * @param syncBehavior when explicit sync markers are written
 * @param syncInterval record interval (regular) or exclusive upper bound of the random draw
 * @param coder supplies the schema and the element type used to pick the datum writer
 * @param codec codec name handed to {@code CodecFactory.fromString}
 * @return the path of the written file, as returned by {@code File#toString()}
 * @throws IOException if the file cannot be created or written
 */
private <T> String generateTestFile(String filename, List<T> elems, SyncBehavior syncBehavior, int syncInterval, AvroCoder<T> coder, String codec) throws IOException {
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();
    DatumWriter<T> datumWriter = coder.getType().equals(GenericRecord.class)
            ? new GenericDatumWriter<T>(coder.getSchema())
            : new ReflectDatumWriter<T>(coder.getSchema());
    // The stream is a managed resource of its own: the writer only takes it over in
    // create(), so a failure before that point (e.g. in setCodec) would otherwise leak it.
    try (FileOutputStream os = new FileOutputStream(tmpFile);
            DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
        writer.setCodec(CodecFactory.fromString(codec));
        writer.create(coder.getSchema(), os);
        int recordIndex = 0;
        int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;
        for (T elem : elems) {
            writer.append(elem);
            recordIndex++;
            switch (syncBehavior) {
                case SYNC_REGULAR:
                    if (recordIndex == syncInterval) {
                        recordIndex = 0;
                        writer.sync();
                    }
                    break;
                case SYNC_RANDOM:
                    // NOTE(review): nextInt may return 0; recordIndex is already >= 1 here, so
                    // after a zero draw this branch never fires again. Confirm this is intended.
                    if (recordIndex == syncIndex) {
                        recordIndex = 0;
                        writer.sync();
                        syncIndex = random.nextInt(syncInterval);
                    }
                    break;
                case SYNC_DEFAULT:
                default:
                    // No explicit sync calls in this mode.
                    break;
            }
        }
    }
    return path;
}
Aggregations