Search in sources :

Example 91 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project flink by apache.

the class AvroBulkFormatTest method before.

/**
 * Creates a temporary Avro container file and fills it with every row of {@code TEST_DATA}.
 *
 * <p>The Avro schema is derived from {@code ROW_TYPE}; each row is converted to a
 * {@link GenericRecord} with that schema and appended to the file. The resulting file is
 * stored in {@code tmpFile}.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
@BeforeEach
public void before() throws IOException {
    // Files.createTempFile already creates the file on disk, so no separate createNewFile() is needed.
    tmpFile = Files.createTempFile("avro-bulk-format-test", ".avro").toFile();
    Schema schema = AvroSchemaConverter.convertToSchema(ROW_TYPE);
    RowDataToAvroConverters.RowDataToAvroConverter converter = RowDataToAvroConverters.createConverter(ROW_TYPE);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    // Both resources are managed here so that neither the stream nor the writer leaks when
    // create() or append() throws; the stream is listed separately because the writer only
    // takes ownership of it once create() has succeeded.
    try (FileOutputStream out = new FileOutputStream(tmpFile);
            DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        // NOTE(review): presumably a small sync interval is chosen so the test data spans
        // several blocks -- confirm against the tests that read this file.
        dataFileWriter.setSyncInterval(64);
        for (RowData rowData : TEST_DATA) {
            dataFileWriter.append((GenericRecord) converter.convert(schema, rowData));
        }
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) FileOutputStream(java.io.FileOutputStream) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) BeforeEach(org.junit.jupiter.api.BeforeEach)

Example 92 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project flink by apache.

the class AvroRecordInputFormatTest method writeTestFile.

/**
 * Writes two {@code User} records to {@code testFile} as an Avro container file.
 *
 * <p>The first record is populated through setters, the second through the generated builder.
 *
 * @param testFile the file the Avro data is written to
 * @throws IOException if the file cannot be created or written
 */
public static void writeTestFile(File testFile) throws IOException {
    ArrayList<CharSequence> stringArray = new ArrayList<>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);
    ArrayList<Boolean> booleanArray = new ArrayList<>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);
    HashMap<CharSequence, Long> longMap = new HashMap<>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);
    // First record: populated via setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);
    user1.setTypeBytes(ByteBuffer.allocate(10));
    user1.setTypeDate(LocalDate.parse("2014-03-01"));
    user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
    user1.setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS));
    user1.setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"));
    user1.setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS));
    // 20.00, stored as the bytes of its unscaled value
    user1.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // 20.00, stored as the bytes of its unscaled value
    user1.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // Second record: constructed via builder.
    User user2 = User.newBuilder()
            .setName("Charlie")
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<>())
            .setTypeArrayBoolean(new ArrayList<>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<>())
            .setTypeFixed(null)
            .setTypeUnion(null)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .setTypeBytes(ByteBuffer.allocate(10))
            .setTypeDate(LocalDate.parse("2014-03-01"))
            .setTypeTimeMillis(LocalTime.parse("12:12:12"))
            .setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS))
            .setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"))
            .setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS))
            .setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
            .setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
            .build();
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
    // try-with-resources guarantees the writer is closed even when create() or append() throws.
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter)) {
        dataFileWriter.create(user1.getSchema(), testFile);
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);
    }
}
Also used : User(org.apache.flink.formats.avro.generated.User) Address(org.apache.flink.formats.avro.generated.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) Fixed2(org.apache.flink.formats.avro.generated.Fixed2) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 93 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project flink by apache.

the class AvroSplittableInputFormatTest method createFiles.

/**
 * Creates a temporary Avro container file holding {@code NUM_RECORDS} {@code User} records
 * and stores it in {@code testFile}.
 *
 * <p>The first two records are fixed (one built with setters, one with the builder); the
 * remaining {@code NUM_RECORDS - 2} records get name, favorite number and double value from a
 * {@link Random} seeded with 1337, so the file content is reproducible across runs.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
@Before
public void createFiles() throws IOException {
    testFile = File.createTempFile("AvroSplittableInputFormatTest", null);
    ArrayList<CharSequence> stringArray = new ArrayList<>();
    stringArray.add(TEST_ARRAY_STRING_1);
    stringArray.add(TEST_ARRAY_STRING_2);
    ArrayList<Boolean> booleanArray = new ArrayList<>();
    booleanArray.add(TEST_ARRAY_BOOLEAN_1);
    booleanArray.add(TEST_ARRAY_BOOLEAN_2);
    HashMap<CharSequence, Long> longMap = new HashMap<>();
    longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
    longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
    Address addr = new Address();
    addr.setNum(TEST_NUM);
    addr.setStreet(TEST_STREET);
    addr.setCity(TEST_CITY);
    addr.setState(TEST_STATE);
    addr.setZip(TEST_ZIP);
    // First record: populated via setters.
    User user1 = new User();
    user1.setName(TEST_NAME);
    user1.setFavoriteNumber(256);
    user1.setTypeDoubleTest(123.45d);
    user1.setTypeBoolTest(true);
    user1.setTypeArrayString(stringArray);
    user1.setTypeArrayBoolean(booleanArray);
    user1.setTypeEnum(TEST_ENUM_COLOR);
    user1.setTypeMap(longMap);
    user1.setTypeNested(addr);
    user1.setTypeBytes(ByteBuffer.allocate(10));
    user1.setTypeDate(LocalDate.parse("2014-03-01"));
    user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
    user1.setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS));
    user1.setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"));
    user1.setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS));
    // 20.00, stored as the bytes of its unscaled value
    user1.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // 20.00, stored as the bytes of its unscaled value
    user1.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    // Second record: constructed via builder.
    User user2 = User.newBuilder()
            .setName(TEST_NAME)
            .setFavoriteColor("blue")
            .setFavoriteNumber(null)
            .setTypeBoolTest(false)
            .setTypeDoubleTest(1.337d)
            .setTypeNullTest(null)
            .setTypeLongTest(1337L)
            .setTypeArrayString(new ArrayList<>())
            .setTypeArrayBoolean(new ArrayList<>())
            .setTypeNullableArray(null)
            .setTypeEnum(Colors.RED)
            .setTypeMap(new HashMap<>())
            .setTypeFixed(new Fixed16())
            .setTypeUnion(123L)
            .setTypeNested(Address.newBuilder()
                    .setNum(TEST_NUM)
                    .setStreet(TEST_STREET)
                    .setCity(TEST_CITY)
                    .setState(TEST_STATE)
                    .setZip(TEST_ZIP)
                    .build())
            .setTypeBytes(ByteBuffer.allocate(10))
            .setTypeDate(LocalDate.parse("2014-03-01"))
            .setTypeTimeMillis(LocalTime.parse("12:12:12"))
            .setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS))
            .setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"))
            .setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS))
            .setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
            .setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
            .build();
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
    // try-with-resources guarantees the writer is closed even when create() or one of the
    // many append() calls throws.
    try (DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter)) {
        dataFileWriter.create(user1.getSchema(), testFile);
        dataFileWriter.append(user1);
        dataFileWriter.append(user2);
        // Fixed seed keeps the generated records identical from run to run.
        Random rnd = new Random(1337);
        // Two records were already appended above, hence NUM_RECORDS - 2 here.
        for (int i = 0; i < NUM_RECORDS - 2; i++) {
            User user = new User();
            user.setName(TEST_NAME + rnd.nextInt());
            user.setFavoriteNumber(rnd.nextInt());
            user.setTypeDoubleTest(rnd.nextDouble());
            user.setTypeBoolTest(true);
            user.setTypeArrayString(stringArray);
            user.setTypeArrayBoolean(booleanArray);
            user.setTypeEnum(TEST_ENUM_COLOR);
            user.setTypeMap(longMap);
            Address address = new Address();
            address.setNum(TEST_NUM);
            address.setStreet(TEST_STREET);
            address.setCity(TEST_CITY);
            address.setState(TEST_STATE);
            address.setZip(TEST_ZIP);
            user.setTypeNested(address);
            user.setTypeBytes(ByteBuffer.allocate(10));
            user.setTypeDate(LocalDate.parse("2014-03-01"));
            user.setTypeTimeMillis(LocalTime.parse("12:12:12"));
            user.setTypeTimeMicros(LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS));
            user.setTypeTimestampMillis(Instant.parse("2014-03-01T12:12:12.321Z"));
            user.setTypeTimestampMicros(Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS));
            // 20.00, stored as the bytes of its unscaled value
            user.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
            // 20.00, stored as the bytes of its unscaled value
            user.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
            dataFileWriter.append(user);
        }
    }
}
Also used : User(org.apache.flink.formats.avro.generated.User) Address(org.apache.flink.formats.avro.generated.Address) HashMap(java.util.HashMap) DataFileWriter(org.apache.avro.file.DataFileWriter) ArrayList(java.util.ArrayList) Fixed2(org.apache.flink.formats.avro.generated.Fixed2) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) Random(java.util.Random) Before(org.junit.Before)

Example 94 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project druid by druid-io.

the class AvroHadoopInputRowParserTest method createAvroFile.

/**
 * Writes the given record into a new Avro container file named {@code someAvroDatum.avro}
 * inside a freshly created temporary directory.
 *
 * @param datum the record to store; written with the schema of {@code SomeAvroDatum}
 * @return the file that now contains the record
 * @throws IOException if the file cannot be created or written
 */
public static File createAvroFile(GenericRecord datum) throws IOException {
    final File avroFile = new File(FileUtils.createTempDir(), "someAvroDatum.avro");
    // The writer is closed (and thereby flushed) before the file is handed back to the caller.
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new SpecificDatumWriter<>())) {
        writer.create(SomeAvroDatum.getClassSchema(), avroFile);
        writer.append(datum);
    }
    return avroFile;
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 95 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project drill by apache.

the class AvroDataGenerator method generateDateTimeData.

/**
 * Generates an Avro file with a single record that holds the given date-time in each of the
 * Avro date/time logical types (timestamp-millis, timestamp-micros, date, time-millis,
 * time-micros). The date-time is interpreted at UTC.
 *
 * @param dateTime the date-time to encode
 * @return the name (not the full path) of the generated file, which is created under the
 *     test watcher's root directory
 * @throws Exception if the file cannot be created or written
 */
public String generateDateTimeData(LocalDateTime dateTime) throws Exception {
    File avroFile = File.createTempFile("avro-date-time-test", ".avro", dirTestWatcher.getRootDir());

    // One schema per logical type, each layered on top of its underlying primitive type.
    Schema tsMillisSchema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder().longType());
    Schema tsMicrosSchema = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType());
    Schema dateSchema = LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
    Schema timeMillisSchema = LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
    Schema timeMicrosSchema = LogicalTypes.timeMicros().addToSchema(SchemaBuilder.builder().longType());

    Schema recordSchema = SchemaBuilder.record("rec")
            .fields()
            .name("col_timestamp_millis").type(tsMillisSchema).noDefault()
            .name("col_timestamp_micros").type(tsMicrosSchema).noDefault()
            .name("col_date").type(dateSchema).noDefault()
            .name("col_time_millis").type(timeMillisSchema).noDefault()
            .name("col_time_micros").type(timeMicrosSchema).noDefault()
            .endRecord();

    try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(recordSchema))) {
        fileWriter.create(recordSchema, avroFile);

        GenericRecord row = new GenericData.Record(recordSchema);

        // Milliseconds since the epoch for the given date-time at UTC.
        long epochMillis = dateTime.toInstant(ZoneOffset.UTC).toEpochMilli();
        row.put("col_timestamp_millis", epochMillis);
        row.put("col_timestamp_micros", epochMillis * 1000);
        row.put("col_date", dateTime.toLocalDate().toEpochDay());

        // Time of day = offset of the instant from midnight (UTC) of the same date.
        long midnightEpochMillis = dateTime.toLocalDate().atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
        long millisOfDay = epochMillis - midnightEpochMillis;
        row.put("col_time_millis", millisOfDay);
        row.put("col_time_micros", millisOfDay * 1000);

        fileWriter.append(row);
    }
    return avroFile.getName();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 102, GenericRecord (org.apache.avro.generic.GenericRecord): 58, Schema (org.apache.avro.Schema): 50, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 47, File (java.io.File): 38, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 22, IOException (java.io.IOException): 22, GenericData (org.apache.avro.generic.GenericData): 17, FileOutputStream (java.io.FileOutputStream): 15, Test (org.junit.Test): 14, HashMap (java.util.HashMap): 11, InputStream (java.io.InputStream): 10, SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 10, ArrayList (java.util.ArrayList): 9, Path (org.apache.hadoop.fs.Path): 9, ByteArrayInputStream (java.io.ByteArrayInputStream): 8, OutputStream (java.io.OutputStream): 8, ByteBuffer (java.nio.ByteBuffer): 7, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 7, MockFlowFile (org.apache.nifi.util.MockFlowFile): 7