Search in sources:

Example 96 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project components by Talend.

the class RecordSetUtil method writeRandomAvroFile.

/**
 * Writes all records from the test set into a single Avro file on the file system.
 *
 * <p>Every record of every partition returned by {@code td.getPartitions()} is appended, in iteration order, to
 * one Avro data file created with the schema returned by {@code td.getSchema()}.
 *
 * @param fs The filesystem.
 * @param path The path of the file on the filesystem.
 * @param td The test data to write.
 * @throws IOException If there was an exception writing to the filesystem.
 */
public static void writeRandomAvroFile(FileSystem fs, String path, RecordSet td) throws IOException {
    // Both the stream and the Avro writer are managed resources: the writer is closed even when create() or
    // append() throws, instead of only on the success path.
    try (OutputStream out = fs.create(new Path(path));
            DataFileWriter<IndexedRecord> dataFileWriter =
                    new DataFileWriter<>(new GenericDatumWriter<IndexedRecord>(td.getSchema()))) {
        dataFileWriter.create(td.getSchema(), out);
        for (List<IndexedRecord> partition : td.getPartitions()) {
            for (IndexedRecord record : partition) {
                dataFileWriter.append(record);
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IndexedRecord(org.apache.avro.generic.IndexedRecord) OutputStream(java.io.OutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter)

Example 97 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project components by Talend.

the class Person method serToAvroBytes.

/**
 * Serializes the record produced by {@code toAvroRecord()} with Avro's binary encoder.
 *
 * @return the encoded bytes
 * @throws IOException if writing or flushing the encoder fails
 */
public byte[] serToAvroBytes() throws IOException {
    final DatumWriter<GenericRecord> recordWriter = new GenericDatumWriter<GenericRecord>(schema);
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        final BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
        recordWriter.write(toAvroRecord(), binaryEncoder);
        // The encoder is flushed before the buffer is read.
        binaryEncoder.flush();
        return buffer.toByteArray();
    }
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 98 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project components by Talend.

the class FixedFlowInputRuntimeTest method generateInputJSON.

/**
 * Encodes one record with Avro's JSON encoder and returns the result as a string.
 *
 * @param inputSchema the schema used for both the datum writer and the JSON encoder
 * @param inputIndexedRecord the record to encode
 * @return the encoder output decoded as UTF-8
 * @throws IOException if writing or flushing fails
 */
private static String generateInputJSON(Schema inputSchema, IndexedRecord inputIndexedRecord) throws IOException {
    final ByteArrayOutputStream jsonBytes = new ByteArrayOutputStream();
    final DatumWriter<IndexedRecord> recordWriter = new GenericDatumWriter<IndexedRecord>(inputSchema);
    final JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(inputSchema, jsonBytes, false);

    recordWriter.write(inputIndexedRecord, jsonEncoder);
    // Flush the encoder first, then the underlying stream, before reading the bytes.
    jsonEncoder.flush();
    jsonBytes.flush();

    final byte[] encoded = jsonBytes.toByteArray();
    return new String(encoded, StandardCharsets.UTF_8);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) JsonEncoder(org.apache.avro.io.JsonEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter)

Example 99 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project components by Talend.

the class Person method serToAvroBytes.

/**
 * Serializes the record produced by {@code toAvroRecord()} with Avro's binary encoder.
 *
 * @return the encoded bytes
 * @throws IOException if writing or flushing the encoder fails
 */
public byte[] serToAvroBytes() throws IOException {
    final DatumWriter<GenericRecord> recordWriter = new GenericDatumWriter<GenericRecord>(schema);
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        final BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
        recordWriter.write(toAvroRecord(), binaryEncoder);
        // The encoder is flushed before the buffer is read.
        binaryEncoder.flush();
        return buffer.toByteArray();
    }
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 100 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project kylo by Teradata.

the class JdbcCommon method convertToAvroStream.

/**
 * Writes every remaining row of a JDBC result set to an Avro data file on the given stream.
 *
 * <p>A single {@link GenericRecord} is reused for all rows; each column value is converted to a type the Avro
 * writer accepts (see the branch comments below) and stored at position {@code columnIndex - 1}. When a
 * visitor is supplied it is notified once per row and once per column.
 *
 * @param rs the result set to read; iterated with {@code rs.next()} until exhausted
 * @param outStream the stream the Avro data file is written to
 * @param visitor optional callback notified for each row and column; may be {@code null}
 * @param schema the Avro schema used for the record and the writer
 * @return the number of rows appended to the Avro output
 * @throws SQLException if reading from the result set or its metadata fails
 * @throws IOException if writing the Avro output fails
 */
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final RowVisitor visitor, final Schema schema) throws SQLException, IOException {
    // Counts failed date extractions so that the warning below is logged at most 10 times per call.
    int dateConversionWarning = 0;
    final GenericRecord rec = new GenericData.Record(schema);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, outStream);
        // Fetched once and reused for every column lookup below instead of calling rs.getMetaData() again.
        final ResultSetMetaData meta = rs.getMetaData();
        final int nrOfColumns = meta.getColumnCount();
        long nrOfRows = 0;
        while (rs.next()) {
            if (visitor != null) {
                visitor.visitRow(rs);
            }
            // JDBC columns are 1-based, Avro record positions are 0-based.
            for (int i = 1; i <= nrOfColumns; i++) {
                final int javaSqlType = meta.getColumnType(i);
                final Object value = rs.getObject(i);
                if (value == null) {
                    rec.put(i - 1, null);
                } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == ARRAY || javaSqlType == BLOB || javaSqlType == CLOB) {
                    // bytes requires little bit different handling
                    byte[] bytes = rs.getBytes(i);
                    ByteBuffer bb = ByteBuffer.wrap(bytes);
                    rec.put(i - 1, bb);
                } else if (value instanceof Byte) {
                    // tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT
                    // But value is returned by JDBC as java.lang.Byte
                    // (at least H2 JDBC works this way)
                    // direct put to avro record results:
                    // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
                    rec.put(i - 1, ((Byte) value).intValue());
                } else if (value instanceof BigDecimal || value instanceof BigInteger) {
                    // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
                    rec.put(i - 1, value.toString());
                } else if (value instanceof Number || value instanceof Boolean) {
                    rec.put(i - 1, value);
                } else if (value instanceof Date) {
                    // Dates, times and timestamps are stored as ISO-formatted strings in UTC.
                    final DateTimeFormatter formatter = ISODateTimeFormat.dateTime().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Date) value).getTime())));
                } else if (value instanceof Time) {
                    final DateTimeFormatter formatter = ISODateTimeFormat.time().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Time) value).getTime())));
                } else if (value instanceof Timestamp) {
                    final DateTimeFormatter formatter = ISODateTimeFormat.dateTime().withZoneUTC();
                    rec.put(i - 1, formatter.print(new DateTime(((Timestamp) value).getTime())));
                } else {
                    // The different types that we support are numbers (int, long, double, float),
                    // as well as boolean values and Strings. Since Avro doesn't provide
                    // timestamp types, we want to convert those to Strings. So we will cast anything other
                    // than numbers or booleans to strings by using the toString() method.
                    rec.put(i - 1, value.toString());
                }
                // notify the visitor
                if (javaSqlType == Types.DATE || javaSqlType == Types.TIMESTAMP) {
                    Timestamp sqlDate = null;
                    try {
                        // Extract timestamp
                        sqlDate = extractSqlDate(rs, i);
                    } catch (Exception e) {
                        // Best effort: the visitor receives null for this column; only the first 10 failures are logged.
                        if (dateConversionWarning++ < 10) {
                            log.warn("{} is not convertible to timestamp or date", meta.getColumnName(i));
                        }
                    }
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, sqlDate);
                    }
                } else if (javaSqlType == Types.TIME) {
                    Time time = rs.getTime(i);
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, time);
                    }
                } else {
                    if (visitor != null) {
                        visitor.visitColumn(meta.getColumnName(i), javaSqlType, (value != null) ? value.toString() : null);
                    }
                }
            }
            // The same record instance is appended for each row after all of its positions were overwritten.
            dataFileWriter.append(rec);
            nrOfRows += 1;
        }
        return nrOfRows;
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) Time(java.sql.Time) DateTime(org.joda.time.DateTime) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteBuffer(java.nio.ByteBuffer) Timestamp(java.sql.Timestamp) BigDecimal(java.math.BigDecimal) Date(java.sql.Date) SQLException(java.sql.SQLException) IOException(java.io.IOException) ResultSetMetaData(java.sql.ResultSetMetaData) BigInteger(java.math.BigInteger) GenericRecord(org.apache.avro.generic.GenericRecord) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)127 GenericRecord (org.apache.avro.generic.GenericRecord)105 Schema (org.apache.avro.Schema)69 ByteArrayOutputStream (java.io.ByteArrayOutputStream)57 DataFileWriter (org.apache.avro.file.DataFileWriter)47 File (java.io.File)40 Test (org.junit.Test)37 IOException (java.io.IOException)29 BinaryEncoder (org.apache.avro.io.BinaryEncoder)29 MockFlowFile (org.apache.nifi.util.MockFlowFile)25 Encoder (org.apache.avro.io.Encoder)23 TestRunner (org.apache.nifi.util.TestRunner)20 HashMap (java.util.HashMap)14 ByteArrayOutputStream (org.apache.nifi.stream.io.ByteArrayOutputStream)14 GenericData (org.apache.avro.generic.GenericData)12 ByteArrayInputStream (java.io.ByteArrayInputStream)11 FileOutputStream (java.io.FileOutputStream)10 InputStream (java.io.InputStream)9 ArrayList (java.util.ArrayList)8 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)8