use of org.apache.avro.generic.GenericDatumWriter in project nifi by apache.
the class TestConvertAvroToORC method test_onTrigger_array_of_records.
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();
    final GenericRecord outerRecord = new GenericData.Record(schema);
    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);
    innerRecords.add(innerRecord1);
    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);
    innerRecords.add(innerRecord2);
    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();
    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {
        {
            put(CoreAttributes.FILENAME.key(), "test");
        }
    };
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + "(records ARRAY<STRUCT<name:STRING, age:INT>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));
    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());
    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());
    // Verify the second element. Should also be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
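As a quick sanity check, Avro bytes produced this way can also be read back with org.apache.avro.file.DataFileStream and GenericDatumReader before handing them to the processor. The following is an illustrative sketch, not part of the NiFi test; avroBytes stands for the serialized container built above, and java.io.ByteArrayInputStream is assumed to be imported.
// avroBytes: the serialized Avro container produced as in the test above (the variable name is illustrative)
try (DataFileStream<GenericRecord> readBack = new DataFileStream<>(
        new ByteArrayInputStream(avroBytes), new GenericDatumReader<GenericRecord>())) {
    // The writer appended a single outer record whose "records" field is an array of two inner records
    GenericRecord outer = readBack.next();
    GenericData.Array<?> records = (GenericData.Array<?>) outer.get("records");
    assertEquals(2, records.size());
}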
use of org.apache.avro.generic.GenericDatumWriter in project nifi by apache.
the class TestPutHiveStreaming method createAvroRecord.
private byte[] createAvroRecord(List<Map<String, Object>> records) throws IOException {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
    List<GenericRecord> users = new LinkedList<>();
    for (Map<String, Object> record : records) {
        final GenericRecord user = new GenericData.Record(schema);
        user.put("name", record.get("name"));
        user.put("favorite_number", record.get("favorite_number"));
        user.put("favorite_color", record.get("favorite_color"));
        users.add(user);
    }
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        for (final GenericRecord user : users) {
            dataFileWriter.append(user);
        }
    }
    return out.toByteArray();
}
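A hedged usage sketch, not taken from the test class: the map keys must match the fields the helper copies into the Avro record, and the resulting bytes can then be enqueued on the processor's TestRunner. The runner variable and the sample values are assumptions for illustration.
Map<String, Object> row = new HashMap<>();
row.put("name", "Joe");
row.put("favorite_number", 146);
row.put("favorite_color", "blue");
byte[] avroBytes = createAvroRecord(Collections.singletonList(row));
runner.enqueue(avroBytes);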
use of org.apache.avro.generic.GenericDatumWriter in project nifi by apache.
the class JdbcCommon method convertToAvroStream.
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final AvroConversionOptions options, final ResultSetRowCallback callback) throws SQLException, IOException {
    final Schema schema = createSchema(rs, options);
    final GenericRecord rec = new GenericData.Record(schema);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, outStream);
        final ResultSetMetaData meta = rs.getMetaData();
        final int nrOfColumns = meta.getColumnCount();
        long nrOfRows = 0;
        while (rs.next()) {
            if (callback != null) {
                callback.processRow(rs);
            }
            for (int i = 1; i <= nrOfColumns; i++) {
                final int javaSqlType = meta.getColumnType(i);
                final Schema fieldSchema = schema.getFields().get(i - 1).schema();
                // Need to handle CLOB and BLOB before getObject() is called, due to ResultSet's maximum portability statement
                if (javaSqlType == CLOB) {
                    Clob clob = rs.getClob(i);
                    if (clob != null) {
                        long numChars = clob.length();
                        char[] buffer = new char[(int) numChars];
                        InputStream is = clob.getAsciiStream();
                        int index = 0;
                        int c = is.read();
                        while (c >= 0) {
                            buffer[index++] = (char) c;
                            c = is.read();
                        }
                        rec.put(i - 1, new String(buffer));
                        clob.free();
                    } else {
                        rec.put(i - 1, null);
                    }
                    continue;
                }
                if (javaSqlType == NCLOB) {
                    NClob nClob = rs.getNClob(i);
                    if (nClob != null) {
                        final Reader characterStream = nClob.getCharacterStream();
                        long numChars = (int) nClob.length();
                        final CharBuffer buffer = CharBuffer.allocate((int) numChars);
                        characterStream.read(buffer);
                        buffer.flip();
                        rec.put(i - 1, buffer.toString());
                        nClob.free();
                    } else {
                        rec.put(i - 1, null);
                    }
                    continue;
                }
                if (javaSqlType == BLOB) {
                    Blob blob = rs.getBlob(i);
                    if (blob != null) {
                        long numChars = blob.length();
                        byte[] buffer = new byte[(int) numChars];
                        InputStream is = blob.getBinaryStream();
                        int index = 0;
                        int c = is.read();
                        while (c >= 0) {
                            buffer[index++] = (byte) c;
                            c = is.read();
                        }
                        ByteBuffer bb = ByteBuffer.wrap(buffer);
                        rec.put(i - 1, bb);
                        blob.free();
                    } else {
                        rec.put(i - 1, null);
                    }
                    continue;
                }
                final Object value = rs.getObject(i);
                if (value == null) {
                    rec.put(i - 1, null);
                } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == ARRAY) {
                    // byte arrays require slightly different handling
                    byte[] bytes = rs.getBytes(i);
                    ByteBuffer bb = ByteBuffer.wrap(bytes);
                    rec.put(i - 1, bb);
                } else if (value instanceof Byte) {
                    // tinyint(1) columns are reported by the JDBC driver as java.sql.Types.TINYINT,
                    // but the value comes back as a java.lang.Byte (at least with the H2 JDBC driver).
                    // Putting the Byte directly into the Avro record results in:
                    // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
                    rec.put(i - 1, ((Byte) value).intValue());
                } else if (value instanceof Short) {
                    // MS SQL returns TINYINT as a Java Short, which Avro doesn't understand.
                    rec.put(i - 1, ((Short) value).intValue());
                } else if (value instanceof BigDecimal) {
                    if (options.useLogicalTypes) {
                        // Delegate mapping to AvroTypeUtil in order to utilize logical types.
                        rec.put(i - 1, AvroTypeUtil.convertToAvroObject(value, fieldSchema));
                    } else {
                        // As string for backward compatibility.
                        rec.put(i - 1, value.toString());
                    }
                } else if (value instanceof BigInteger) {
                    // Avro can't handle BigInteger as a number directly - it will throw an AvroRuntimeException
                    // such as: "Unknown datum type: java.math.BigInteger: 38". In this case the schema is expecting a string.
                    if (javaSqlType == BIGINT) {
                        int precision = meta.getPrecision(i);
                        if (precision < 0 || precision > MAX_DIGITS_IN_BIGINT) {
                            rec.put(i - 1, value.toString());
                        } else {
                            try {
                                rec.put(i - 1, ((BigInteger) value).longValueExact());
                            } catch (ArithmeticException ae) {
                                // Since the value won't fit in a long, convert it to a string
                                rec.put(i - 1, value.toString());
                            }
                        }
                    } else {
                        rec.put(i - 1, value.toString());
                    }
                } else if (value instanceof Number || value instanceof Boolean) {
                    if (javaSqlType == BIGINT) {
                        int precision = meta.getPrecision(i);
                        if (precision < 0 || precision > MAX_DIGITS_IN_BIGINT) {
                            rec.put(i - 1, value.toString());
                        } else {
                            rec.put(i - 1, value);
                        }
                    } else {
                        rec.put(i - 1, value);
                    }
                } else if (value instanceof Date) {
                    if (options.useLogicalTypes) {
                        // Delegate mapping to AvroTypeUtil in order to utilize logical types.
                        rec.put(i - 1, AvroTypeUtil.convertToAvroObject(value, fieldSchema));
                    } else {
                        // As string for backward compatibility.
                        rec.put(i - 1, value.toString());
                    }
                } else {
                    // The different types that we support are numbers (int, long, double, float),
                    // as well as boolean values and Strings. Since Avro doesn't provide
                    // timestamp types, we want to convert those to Strings. So we will cast anything other
                    // than numbers or booleans to strings by using the toString() method.
                    rec.put(i - 1, value.toString());
                }
            }
            dataFileWriter.append(rec);
            nrOfRows += 1;
            if (options.maxRows > 0 && nrOfRows == options.maxRows)
                break;
        }
        return nrOfRows;
    }
}
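A minimal caller sketch for the method above, not taken from the NiFi sources. The JDBC URL and table name are placeholders, the callback is left null (the method guards against that, as shown above), and the AvroConversionOptions builder calls assume the builder-style API used alongside this method; verify the exact builder methods against your NiFi version.
try (Connection con = DriverManager.getConnection("jdbc:h2:mem:example");   // placeholder URL
     Statement st = con.createStatement();
     ResultSet rs = st.executeQuery("SELECT * FROM persons")) {              // placeholder table
    final ByteArrayOutputStream avroOut = new ByteArrayOutputStream();
    final AvroConversionOptions options = AvroConversionOptions.builder()    // assumed builder API
            .maxRows(0)               // 0 = convert every row
            .useLogicalTypes(false)   // DECIMAL/DATE values fall back to strings
            .build();
    long rowCount = convertToAvroStream(rs, avroOut, options, null);
}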
use of org.apache.avro.generic.GenericDatumWriter in project nifi by apache.
the class JdbcCommon method createEmptyAvroStream.
public static void createEmptyAvroStream(final OutputStream outStream) throws IOException {
    final FieldAssembler<Schema> builder = SchemaBuilder.record("NiFi_ExecuteSQL_Record").namespace("any.data").fields();
    final Schema schema = builder.endRecord();
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, outStream);
    }
}
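As an illustrative check, not part of the NiFi sources: the resulting "empty" stream is still a valid Avro container that carries the NiFi_ExecuteSQL_Record schema in its header but contains no data records. The sketch below assumes org.apache.avro.file.DataFileStream, GenericDatumReader, and java.io.ByteArrayInputStream are available.
ByteArrayOutputStream empty = new ByteArrayOutputStream();
createEmptyAvroStream(empty);
try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
        new ByteArrayInputStream(empty.toByteArray()), new GenericDatumReader<GenericRecord>())) {
    assertEquals("NiFi_ExecuteSQL_Record", reader.getSchema().getName());
    assertFalse(reader.hasNext());   // schema header only, zero records
}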
use of org.apache.avro.generic.GenericDatumWriter in project nifi by apache.
the class TestMergeContent method testSimpleAvroConcat.
@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));
    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);
    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");
    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter);
    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());
    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);
    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");
    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");
    Assert.assertEquals(3, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
    Assert.assertTrue(users.containsKey("Ben"));
    Assert.assertTrue(users.containsKey("John"));
}
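The serializeAvroRecord helper used above is not shown on this page. A plausible minimal version (a sketch, not necessarily the project's actual helper) writes each user into its own Avro data file, which is what MergeContent then concatenates:
private ByteArrayOutputStream serializeAvroRecord(final Schema schema, final GenericRecord user,
        final DatumWriter<GenericRecord> datumWriter) throws IOException {
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(user);
    }
    return out;
}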