Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestConvertAvroSchema, method testBasicConversionWithCompression.
@Test
public void testBasicConversionWithCompression() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    runner.setProperty(AbstractKiteConvertProcessor.COMPRESSION_TYPE, CodecType.BZIP2.toString());
    Locale locale = Locale.getDefault();
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 row", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    // The rejected record is routed to "failure" in the original input schema.
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    DataFileStream<Record> stream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader);
    int count = 0;
    for (Record r : stream) {
        Assert.assertEquals(badRecord, r);
        count++;
    }
    stream.close();
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    // The converted records are routed to "success" in the output schema.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    DataFileStream<Record> successStream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))), successReader);
    count = 0;
    for (Record r : successStream) {
        if (count == 0) {
            Assert.assertEquals(convertBasic(goodRecord1, locale), r);
        } else {
            Assert.assertEquals(convertBasic(goodRecord2, locale), r);
        }
        count++;
    }
    successStream.close();
    Assert.assertEquals(2, count);
}
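The streamFor helper used above belongs to the test class and is not shown on this page. A minimal sketch of what such a helper might look like, assuming it serializes the records into an in-memory Avro container with DataFileWriter (the name and signature are taken from the call site; the body is an assumption, not the NiFi source):

    // Hypothetical reconstruction of the streamFor helper; the real NiFi
    // test helper may differ. Writes records into an Avro container in memory.
    private InputStream streamFor(List<Record> records) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (DataFileWriter<Record> writer = new DataFileWriter<>(new GenericDatumWriter<Record>())) {
            writer.create(INPUT_SCHEMA, out); // the container header carries the schema
            for (Record record : records) {
                writer.append(record);
            }
        }
        return new ByteArrayInputStream(out.toByteArray());
    }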
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestConvertAvroSchema, method testBasicConversion.
@Test
public void testBasicConversion() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    Locale locale = Locale.getDefault();
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 row", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    DataFileStream<Record> stream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader);
    int count = 0;
    for (Record r : stream) {
        Assert.assertEquals(badRecord, r);
        count++;
    }
    stream.close();
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    DataFileStream<Record> successStream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))), successReader);
    count = 0;
    for (Record r : successStream) {
        if (count == 0) {
            Assert.assertEquals(convertBasic(goodRecord1, locale), r);
        } else {
            Assert.assertEquals(convertBasic(goodRecord2, locale), r);
        }
        count++;
    }
    successStream.close();
    Assert.assertEquals(2, count);
}
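This test is identical to the compressed variant above except that it leaves COMPRESSION_TYPE unset. Note that in Avro, compression is purely a writer-side concern: DataFileStream reads the codec name from the container header and decompresses transparently, which is why both tests read their results the same way. As a standalone illustration using the standard Avro API (not taken from the test; out stands in for any OutputStream):

    // Writing a bzip2-compressed Avro container; readers need no codec configuration.
    DataFileWriter<Record> writer = new DataFileWriter<>(new GenericDatumWriter<Record>());
    writer.setCodec(CodecFactory.bzip2Codec()); // from org.apache.avro.file.CodecFactory; must precede create()
    writer.create(OUTPUT_SCHEMA, out);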
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestJdbcCommon, method testConvertToAvroStreamForBigDecimal.
private void testConvertToAvroStreamForBigDecimal(BigDecimal bigDecimal, int dbPrecision, int defaultPrecision, int expectedPrecision, int expectedScale) throws SQLException, IOException {
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.NUMERIC);
    when(metadata.getColumnName(1)).thenReturn("The.Chairman");
    when(metadata.getTableName(1)).thenReturn("1the::table");
    when(metadata.getPrecision(1)).thenReturn(dbPrecision);
    when(metadata.getScale(1)).thenReturn(expectedScale);
    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(metadata);
    // Simulate a result set containing exactly one row.
    final AtomicInteger counter = new AtomicInteger(1);
    Mockito.doAnswer(new Answer<Boolean>() {
        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            return counter.getAndDecrement() > 0;
        }
    }).when(rs).next();
    when(rs.getObject(Mockito.anyInt())).thenReturn(bigDecimal);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().convertNames(true).useLogicalTypes(true).defaultPrecision(defaultPrecision).build();
    JdbcCommon.convertToAvroStream(rs, baos, options, null);
    final byte[] serializedBytes = baos.toByteArray();
    final InputStream instream = new ByteArrayInputStream(serializedBytes);
    // Register the decimal conversion so the reader materializes BigDecimal values.
    final GenericData genericData = new GenericData();
    genericData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(null, null, genericData);
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        // The generated column schema is a union of null and decimal.
        final Schema generatedUnion = dataFileReader.getSchema().getField("The_Chairman").schema();
        assertEquals(2, generatedUnion.getTypes().size());
        final LogicalType logicalType = generatedUnion.getTypes().get(1).getLogicalType();
        assertNotNull(logicalType);
        assertEquals("decimal", logicalType.getName());
        LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
        assertEquals(expectedPrecision, decimalType.getPrecision());
        assertEquals(expectedScale, decimalType.getScale());
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            // convertNames(true) sanitizes "1the::table" and "The.Chairman" into valid Avro names.
            assertEquals("_1the__table", record.getSchema().getName());
            assertEquals(bigDecimal, record.get("The_Chairman"));
        }
    }
}
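The union asserted above can be constructed directly with Avro's logical-type API. A short sketch (the precision and scale values here are illustrative; Arrays is java.util.Arrays):

    // Building a nullable decimal schema like the "The_Chairman" field above.
    Schema decimal = LogicalTypes.decimal(10, 2).addToSchema(Schema.create(Schema.Type.BYTES));
    Schema union = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), decimal));
    LogicalTypes.Decimal lt = (LogicalTypes.Decimal) union.getTypes().get(1).getLogicalType();
    // lt.getPrecision() == 10 and lt.getScale() == 2

Registering Conversions.DecimalConversion on the GenericData instance, as the test does, is what makes the reader return such fields as BigDecimal rather than raw ByteBuffer values.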
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestJdbcCommon, method testClob.
@Test
public void testClob() throws Exception {
    try (final Statement stmt = con.createStatement()) {
        stmt.executeUpdate("CREATE TABLE clobtest (id INT, text CLOB(64 K))");
        stmt.execute("INSERT INTO clobtest VALUES (41, NULL)");
        PreparedStatement ps = con.prepareStatement("INSERT INTO clobtest VALUES (?, ?)");
        ps.setInt(1, 42);
        final char[] buffer = new char[4002];
        IntStream.range(0, 4002).forEach((i) -> buffer[i] = String.valueOf(i % 10).charAt(0));
        // Put a zero byte in to test the buffer-building logic.
        buffer[1] = 0;
        ReaderInputStream isr = new ReaderInputStream(new CharArrayReader(buffer), Charset.defaultCharset());
        // Set the value of the input parameter to the input stream.
        ps.setAsciiStream(2, isr, 4002);
        ps.execute();
        isr.close();
        final ResultSet resultSet = stmt.executeQuery("select * from clobtest");
        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        JdbcCommon.convertToAvroStream(resultSet, outStream, false);
        final byte[] serializedBytes = outStream.toByteArray();
        assertNotNull(serializedBytes);
        // Deserialize the bytes back to records.
        final InputStream instream = new ByteArrayInputStream(serializedBytes);
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                // Reuse the record object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with
                // many items.
                record = dataFileReader.next(record);
                Integer id = (Integer) record.get("ID");
                Object o = record.get("TEXT");
                if (id == 41) {
                    assertNull(o);
                } else {
                    assertNotNull(o);
                    final String text = o.toString();
                    assertEquals(4002, text.length());
                    // The third character should be '2'.
                    assertEquals('2', text.charAt(2));
                }
            }
        }
    }
}
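One detail worth noting: with a plain GenericDatumReader, Avro string fields are typically deserialized as org.apache.avro.util.Utf8 rather than java.lang.String, which is why the test goes through o.toString() instead of casting. A minimal illustration of the safe pattern:

    // Generic records usually return Utf8 for string fields; convert before comparing.
    Object o = record.get("TEXT");
    String text = (o == null) ? null : o.toString();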
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestJdbcCommon, method testConvertToAvroStreamForDateTime.
private void testConvertToAvroStreamForDateTime(JdbcCommon.AvroConversionOptions options, BiConsumer<GenericRecord, java.sql.Date> assertDate,
        BiConsumer<GenericRecord, Time> assertTime, BiConsumer<GenericRecord, Timestamp> assertTimeStamp) throws SQLException, IOException, ParseException {
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(metadata);
    // Parse a date/time string into epoch milliseconds, interpreting it as UTC.
    BiFunction<String, String, Long> toMillis = (format, dateStr) -> {
        try {
            final SimpleDateFormat dateFormat = new SimpleDateFormat(format);
            dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
            return dateFormat.parse(dateStr).getTime();
        } catch (ParseException e) {
            throw new RuntimeException(e);
        }
    };
    when(metadata.getColumnCount()).thenReturn(3);
    when(metadata.getTableName(anyInt())).thenReturn("table");
    when(metadata.getColumnType(1)).thenReturn(Types.DATE);
    when(metadata.getColumnName(1)).thenReturn("date");
    final java.sql.Date date = new java.sql.Date(toMillis.apply("yyyy/MM/dd", "2017/05/10"));
    when(rs.getObject(1)).thenReturn(date);
    when(metadata.getColumnType(2)).thenReturn(Types.TIME);
    when(metadata.getColumnName(2)).thenReturn("time");
    final Time time = new Time(toMillis.apply("HH:mm:ss.SSS", "12:34:56.789"));
    when(rs.getObject(2)).thenReturn(time);
    when(metadata.getColumnType(3)).thenReturn(Types.TIMESTAMP);
    when(metadata.getColumnName(3)).thenReturn("timestamp");
    final Timestamp timestamp = new Timestamp(toMillis.apply("yyyy/MM/dd HH:mm:ss.SSS", "2017/05/11 19:59:39.123"));
    when(rs.getObject(3)).thenReturn(timestamp);
    // Simulate a result set containing exactly one row.
    final AtomicInteger counter = new AtomicInteger(1);
    Mockito.doAnswer(new Answer<Boolean>() {
        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            return counter.getAndDecrement() > 0;
        }
    }).when(rs).next();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(rs, baos, options, null);
    final byte[] serializedBytes = baos.toByteArray();
    final InputStream instream = new ByteArrayInputStream(serializedBytes);
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertDate.accept(record, date);
            assertTime.accept(record, time);
            assertTimeStamp.accept(record, timestamp);
        }
    }
}
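This helper is parameterized so callers can assert different field representations depending on the conversion options. A hedged sketch of how a caller without logical types might look; the option values and expected string forms are assumptions for illustration, not copied from NiFi's tests:

    // Hypothetical caller: assuming that with useLogicalTypes(false) the date, time,
    // and timestamp columns are written as strings, the assertions compare toString() forms.
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
            .convertNames(true)
            .useLogicalTypes(false)
            .build();
    testConvertToAvroStreamForDateTime(options,
            (record, date) -> assertEquals(date.toString(), record.get("date").toString()),
            (record, time) -> assertEquals(time.toString(), record.get("time").toString()),
            (record, timestamp) -> assertEquals(timestamp.toString(), record.get("timestamp").toString()));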