Search in sources :

Example 36 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

From the class TestConvertAvroSchema, method testBasicConversionWithCompression.

/**
 * Runs ConvertAvroSchema with the BZIP2 compression type over two convertible records and one
 * record whose last field ("free") cannot be converted, then checks the counters, the number of
 * flow files routed to "success" and "failure", and the Avro records contained in each.
 *
 * @throws IOException if reading the produced Avro content fails
 */
@Test
public void testBasicConversionWithCompression() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    // Before any property is set the runner is expected to be invalid.
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    runner.setProperty(AbstractKiteConvertProcessor.COMPRESSION_TYPE, CodecType.BZIP2.toString());
    Locale locale = Locale.getDefault();
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    // Format the numeric input with the default locale; the same locale is passed to convertBasic below.
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);

    // The failure flow file is read back with the input schema and must hold only the bad record.
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: the stream is closed even when an assertion inside the loop fails.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));

    // The success flow file is read back with the output schema and must hold both good records in order.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))), successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Also used : Locale(java.util.Locale) TestRunner(org.apache.nifi.util.TestRunner) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) MockFlowFile(org.apache.nifi.util.MockFlowFile) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.avro.generic.GenericData.Record) NumberFormat(java.text.NumberFormat) Test(org.junit.Test)

Example 37 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

From the class TestConvertAvroSchema, method testBasicConversion.

/**
 * Runs ConvertAvroSchema over two convertible records and one record whose last field ("free")
 * cannot be converted, then checks the counters, the number of flow files routed to "success"
 * and "failure", and the Avro records contained in each.
 *
 * @throws IOException if reading the produced Avro content fails
 */
@Test
public void testBasicConversion() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    // Before any property is set the runner is expected to be invalid.
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    Locale locale = Locale.getDefault();
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    // Format the numeric input with the default locale; the same locale is passed to convertBasic below.
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);

    // The failure flow file is read back with the input schema and must hold only the bad record.
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: the stream is closed even when an assertion inside the loop fails.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));

    // The success flow file is read back with the output schema and must hold both good records in order.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(new ByteArrayInputStream(runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))), successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Also used : Locale(java.util.Locale) TestRunner(org.apache.nifi.util.TestRunner) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) MockFlowFile(org.apache.nifi.util.MockFlowFile) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.avro.generic.GenericData.Record) NumberFormat(java.text.NumberFormat) Test(org.junit.Test)

Example 38 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

From the class TestJdbcCommon, method testConvertToAvroStreamForBigDecimal.

/**
 * Converts a mocked one-row, one-column NUMERIC result set to Avro with logical types enabled
 * and checks the generated decimal schema and the value that is read back.
 *
 * @param bigDecimal        value returned by the mocked result set for every column
 * @param dbPrecision       precision reported by the mocked result-set metadata
 * @param defaultPrecision  default precision passed to the conversion options
 * @param expectedPrecision precision the generated decimal logical type must have
 * @param expectedScale     scale reported by the metadata and expected on the logical type
 * @throws SQLException declared because the mocked JDBC methods declare it
 * @throws IOException  if the Avro stream cannot be written or read
 */
private void testConvertToAvroStreamForBigDecimal(BigDecimal bigDecimal, int dbPrecision, int defaultPrecision, int expectedPrecision, int expectedScale) throws SQLException, IOException {
    // Metadata describing a single NUMERIC column; the column and table names contain
    // characters ('.', ':', a leading digit) that the assertions below expect to be rewritten.
    final ResultSetMetaData columnInfo = mock(ResultSetMetaData.class);
    when(columnInfo.getColumnCount()).thenReturn(1);
    when(columnInfo.getColumnType(1)).thenReturn(Types.NUMERIC);
    when(columnInfo.getColumnName(1)).thenReturn("The.Chairman");
    when(columnInfo.getTableName(1)).thenReturn("1the::table");
    when(columnInfo.getPrecision(1)).thenReturn(dbPrecision);
    when(columnInfo.getScale(1)).thenReturn(expectedScale);

    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(columnInfo);
    // next() answers true exactly once, so the result set appears to hold a single row.
    final AtomicInteger remainingRows = new AtomicInteger(1);
    Mockito.doAnswer((Answer<Boolean>) invocation -> remainingRows.getAndDecrement() > 0).when(rs).next();
    when(rs.getObject(Mockito.anyInt())).thenReturn(bigDecimal);

    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
            .convertNames(true)
            .useLogicalTypes(true)
            .defaultPrecision(defaultPrecision)
            .build();
    final ByteArrayOutputStream avroBytes = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(rs, avroBytes, options, null);

    // Register the decimal conversion so the field is read back as a BigDecimal.
    final GenericData decimalAwareData = new GenericData();
    decimalAwareData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(null, null, decimalAwareData);
    final InputStream avroInput = new ByteArrayInputStream(avroBytes.toByteArray());

    try (final DataFileStream<GenericRecord> avroStream = new DataFileStream<>(avroInput, datumReader)) {
        final Schema fieldSchema = avroStream.getSchema().getField("The_Chairman").schema();
        // null and decimal.
        assertEquals(2, fieldSchema.getTypes().size());

        final LogicalType logicalType = fieldSchema.getTypes().get(1).getLogicalType();
        assertNotNull(logicalType);
        assertEquals("decimal", logicalType.getName());

        final LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
        assertEquals(expectedPrecision, decimal.getPrecision());
        assertEquals(expectedScale, decimal.getScale());

        // The same record instance is handed back to next() on every iteration.
        GenericRecord reused = null;
        while (avroStream.hasNext()) {
            reused = avroStream.next(reused);
            assertEquals("_1the__table", reused.getSchema().getName());
            assertEquals(bigDecimal, reused.get("The_Chairman"));
        }
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) LogicalType(org.apache.avro.LogicalType) ResultSetMetaData(java.sql.ResultSetMetaData) BigDecimal(java.math.BigDecimal) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) LogicalTypes(org.apache.avro.LogicalTypes) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) GenericData(org.apache.avro.generic.GenericData) Conversions(org.apache.avro.Conversions) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ByteArrayInputStream(java.io.ByteArrayInputStream) InvocationOnMock(org.mockito.invocation.InvocationOnMock)

Example 39 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

From the class TestJdbcCommon, method testClob.

/**
 * Inserts one row with a NULL CLOB and one row with a 4002-character CLOB, converts the table
 * to an Avro stream via JdbcCommon and checks the TEXT value read back for each row.
 *
 * @throws Exception if any JDBC or Avro operation fails
 */
@Test
public void testClob() throws Exception {
    try (final Statement stmt = con.createStatement()) {
        stmt.executeUpdate("CREATE TABLE clobtest (id INT, text CLOB(64 K))");
        stmt.execute("INSERT INTO clobtest VALUES (41, NULL)");

        // 4002 characters cycling through the digits '0'..'9'.
        final char[] buffer = new char[4002];
        IntStream.range(0, 4002).forEach((i) -> buffer[i] = String.valueOf(i % 10).charAt(0));
        // Put a zero-byte in to test the buffer building logic
        buffer[1] = 0;

        // Both the prepared statement and the input stream are closed even if the insert fails.
        try (final PreparedStatement ps = con.prepareStatement("INSERT INTO clobtest VALUES (?, ?)");
             final ReaderInputStream isr = new ReaderInputStream(new CharArrayReader(buffer), Charset.defaultCharset())) {
            ps.setInt(1, 42);
            // - set the value of the input parameter to the input stream
            ps.setAsciiStream(2, isr, 4002);
            ps.execute();
        }

        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        try (final ResultSet resultSet = stmt.executeQuery("select * from clobtest")) {
            JdbcCommon.convertToAvroStream(resultSet, outStream, false);
        }
        final byte[] serializedBytes = outStream.toByteArray();
        assertNotNull(serializedBytes);

        // Deserialize bytes to records
        final InputStream instream = new ByteArrayInputStream(serializedBytes);
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                // Reuse record object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with
                // many items.
                record = dataFileReader.next(record);
                Integer id = (Integer) record.get("ID");
                Object o = record.get("TEXT");
                if (id == 41) {
                    // Row 41 was inserted with a NULL CLOB.
                    assertNull(o);
                } else {
                    assertNotNull(o);
                    final String text = o.toString();
                    assertEquals(4002, text.length());
                    // Third character should be '2'
                    assertEquals('2', text.charAt(2));
                }
            }
        }
    }
}
Also used : PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) PreparedStatement(java.sql.PreparedStatement) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BigInteger(java.math.BigInteger) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) CharArrayReader(java.io.CharArrayReader) ByteArrayInputStream(java.io.ByteArrayInputStream) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 40 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

From the class TestJdbcCommon, method testConvertToAvroStreamForDateTime.

/**
 * Converts a mocked one-row result set holding a DATE, a TIME and a TIMESTAMP column to Avro
 * using the supplied options, then hands every record read back to the three assertion
 * callbacks together with the original JDBC value.
 *
 * @param options         conversion options passed to JdbcCommon.convertToAvroStream
 * @param assertDate      callback receiving each record and the original java.sql.Date
 * @param assertTime      callback receiving each record and the original Time
 * @param assertTimeStamp callback receiving each record and the original Timestamp
 * @throws SQLException   declared because the mocked JDBC methods declare it
 * @throws IOException    if the Avro stream cannot be written or read
 * @throws ParseException declared by the signature; parse failures inside the helper are
 *                        rethrown as RuntimeException
 */
private void testConvertToAvroStreamForDateTime(JdbcCommon.AvroConversionOptions options, BiConsumer<GenericRecord, java.sql.Date> assertDate, BiConsumer<GenericRecord, Time> assertTime, BiConsumer<GenericRecord, Timestamp> assertTimeStamp) throws SQLException, IOException, ParseException {
    // Parses the text with the given pattern in the UTC time zone and yields epoch milliseconds.
    final BiFunction<String, String, Long> utcMillis = (pattern, text) -> {
        final SimpleDateFormat parser = new SimpleDateFormat(pattern);
        parser.setTimeZone(TimeZone.getTimeZone("UTC"));
        try {
            return parser.parse(text).getTime();
        } catch (ParseException e) {
            throw new RuntimeException(e);
        }
    };

    final ResultSetMetaData columnInfo = mock(ResultSetMetaData.class);
    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(columnInfo);
    when(columnInfo.getColumnCount()).thenReturn(3);
    when(columnInfo.getTableName(anyInt())).thenReturn("table");

    // Column 1: DATE
    final java.sql.Date date = new java.sql.Date(utcMillis.apply("yyyy/MM/dd", "2017/05/10"));
    when(columnInfo.getColumnType(1)).thenReturn(Types.DATE);
    when(columnInfo.getColumnName(1)).thenReturn("date");
    when(rs.getObject(1)).thenReturn(date);

    // Column 2: TIME
    final Time time = new Time(utcMillis.apply("HH:mm:ss.SSS", "12:34:56.789"));
    when(columnInfo.getColumnType(2)).thenReturn(Types.TIME);
    when(columnInfo.getColumnName(2)).thenReturn("time");
    when(rs.getObject(2)).thenReturn(time);

    // Column 3: TIMESTAMP
    final Timestamp timestamp = new Timestamp(utcMillis.apply("yyyy/MM/dd HH:mm:ss.SSS", "2017/05/11 19:59:39.123"));
    when(columnInfo.getColumnType(3)).thenReturn(Types.TIMESTAMP);
    when(columnInfo.getColumnName(3)).thenReturn("timestamp");
    when(rs.getObject(3)).thenReturn(timestamp);

    // next() answers true exactly once, so the result set appears to hold a single row.
    final AtomicInteger remainingRows = new AtomicInteger(1);
    Mockito.doAnswer((Answer<Boolean>) invocation -> remainingRows.getAndDecrement() > 0).when(rs).next();

    final ByteArrayOutputStream avroBytes = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(rs, avroBytes, options, null);

    final InputStream avroInput = new ByteArrayInputStream(avroBytes.toByteArray());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> avroStream = new DataFileStream<>(avroInput, datumReader)) {
        // The same record instance is handed back to next() on every iteration.
        GenericRecord reused = null;
        while (avroStream.hasNext()) {
            reused = avroStream.next(reused);
            assertDate.accept(reused, date);
            assertTime.accept(reused, time);
            assertTimeStamp.accept(reused, timestamp);
        }
    }
}
Also used : Connection(java.sql.Connection) CharArrayReader(java.io.CharArrayReader) Time(java.sql.Time) BiFunction(java.util.function.BiFunction) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ByteBuffer(java.nio.ByteBuffer) BigDecimal(java.math.BigDecimal) ByteArrayInputStream(java.io.ByteArrayInputStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResultSet(java.sql.ResultSet) Matchers.anyInt(org.mockito.Matchers.anyInt) BigInteger(java.math.BigInteger) ParseException(java.text.ParseException) ClassRule(org.junit.ClassRule) Conversions(org.apache.avro.Conversions) Utf8(org.apache.avro.util.Utf8) Schema(org.apache.avro.Schema) MathContext(java.math.MathContext) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) Set(java.util.Set) PreparedStatement(java.sql.PreparedStatement) DatumReader(org.apache.avro.io.DatumReader) ResultSetMetaData(java.sql.ResultSetMetaData) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Mockito.mock(org.mockito.Mockito.mock) Types(java.sql.Types) IntStream(java.util.stream.IntStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BeforeClass(org.junit.BeforeClass) SimpleDateFormat(java.text.SimpleDateFormat) GenericData(org.apache.avro.generic.GenericData) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) SQLException(java.sql.SQLException) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Charset(java.nio.charset.Charset) LogicalTypes(org.apache.avro.LogicalTypes) BiConsumer(java.util.function.BiConsumer) GenericRecord(org.apache.avro.generic.GenericRecord) LogicalType(org.apache.avro.LogicalType) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) DataFileStream(org.apache.avro.file.DataFileStream) IOException(java.io.IOException) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Field(java.lang.reflect.Field) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) 
Assert.assertNull(org.junit.Assert.assertNull) Statement(java.sql.Statement) Assert(org.junit.Assert) Assert.assertEquals(org.junit.Assert.assertEquals) DriverManager(java.sql.DriverManager) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Time(java.sql.Time) Timestamp(java.sql.Timestamp) ResultSetMetaData(java.sql.ResultSetMetaData) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ByteArrayInputStream(java.io.ByteArrayInputStream) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat)

Aggregations

DataFileStream (org.apache.avro.file.DataFileStream): 59, GenericRecord (org.apache.avro.generic.GenericRecord): 39, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 34, Test (org.junit.Test): 26, Schema (org.apache.avro.Schema): 21, ByteArrayInputStream (java.io.ByteArrayInputStream): 20, InputStream (java.io.InputStream): 19, IOException (java.io.IOException): 13, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11, File (java.io.File): 9, FileInputStream (java.io.FileInputStream): 9, ResultSet (java.sql.ResultSet): 9, HashMap (java.util.HashMap): 9, MockFlowFile (org.apache.nifi.util.MockFlowFile): 9, Statement (java.sql.Statement): 8, BufferedInputStream (java.io.BufferedInputStream): 7, HashSet (java.util.HashSet): 7, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 7, DataFileWriter (org.apache.avro.file.DataFileWriter): 7, Path (org.apache.hadoop.fs.Path): 7