Search in sources:

Example 61 with Record

use of org.apache.avro.generic.GenericData.Record in project nifi by apache.

From the class TestAvroRecordConverter, the method testIllegalConversion:

/**
 * Verifies that converting a non-numeric string value into a long field
 * raises an {@code AvroConversionException}.
 */
@Test(expected = org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException.class)
public void testIllegalConversion() throws Exception {
    // Input schema: s1 nullable string, s2 required string,
    // l1 optional long, l2 required long.
    Schema inputSchema = SchemaBuilder.record("Input").namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    // Output schema: s1 becomes an optional long, s2 is dropped,
    // l1 becomes an optional string, l2 stays a required long.
    Schema outputSchema = SchemaBuilder.record("Output").namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .endRecord();
    Record source = new Record(inputSchema);
    source.put("s1", "blah");
    source.put("s2", "blah");
    source.put("l1", null);
    source.put("l2", 5L);
    // "blah" cannot be parsed as a long, so the conversion must throw.
    AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, EMPTY_MAPPING);
    converter.convert(source);
}
Also used : Schema(org.apache.avro.Schema) Record(org.apache.avro.generic.GenericData.Record) Test(org.junit.Test)

Example 62 with Record

use of org.apache.avro.generic.GenericData.Record in project nifi by apache.

From the class TestConvertAvroSchema, the method testBasicConversionWithCompression:

/**
 * End-to-end check of ConvertAvroSchema with BZIP2 compression enabled:
 * two records convert successfully and one is rejected ("free" is not a
 * parseable double for the "price" field). Verifies counters, relationship
 * transfer counts, the error summary attribute, and the contents of both
 * output flow files.
 */
@Test
public void testBasicConversionWithCompression() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    // The processor is invalid until both schemas are configured.
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    runner.setProperty(AbstractKiteConvertProcessor.COMPRESSION_TYPE, CodecType.BZIP2.toString());
    Locale locale = Locale.getDefault();
    // Dynamic property mapping input field "primaryColor" to output field "color".
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    // The failure flow file should contain only the bad record, readable
    // with the *input* schema.
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: the original only closed the stream on the success
    // path, leaking it whenever an assertion inside the loop failed.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    // The success flow file should contain both good records, converted to
    // the *output* schema, in enqueue order.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(
                    runner.getFlowFilesForRelationship("success").get(0))), successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Also used : Locale(java.util.Locale) TestRunner(org.apache.nifi.util.TestRunner) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) MockFlowFile(org.apache.nifi.util.MockFlowFile) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.avro.generic.GenericData.Record) NumberFormat(java.text.NumberFormat) Test(org.junit.Test)

Example 63 with Record

use of org.apache.avro.generic.GenericData.Record in project nifi by apache.

From the class TestConvertAvroSchema, the method dataBasic:

/**
 * Builds a test Record against INPUT_SCHEMA with the four basic fields
 * populated (null values are allowed for the nullable fields).
 */
private Record dataBasic(String id, String primaryColor, String secondaryColor, String price) {
    Record record = new Record(INPUT_SCHEMA);
    record.put("id", id);
    record.put("primaryColor", primaryColor);
    record.put("secondaryColor", secondaryColor);
    record.put("price", price);
    return record;
}
Also used : Record(org.apache.avro.generic.GenericData.Record)

Example 64 with Record

use of org.apache.avro.generic.GenericData.Record in project nifi by apache.

From the class TestConvertAvroSchema, the method testBasicConversion:

/**
 * End-to-end check of ConvertAvroSchema (no compression): two records
 * convert successfully and one is rejected ("free" is not a parseable
 * double for the "price" field). Verifies counters, relationship transfer
 * counts, the error summary attribute, and the contents of both output
 * flow files.
 */
@Test
public void testBasicConversion() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    // The processor is invalid until both schemas are configured.
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    Locale locale = Locale.getDefault();
    // Dynamic property mapping input field "primaryColor" to output field "color".
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    // The failure flow file should contain only the bad record, readable
    // with the *input* schema.
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: the original only closed the stream on the success
    // path, leaking it whenever an assertion inside the loop failed.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    // The success flow file should contain both good records, converted to
    // the *output* schema, in enqueue order.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(
                    runner.getFlowFilesForRelationship("success").get(0))), successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Also used : Locale(java.util.Locale) TestRunner(org.apache.nifi.util.TestRunner) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) MockFlowFile(org.apache.nifi.util.MockFlowFile) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.avro.generic.GenericData.Record) NumberFormat(java.text.NumberFormat) Test(org.junit.Test)

Example 65 with Record

use of org.apache.avro.generic.GenericData.Record in project parquet-mr by apache.

From the class ConvertCommand, the method run:

/**
 * Converts a single Avro data file to a Parquet file.
 *
 * <p>The schema comes from an explicit .avsc file when one was supplied,
 * otherwise it is derived from the source data file; an optional column
 * projection is applied before writing.
 *
 * @return 0 on success
 * @throws IOException if the source or output file cannot be accessed
 * @throws RuntimeException wrapping any failure while writing, with the
 *         index of the record that failed
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    String source = targets.get(0);
    CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
    // Explicit .avsc file wins; otherwise read the schema from the source file.
    Schema schema;
    if (avroSchemaFile != null) {
        schema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        schema = getAvroSchema(source);
    }
    // Optionally narrow the schema to the requested columns.
    Schema projection = filterSchema(schema, columns);
    Path outPath = qualifiedPath(outputPath);
    FileSystem outFS = outPath.getFileSystem(getConf());
    if (overwrite && outFS.exists(outPath)) {
        console.debug("Deleting output file {} (already exists)", outPath);
        // delete(Path, boolean) replaces the deprecated delete(Path),
        // which was equivalent to recursive=true.
        outFS.delete(outPath, true);
    }
    Iterable<Record> reader = openDataFile(source, projection);
    boolean threw = true;
    long count = 0;
    try {
        // Reuse outPath instead of recomputing qualifiedPath(outputPath).
        try (ParquetWriter<Record> writer = AvroParquetWriter.<Record>builder(outPath)
                .withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
                .withConf(getConf())
                .withCompressionCodec(codec)
                .withRowGroupSize(rowGroupSize)
                // Parquet requires a dictionary page size of at least 64 bytes.
                .withDictionaryPageSize(dictionaryPageSize < 64 ? 64 : dictionaryPageSize)
                .withDictionaryEncoding(dictionaryPageSize != 0)
                .withPageSize(pageSize)
                .withDataModel(GenericData.get())
                .withSchema(projection)
                .build()) {
            for (Record record : reader) {
                writer.write(record);
                count += 1;
            }
        }
        threw = false;
    } catch (RuntimeException e) {
        // Include the record index so write failures are easy to locate.
        throw new RuntimeException("Failed on record " + count, e);
    } finally {
        // The reader may be backed by an open file; close it, suppressing
        // close-time exceptions when the body already threw.
        if (reader instanceof Closeable) {
            Closeables.close((Closeable) reader, threw);
        }
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) Schema(org.apache.avro.Schema) Expressions.filterSchema(org.apache.parquet.cli.util.Expressions.filterSchema) FileSystem(org.apache.hadoop.fs.FileSystem) Closeable(java.io.Closeable) Record(org.apache.avro.generic.GenericData.Record)

Aggregations

Record (org.apache.avro.generic.GenericData.Record)96 Test (org.junit.Test)44 IndexedRecord (org.apache.avro.generic.IndexedRecord)43 Schema (org.apache.avro.Schema)33 ArrayList (java.util.ArrayList)24 GenericRecord (org.apache.avro.generic.GenericRecord)14 Field (org.apache.avro.Schema.Field)11 List (java.util.List)10 GenericData (org.apache.avro.generic.GenericData)10 TestRunner (org.apache.nifi.util.TestRunner)8 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)7 JsonObject (com.google.gson.JsonObject)6 DataFileStream (org.apache.avro.file.DataFileStream)6 DataFileWriter (org.apache.avro.file.DataFileWriter)6 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)6 Utf8 (org.apache.avro.util.Utf8)6 TMarketoOutputProperties (org.talend.components.marketo.tmarketooutput.TMarketoOutputProperties)6 ActivityRecord (com.marketo.mktows.ActivityRecord)5 ArrayOfLeadRecord (com.marketo.mktows.ArrayOfLeadRecord)5 LeadChangeRecord (com.marketo.mktows.LeadChangeRecord)5