Example use of org.apache.avro.generic.GenericData.Record in the Apache NiFi project: the TestAvroRecordConverter#testIllegalConversion method.
/**
 * Verifies that attempting to convert a non-numeric string value into a
 * long-typed output field raises an AvroConversionException.
 */
@Test(expected = org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException.class)
public void testIllegalConversion() throws Exception {
    // Conversion plan: s1 string -> long (or null), s2 dropped,
    // l1 long -> string, l2 carried through unchanged.
    Schema inputSchema = SchemaBuilder.record("Input")
            .namespace("com.cloudera.edh")
            .fields()
            .nullableString("s1", "")
            .requiredString("s2")
            .optionalLong("l1")
            .requiredLong("l2")
            .endRecord();
    Schema outputSchema = SchemaBuilder.record("Output")
            .namespace("com.cloudera.edh")
            .fields()
            .optionalLong("s1")
            .optionalString("l1")
            .requiredLong("l2")
            .endRecord();
    AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, EMPTY_MAPPING);

    Record record = new Record(inputSchema);
    // "blah" cannot be parsed as a long, so convert() is expected to throw.
    record.put("s1", "blah");
    record.put("s2", "blah");
    record.put("l1", null);
    record.put("l2", 5L);
    converter.convert(record);
}
Example use of org.apache.avro.generic.GenericData.Record in the Apache NiFi project: the TestConvertAvroSchema#testBasicConversionWithCompression method.
/**
 * Runs ConvertAvroSchema with BZIP2 output compression over two valid rows
 * and one invalid row, then verifies the counters, the relationship routing,
 * and the contents of both the "failure" and "success" flow files.
 */
@Test
public void testBasicConversionWithCompression() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    runner.setProperty(AbstractKiteConvertProcessor.COMPRESSION_TYPE, CodecType.BZIP2.toString());
    Locale locale = Locale.getDefault();
    // Dynamic property: map the input field "primaryColor" to output field "color".
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    // The rejected record is routed to "failure" unchanged (still INPUT_SCHEMA).
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: previously the stream leaked if an assertion failed
    // inside the loop, since close() was only reached on the happy path.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    // The converted records land on "success" with the OUTPUT_SCHEMA applied.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(
            new ByteArrayInputStream(
                    runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
            successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Example use of org.apache.avro.generic.GenericData.Record in the Apache NiFi project: the TestConvertAvroSchema#dataBasic helper method.
/**
 * Builds an INPUT_SCHEMA record populated with the given field values.
 * Any argument may be null where the schema permits it; "price" is passed
 * as a raw string so callers can deliberately supply unparseable values.
 */
private Record dataBasic(String id, String primaryColor, String secondaryColor, String price) {
    Record record = new Record(INPUT_SCHEMA);
    record.put("id", id);
    record.put("primaryColor", primaryColor);
    record.put("secondaryColor", secondaryColor);
    record.put("price", price);
    return record;
}
Example use of org.apache.avro.generic.GenericData.Record in the Apache NiFi project: the TestConvertAvroSchema#testBasicConversion method.
/**
 * Runs ConvertAvroSchema (no compression) over two valid rows and one
 * invalid row, then verifies the counters, the relationship routing, and
 * the contents of both the "failure" and "success" flow files.
 */
@Test
public void testBasicConversion() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertAvroSchema.class);
    runner.assertNotValid();
    runner.setProperty(ConvertAvroSchema.INPUT_SCHEMA, INPUT_SCHEMA.toString());
    runner.setProperty(ConvertAvroSchema.OUTPUT_SCHEMA, OUTPUT_SCHEMA.toString());
    Locale locale = Locale.getDefault();
    // Dynamic property: map the input field "primaryColor" to output field "color".
    runner.setProperty("primaryColor", "color");
    runner.assertValid();
    NumberFormat format = NumberFormat.getInstance(locale);
    // Two valid rows, and one invalid because "free" is not a double.
    Record goodRecord1 = dataBasic("1", "blue", null, null);
    Record goodRecord2 = dataBasic("2", "red", "yellow", format.format(5.5));
    Record badRecord = dataBasic("3", "red", "yellow", "free");
    List<Record> input = Lists.newArrayList(goodRecord1, goodRecord2, badRecord);
    runner.enqueue(streamFor(input));
    runner.run();
    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 rows", 1, errors);
    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 1);
    // The rejected record is routed to "failure" unchanged (still INPUT_SCHEMA).
    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    GenericDatumReader<Record> reader = new GenericDatumReader<Record>(INPUT_SCHEMA);
    int count = 0;
    // try-with-resources: previously the stream leaked if an assertion failed
    // inside the loop, since close() was only reached on the happy path.
    try (DataFileStream<Record> stream = new DataFileStream<Record>(
            new ByteArrayInputStream(runner.getContentAsByteArray(incompatible)), reader)) {
        for (Record r : stream) {
            Assert.assertEquals(badRecord, r);
            count++;
        }
    }
    Assert.assertEquals(1, count);
    Assert.assertEquals("Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
    // The converted records land on "success" with the OUTPUT_SCHEMA applied.
    GenericDatumReader<Record> successReader = new GenericDatumReader<Record>(OUTPUT_SCHEMA);
    count = 0;
    try (DataFileStream<Record> successStream = new DataFileStream<Record>(
            new ByteArrayInputStream(
                    runner.getContentAsByteArray(runner.getFlowFilesForRelationship("success").get(0))),
            successReader)) {
        for (Record r : successStream) {
            if (count == 0) {
                Assert.assertEquals(convertBasic(goodRecord1, locale), r);
            } else {
                Assert.assertEquals(convertBasic(goodRecord2, locale), r);
            }
            count++;
        }
    }
    Assert.assertEquals(2, count);
}
Example use of org.apache.avro.generic.GenericData.Record in the Apache parquet-mr project: the ConvertCommand#run method.
/**
 * Converts a single Avro (or compatible) data file into a Parquet file.
 *
 * Reads the source schema (from an explicit .avsc file if supplied,
 * otherwise from the data file itself), optionally projects it to the
 * requested columns, and streams every record into a ParquetWriter.
 *
 * @return 0 on success
 * @throws IOException if reading the input or writing the output fails
 * @throws RuntimeException wrapping any record-level conversion failure,
 *         with the index of the failing record in the message
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    String source = targets.get(0);
    CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);

    // Prefer an explicitly supplied Avro schema; otherwise derive it from
    // the source data file.
    Schema schema;
    if (avroSchemaFile != null) {
        schema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        schema = getAvroSchema(source);
    }
    // Restrict the schema to the requested columns, if any were given.
    Schema projection = filterSchema(schema, columns);

    Path outPath = qualifiedPath(outputPath);
    FileSystem outFS = outPath.getFileSystem(getConf());
    if (overwrite && outFS.exists(outPath)) {
        console.debug("Deleting output file {} (already exists)", outPath);
        outFS.delete(outPath);
    }

    Iterable<Record> reader = openDataFile(source, projection);
    boolean threw = true;
    long count = 0;
    try {
        // Reuse the already-qualified outPath instead of re-resolving
        // qualifiedPath(outputPath) a second time.
        try (ParquetWriter<Record> writer = AvroParquetWriter.<Record>builder(outPath)
                .withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
                .withConf(getConf())
                .withCompressionCodec(codec)
                .withRowGroupSize(rowGroupSize)
                // enforce the 64-byte minimum dictionary page size
                .withDictionaryPageSize(dictionaryPageSize < 64 ? 64 : dictionaryPageSize)
                .withDictionaryEncoding(dictionaryPageSize != 0)
                .withPageSize(pageSize)
                .withDataModel(GenericData.get())
                .withSchema(projection)
                .build()) {
            for (Record record : reader) {
                writer.write(record);
                count += 1;
            }
        }
        threw = false;
    } catch (RuntimeException e) {
        // count identifies which record failed, to aid debugging bad input
        throw new RuntimeException("Failed on record " + count, e);
    } finally {
        // openDataFile may hand back a closeable reader (e.g. an Avro file
        // stream); suppress close() errors only if the body already threw.
        if (reader instanceof Closeable) {
            Closeables.close((Closeable) reader, threw);
        }
    }
    return 0;
}
Aggregations