use of org.apache.avro.generic.GenericData in project nifi by apache.
the class ConvertAvroToJSON method onTrigger.
/**
 * Converts the content of one incoming FlowFile from Avro to JSON.
 *
 * <p>When the SCHEMA property has a value, the content is decoded as a single binary-encoded
 * datum using that schema; otherwise it is read as an Avro data file stream that may hold any
 * number of records. In array-container mode multiple records are comma separated and enclosed
 * in brackets; without a container they are newline separated. A missing record is emitted as
 * EMPTY_JSON_OBJECT.
 *
 * <p>On success the MIME type core attribute is set to "application/json" and the FlowFile is
 * routed to REL_SUCCESS; a ProcessException raised while writing routes it to REL_FAILURE.
 *
 * @param context source of the processor's configured property values
 * @param session session used to fetch, rewrite and transfer the FlowFile
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String containerMode = context.getProperty(CONTAINER_OPTIONS).getValue();
    final boolean asArray = containerMode.equals(CONTAINER_ARRAY);
    // Wrapping a lone record (or no record at all) is only honoured in array-container mode,
    // so this flag being true already implies asArray.
    final boolean wrapSingle = context.getProperty(WRAP_SINGLE_RECORD).asBoolean() && asArray;
    final String schemaText = context.getProperty(SCHEMA).getValue();
    final boolean schemaConfigured = schemaText != null;

    try {
        flowFile = session.write(flowFile, new StreamCallback() {
            private final GenericData model = GenericData.get();

            /** Renders a record as JSON text and returns its UTF-8 bytes. */
            private byte[] encode(final GenericRecord datum) {
                return model.toString(datum).getBytes(StandardCharsets.UTF_8);
            }

            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                if (!schemaConfigured) {
                    // No explicit schema: the content is an Avro data file carrying its own schema.
                    try (final InputStream bufferedIn = new BufferedInputStream(rawIn);
                         final OutputStream bufferedOut = new BufferedOutputStream(rawOut);
                         final DataFileStream<GenericRecord> records =
                                 new DataFileStream<>(bufferedIn, new GenericDatumReader<GenericRecord>())) {

                        GenericRecord current = null;
                        int seen = 0;
                        if (records.hasNext()) {
                            current = records.next();
                            seen++;
                        }

                        // Open the array when more records follow in array mode, or when a
                        // single/absent record must be wrapped.
                        if ((records.hasNext() && asArray) || wrapSingle) {
                            bufferedOut.write('[');
                        }

                        // First record, or an empty JSON object when the file held no records.
                        if (current == null) {
                            bufferedOut.write(EMPTY_JSON_OBJECT);
                        } else {
                            bufferedOut.write(encode(current));
                        }

                        final char separator = asArray ? ',' : '\n';
                        while (records.hasNext()) {
                            bufferedOut.write(separator);
                            // Hand the previous instance back to the reader for reuse.
                            current = records.next(current);
                            bufferedOut.write(encode(current));
                            seen++;
                        }

                        // Mirror of the opening-bracket decision.
                        if ((seen > 1 && asArray) || wrapSingle) {
                            bufferedOut.write(']');
                        }
                    }
                } else {
                    // Explicit schema: parse it once and keep it for later invocations.
                    if (schema == null) {
                        schema = new Schema.Parser().parse(schemaText);
                    }
                    try (final InputStream bufferedIn = new BufferedInputStream(rawIn);
                         final OutputStream bufferedOut = new BufferedOutputStream(rawOut)) {
                        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
                        final BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bufferedIn, null);
                        final GenericRecord datum = datumReader.read(null, decoder);

                        if (wrapSingle) {
                            bufferedOut.write('[');
                        }
                        if (datum == null) {
                            bufferedOut.write(EMPTY_JSON_OBJECT);
                        } else {
                            bufferedOut.write(encode(datum));
                        }
                        if (wrapSingle) {
                            bufferedOut.write(']');
                        }
                    }
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to convert {} from Avro to JSON due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
    session.transfer(flowFile, REL_SUCCESS);
}
use of org.apache.avro.generic.GenericData in project incubator-gobblin by apache.
the class AvroGenericRecordAccessor method set.
/**
 * Sets a value at a dot-separated field path, recursing down through nested containers to
 * reach the record that owns the final path segment.
 *
 * <p>While walking the path, a segment is resolved as a field name on a {@code GenericRecord},
 * as an integer index on a {@code List}, or as a key on a {@code Map}. The container reached
 * before the last segment must be a {@code GenericRecord}. After the value is stored it is
 * validated against the field's schema; if validation fails the previous value is restored.
 *
 * @param fieldName dot-separated path of the field to set
 * @param value     value to store; may be {@code null}
 * @throws FieldDoesNotExistException if an intermediate segment resolves to {@code null}, or
 *                                    if Avro raises an {@code AvroRuntimeException}
 * @throws IllegalArgumentException   if the parent of the final segment is not a record
 * @throws IncorrectTypeException     if the value does not validate against the field's schema
 */
private void set(String fieldName, Object value) {
    try {
        Iterator<String> levels = Splitter.on(".").split(fieldName).iterator();
        String subField = levels.next();
        Object subRecord = record;

        // Descend until subField names the last path segment and subRecord is its parent.
        while (levels.hasNext()) {
            if (subRecord instanceof GenericRecord) {
                subRecord = ((GenericRecord) subRecord).get(subField);
            } else if (subRecord instanceof List) {
                subRecord = ((List<?>) subRecord).get(Integer.parseInt(subField));
            } else if (subRecord instanceof Map) {
                subRecord = ((Map<?, ?>) subRecord).get(subField);
            }
            if (subRecord == null) {
                throw new FieldDoesNotExistException("Field " + subField + " not found when trying to set " + fieldName);
            }
            subField = levels.next();
        }

        if (!(subRecord instanceof GenericRecord)) {
            throw new IllegalArgumentException("Field " + fieldName + " does not refer to a record type.");
        }

        GenericRecord toInsert = (GenericRecord) subRecord;
        // Remember the previous value so a failed validation can be rolled back.
        Object oldValue = toInsert.get(subField);
        toInsert.put(subField, value);

        Schema.Field changedField = toInsert.getSchema().getField(subField);
        GenericData genericData = GenericData.get();
        boolean valid = genericData.validate(changedField.schema(), genericData.getField(toInsert, changedField.name(), changedField.pos()));
        if (!valid) {
            toInsert.put(subField, oldValue);
            // A null value fails validation for non-nullable fields; describe it without
            // dereferencing it so the caller gets IncorrectTypeException rather than an NPE.
            String valueType = (value == null) ? "null" : value.getClass().getCanonicalName();
            throw new IncorrectTypeException("Incorrect type - can't insert a " + valueType + " into an Avro record of type " + changedField.schema().getType().toString());
        }
    } catch (AvroRuntimeException e) {
        throw new FieldDoesNotExistException("Field not found setting name " + fieldName, e);
    }
}
use of org.apache.avro.generic.GenericData in project parquet-mr by apache.
the class AvroReadSupport method prepareForRead.
/**
 * Builds the record materializer used to assemble Avro objects from Parquet data.
 *
 * <p>The Avro schema is taken from the first source that provides one: the read schema in the
 * read-support metadata, the schema stored in the file metadata under the current key, the
 * schema stored under the old key, and finally a conversion of the requested Parquet schema.
 *
 * @param configuration    configuration used for schema conversion and data model lookup
 * @param keyValueMetaData key/value metadata read from the file
 * @param fileSchema       Parquet schema of the file (not consulted here)
 * @param readContext      supplies the requested schema and the read-support metadata
 * @return a compatibility materializer when the AVRO_COMPATIBILITY metadata entry parses as
 *         true, otherwise an {@code AvroRecordMaterializer}
 */
@Override
public RecordMaterializer<T> prepareForRead(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
    Map<String, String> readMetadata = readContext.getReadSupportMetadata();
    MessageType requestedSchema = readContext.getRequestedSchema();

    // Resolve the schema text by priority: user-provided read schema first, then the schema
    // stored in the file metadata (current key before the old key).
    String schemaJson = readMetadata.get(AVRO_READ_SCHEMA_METADATA_KEY);
    if (schemaJson == null) {
        schemaJson = keyValueMetaData.get(AVRO_SCHEMA_METADATA_KEY);
    }
    if (schemaJson == null) {
        schemaJson = keyValueMetaData.get(OLD_AVRO_SCHEMA_METADATA_KEY);
    }

    Schema avroSchema;
    if (schemaJson == null) {
        // Nothing stored anywhere: derive an Avro schema from the requested Parquet schema.
        avroSchema = new AvroSchemaConverter(configuration).convert(requestedSchema);
    } else {
        avroSchema = new Schema.Parser().parse(schemaJson);
    }

    GenericData dataModel = getDataModel(configuration);

    // parseBoolean treats a missing (null) entry as false.
    if (Boolean.parseBoolean(readMetadata.get(AvroReadSupport.AVRO_COMPATIBILITY))) {
        return newCompatMaterializer(requestedSchema, avroSchema, dataModel);
    }
    return new AvroRecordMaterializer<T>(requestedSchema, avroSchema, dataModel);
}
use of org.apache.avro.generic.GenericData in project parquet-mr by apache.
the class TestReadWrite method testDecimalValues.
/**
 * Round-trips 1000 pseudo-random decimal values through a Parquet file using a generic data
 * model with the decimal logical-type conversion registered, then checks that the values come
 * back as {@code BigDecimal} instances equal to what was written.
 */
@Test
public void testDecimalValues() throws Exception {
    Schema decimalSchema = Schema.createRecord("myrecord", null, null, false);
    Schema decimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
    decimalSchema.setFields(Collections.singletonList(new Schema.Field("dec", decimal, null, null)));

    // add the decimal conversion to a generic data model
    GenericData decimalSupport = new GenericData();
    decimalSupport.addLogicalTypeConversion(new Conversions.DecimalConversion());

    // Only the path is needed; remove the file the temp rule just created before writing.
    File file = temp.newFile("decimal.parquet");
    file.delete();
    Path path = new Path(file.toString());

    List<GenericRecord> expected = Lists.newArrayList();
    ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(path).withDataModel(decimalSupport).withSchema(decimalSchema).build();
    try {
        // Fixed seed keeps the generated values reproducible between runs.
        Random random = new Random(34L);
        GenericRecordBuilder builder = new GenericRecordBuilder(decimalSchema);
        for (int i = 0; i < 1000; i += 1) {
            BigDecimal dec = new BigDecimal(new BigInteger(31, random), 2);
            builder.set("dec", dec);
            GenericRecord rec = builder.build();
            expected.add(rec);
            // Write the record that was just recorded as expected instead of building a copy.
            writer.write(rec);
        }
    } finally {
        // Close even when a write fails so the file handle is not leaked.
        writer.close();
    }

    List<GenericRecord> records = Lists.newArrayList();
    ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(path).withDataModel(decimalSupport).disableCompatibility().build();
    try {
        GenericRecord rec;
        while ((rec = reader.read()) != null) {
            records.add(rec);
        }
    } finally {
        // Close even when a read fails so the file handle is not leaked.
        reader.close();
    }

    Assert.assertTrue("dec field should be a BigDecimal instance", records.get(0).get("dec") instanceof BigDecimal);
    Assert.assertEquals("Content should match", expected, records);
}
use of org.apache.avro.generic.GenericData in project flink by apache.
the class AvroSerializationSchema method checkAvroInitialized.
/**
 * Lazily creates the Avro schema, datum writer, output buffer and binary encoder.
 *
 * <p>Does nothing when a datum writer already exists. For record classes assignable to
 * {@code SpecificRecord} the schema is obtained from {@code SpecificData}; otherwise it is
 * parsed from the stored schema string and paired with a {@code GenericData} bound to the
 * current thread's context class loader.
 */
protected void checkAvroInitialized() {
    final boolean alreadyInitialized = datumWriter != null;
    if (alreadyInitialized) {
        return;
    }

    final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
    final boolean specificRecord = SpecificRecord.class.isAssignableFrom(recordClazz);

    if (!specificRecord) {
        // Generic records: the schema comes from its serialized string form.
        this.schema = new Schema.Parser().parse(this.schemaString);
        this.datumWriter = new GenericDatumWriter<>(this.schema, new GenericData(contextLoader));
    } else {
        // Generated specific records: the schema is derived from the record class.
        final Schema specificSchema = SpecificData.get().getSchema(recordClazz);
        this.datumWriter = new SpecificDatumWriter<>(specificSchema);
        this.schema = specificSchema;
    }

    this.arrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().directBinaryEncoder(this.arrayOutputStream, null);
}
Aggregations