Use of org.apache.avro.file.DataFileStream in project beam by apache.
From class AvroIOTest, method testMetadata().
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testMetadata() throws Exception {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");
  p.apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withMetadata(
                  ImmutableMap.<String, Object>of(
                      "stringKey", "stringValue",
                      "longKey", 100L,
                      "bytesKey", "bytesValue".getBytes())));
  p.run();
  DataFileStream dataFileStream =
      new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader());
  assertEquals("stringValue", dataFileStream.getMetaString("stringKey"));
  assertEquals(100L, dataFileStream.getMetaLong("longKey"));
  assertArrayEquals("bytesValue".getBytes(), dataFileStream.getMeta("bytesKey"));
}
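The GenericClass value type used by these tests is not shown in the snippet. A minimal sketch of what such a reflect-serializable POJO could look like (the field names and the @DefaultCoder annotation here are assumptions, not the Beam test's actual definition):

import java.util.Objects;
import org.apache.beam.sdk.coders.AvroCoder;
import org.apache.beam.sdk.coders.DefaultCoder;

@DefaultCoder(AvroCoder.class)
class GenericClass {
  int intField;
  String stringField;

  // Avro reflection requires a no-arg constructor.
  public GenericClass() {
  }

  public GenericClass(int intField, String stringField) {
    this.intField = intField;
    this.stringField = stringField;
  }

  // equals/hashCode let PAssert.containsInAnyOrder compare decoded values.
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof GenericClass)) {
      return false;
    }
    GenericClass that = (GenericClass) other;
    return intField == that.intField && Objects.equals(stringField, that.stringField);
  }

  @Override
  public int hashCode() {
    return Objects.hash(intField, stringField);
  }
}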
Use of org.apache.avro.file.DataFileStream in project beam by apache.
From class AvroIOTest, method testAvroIONullCodecWriteAndReadASingleFile().
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testAvroIONullCodecWriteAndReadASingleFile() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");
  p.apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.nullCodec()));
  p.run();
  PCollection<GenericClass> input =
      p.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
  PAssert.that(input).containsInAnyOrder(values);
  p.run();
  DataFileStream dataFileStream =
      new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader());
  assertEquals("null", dataFileStream.getMetaString("avro.codec"));
}
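The same reserved "avro.codec" metadata key can be used to verify any codec, not just the null codec. A small helper along these lines (ours, not part of the Beam test) would assert the codec of an arbitrary Avro container file; for a file written with CodecFactory.deflateCodec(9) the recorded name is "deflate":

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileInputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

/** Asserts that an Avro container file was written with the expected codec. */
static void assertCodec(File avroFile, String expectedCodec) throws Exception {
  try (DataFileStream<GenericRecord> stream =
      new DataFileStream<>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>())) {
    // Avro stores the codec name under the reserved "avro.codec" metadata key.
    assertEquals(expectedCodec, stream.getMetaString("avro.codec"));
  }
}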
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From class StandardContentViewerController, method doGet().
/**
* @param request servlet request
* @param response servlet response
* @throws ServletException if a servlet-specific error occurs
* @throws IOException if an I/O error occurs
*/
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    final ViewableContent content = (ViewableContent) request.getAttribute(ViewableContent.CONTENT_REQUEST_ATTRIBUTE);
    // handle json/xml specifically, treat others as plain text
    String contentType = content.getContentType();
    if (supportedMimeTypes.contains(contentType)) {
        final String formatted;
        // leave the content alone if specified
        if (DisplayMode.Original.equals(content.getDisplayMode())) {
            formatted = content.getContent();
        } else {
            if ("application/json".equals(contentType)) {
                // format json
                final ObjectMapper mapper = new ObjectMapper();
                final Object objectJson = mapper.readValue(content.getContentStream(), Object.class);
                formatted = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(objectJson);
            } else if ("application/xml".equals(contentType) || "text/xml".equals(contentType)) {
                // format xml
                final StringWriter writer = new StringWriter();
                try {
                    final StreamSource source = new StreamSource(content.getContentStream());
                    final StreamResult result = new StreamResult(writer);
                    final TransformerFactory transformFactory = TransformerFactory.newInstance();
                    final Transformer transformer = transformFactory.newTransformer();
                    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
                    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                    transformer.transform(source, result);
                } catch (final TransformerFactoryConfigurationError | TransformerException te) {
                    throw new IOException("Unable to transform content as XML: " + te, te);
                }
                // get the transformed xml
                formatted = writer.toString();
            } else if ("application/avro-binary".equals(contentType) || "avro/binary".equals(contentType)
                    || "application/avro+binary".equals(contentType)) {
                final StringBuilder sb = new StringBuilder();
                sb.append("[");
                // Use Avro conversions to display logical type values in human readable way.
                final GenericData genericData = new GenericData() {
                    @Override
                    protected void toString(Object datum, StringBuilder buffer) {
                        // Since these types are not quoted and produce a malformed JSON string, quote it here.
                        if (datum instanceof LocalDate || datum instanceof LocalTime || datum instanceof DateTime) {
                            buffer.append("\"").append(datum).append("\"");
                            return;
                        }
                        super.toString(datum, buffer);
                    }
                };
                genericData.addLogicalTypeConversion(new Conversions.DecimalConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.DateConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.TimeConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.TimestampConversion());
                final DatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(null, null, genericData);
                try (final DataFileStream<GenericData.Record> dataFileReader =
                        new DataFileStream<>(content.getContentStream(), datumReader)) {
                    while (dataFileReader.hasNext()) {
                        final GenericData.Record record = dataFileReader.next();
                        final String formattedRecord = genericData.toString(record);
                        sb.append(formattedRecord);
                        sb.append(",");
                        // Do not format more than 2 MB of content.
                        if (sb.length() > 1024 * 1024 * 2) {
                            break;
                        }
                    }
                }
                if (sb.length() > 1) {
                    sb.deleteCharAt(sb.length() - 1);
                }
                sb.append("]");
                final String json = sb.toString();
                final ObjectMapper mapper = new ObjectMapper();
                final Object objectJson = mapper.readValue(json, Object.class);
                formatted = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(objectJson);
                contentType = "application/json";
            } else {
                // leave plain text alone when formatting
                formatted = content.getContent();
            }
        }
        // defer to the jsp
        request.setAttribute("mode", contentType);
        request.setAttribute("content", formatted);
        request.getRequestDispatcher("/WEB-INF/jsp/codemirror.jsp").include(request, response);
    } else {
        final PrintWriter out = response.getWriter();
        out.println("Unexpected content type: " + contentType);
    }
}
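Stripped of the servlet plumbing and the logical-type handling, the Avro branch above reduces to streaming records through GenericData.toString and wrapping the results in a JSON array. A condensed sketch of that core pattern (the method name and the omission of the 2 MB cap are ours):

import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.DatumReader;

/** Renders every record of an Avro container stream as one JSON-style array string. */
static String avroToJsonArray(InputStream in) throws IOException {
    final GenericData genericData = GenericData.get();
    final DatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(null, null, genericData);
    final StringBuilder sb = new StringBuilder("[");
    try (DataFileStream<GenericData.Record> stream = new DataFileStream<>(in, datumReader)) {
        while (stream.hasNext()) {
            // GenericData.toString produces a JSON-like rendering of each record.
            sb.append(genericData.toString(stream.next())).append(',');
        }
    }
    if (sb.length() > 1) {
        sb.deleteCharAt(sb.length() - 1); // drop the trailing comma
    }
    return sb.append("]").toString();
}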
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From class ConvertAvroSchema, method onTrigger().
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingAvro = session.get();
    if (incomingAvro == null) {
        return;
    }
    String inputSchemaProperty = context.getProperty(INPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema inputSchema;
    try {
        inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + inputSchemaProperty);
        session.transfer(incomingAvro, FAILURE);
        return;
    }
    String outputSchemaProperty = context.getProperty(OUTPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema outputSchema;
    try {
        outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + outputSchemaProperty);
        session.transfer(incomingAvro, FAILURE);
        return;
    }
    final Map<String, String> fieldMapping = new HashMap<>();
    for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
        if (entry.getKey().isDynamic()) {
            fieldMapping.put(entry.getKey().getName(), entry.getValue());
        }
    }
    // Set locale
    final String localeProperty = context.getProperty(LOCALE).getValue();
    final Locale locale = localeProperty.equals(DEFAULT_LOCALE_VALUE)
            ? Locale.getDefault() : LocaleUtils.toLocale(localeProperty);
    final AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, fieldMapping, locale);
    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    final DataFileWriter<Record> failureWriter = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    failureWriter.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();
        final List<Record> badRecords = Lists.newLinkedList();
        FlowFile incomingAvroCopy = session.clone(incomingAvro);
        FlowFile outgoingAvro = session.write(incomingAvro, new StreamCallback() {
            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileStream<Record> stream =
                        new DataFileStream<Record>(in, new GenericDatumReader<Record>(converter.getInputSchema()))) {
                    try (DataFileWriter<Record> w = writer.create(outputSchema, out)) {
                        for (Record record : stream) {
                            try {
                                Record converted = converter.convert(record);
                                w.append(converted);
                                written.incrementAndGet();
                            } catch (AvroConversionException e) {
                                failures.add(e);
                                getLogger().error("Error converting data: " + e.getMessage());
                                badRecords.add(record);
                            }
                        }
                    }
                }
            }
        });
        FlowFile badOutput = session.write(incomingAvroCopy, new StreamCallback() {
            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
                    for (Record record : badRecords) {
                        w.append(record);
                    }
                }
            }
        });
        long errors = failures.count();
        // update only if file transfer is successful
        session.adjustCounter("Converted records", written.get(), false);
        // update only if file transfer is successful
        session.adjustCounter("Conversion errors", errors, false);
        if (written.get() > 0L) {
            session.transfer(outgoingAvro, SUCCESS);
        } else {
            session.remove(outgoingAvro);
            if (errors == 0L) {
                badOutput = session.putAttribute(badOutput, "errors", "No incoming records");
                session.transfer(badOutput, FAILURE);
            }
        }
        if (errors > 0L) {
            getLogger().warn("Failed to convert {}/{} records between Avro Schemas",
                    new Object[] { errors, errors + written.get() });
            badOutput = session.putAttribute(badOutput, "errors", failures.summary());
            session.transfer(badOutput, FAILURE);
        } else {
            session.remove(badOutput);
        }
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        session.transfer(incomingAvro, FAILURE);
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(incomingAvro, FAILURE);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer resource", e);
        }
        try {
            failureWriter.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer resource", e);
        }
    }
}
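The processor above delegates the per-record mapping to Kite's AvroRecordConverter; the container-file plumbing itself is just a DataFileStream feeding a DataFileWriter. A minimal sketch of that reduced pattern, relying on Avro's normal schema resolution instead of the Kite converter (the method name and signature are ours, not part of the NiFi processor):

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

/** Copies an Avro container stream, re-reading each record against readerSchema. */
static long copyWithReaderSchema(InputStream in, OutputStream out, Schema readerSchema) throws IOException {
    long written = 0L;
    try (DataFileStream<GenericRecord> stream =
                new DataFileStream<>(in, new GenericDatumReader<GenericRecord>(null, readerSchema));
            DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(readerSchema)).create(readerSchema, out)) {
        // The writer schema comes from the file header; Avro resolves it against readerSchema per record.
        for (GenericRecord record : stream) {
            writer.append(record);
            written++;
        }
    }
    return written;
}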
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From class StoreInKiteDataset, method onTrigger().
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final View<Record> target = load(context, flowFile);
    final Schema schema = target.getDataset().getDescriptor().getSchema();
    try {
        StopWatch timer = new StopWatch(true);
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try (DataFileStream<Record> stream =
                        new DataFileStream<>(in, AvroUtil.newDatumReader(schema, Record.class))) {
                    IncompatibleSchemaException.check(
                            SchemaValidationUtil.canRead(stream.getSchema(), schema),
                            "Incompatible file schema %s, expected %s", stream.getSchema(), schema);
                    long written = 0L;
                    try (DatasetWriter<Record> writer = target.newWriter()) {
                        for (Record record : stream) {
                            writer.write(record);
                            written += 1;
                        }
                    } finally {
                        session.adjustCounter("Stored records", written, true);
                    }
                }
            }
        });
        timer.stop();
        session.getProvenanceReporter().send(flowFile, target.getUri().toString(), timer.getDuration(TimeUnit.MILLISECONDS), true);
        session.transfer(flowFile, SUCCESS);
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(flowFile, FAILURE);
    } catch (ValidationException e) {
        getLogger().error(e.getMessage());
        getLogger().debug("Incompatible schema error", e);
        session.transfer(flowFile, INCOMPATIBLE);
    }
}
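SchemaValidationUtil.canRead used above is a NiFi Kite-bundle helper. Where that helper is not available, a roughly equivalent check can be built on Avro's own SchemaCompatibility API (this sketch is ours, not the processor's code):

import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;
import org.apache.avro.SchemaCompatibility.SchemaCompatibilityType;

/** Returns true when data written with writerSchema can be read using readerSchema. */
static boolean canRead(Schema writerSchema, Schema readerSchema) {
    return SchemaCompatibility.checkReaderWriterCompatibility(readerSchema, writerSchema)
            .getType() == SchemaCompatibilityType.COMPATIBLE;
}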