use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache.
the class SequenceFileWriterImpl method writeSequenceFile.
/**
 * Rewrites the content of the given FlowFile as a Hadoop Sequence File by streaming it through
 * a {@link SequenceFile.Writer} inside a {@code session.write(...)} callback.
 *
 * <p>The writer is created with {@code InputStreamWritable} as the value class so the content can
 * be streamed, while a byte filter on the output rewrites the header so that the value class is
 * reported as {@code BytesWritable}.
 *
 * @param flowFile         the FlowFile whose content is converted; its size must not exceed
 *                         {@link Integer#MAX_VALUE}
 * @param session          the session used to overwrite the FlowFile content
 * @param configuration    the Hadoop configuration handed to the Sequence File writer
 * @param compressionType  the compression type handed to the Sequence File writer
 * @param compressionCodec the compression codec handed to the Sequence File writer
 * @return the FlowFile returned by {@code session.write(...)}
 * @throws IllegalArgumentException if the FlowFile is larger than {@link Integer#MAX_VALUE} bytes
 */
@Override
public FlowFile writeSequenceFile(final FlowFile flowFile, final ProcessSession session, final Configuration configuration, final CompressionType compressionType, final CompressionCodec compressionCodec) {
    if (flowFile.getSize() > Integer.MAX_VALUE) {
        // The separating space before "to Sequence File" keeps the FlowFile description readable.
        throw new IllegalArgumentException("Cannot write " + flowFile + " to Sequence File because its size is greater than the largest possible Integer");
    }
    // Only used for the debug log message at the end of this method.
    final String sequenceFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";

    // Analytics running on HDFS want data that is written with a BytesWritable. However, creating a
    // BytesWritable requires that we buffer the entire file into memory in a byte array.
    // We can create an FSFilterableOutputStream to wrap the FSDataOutputStream and use that to replace
    // the InputStreamWritable class name with the BytesWritable class name when we write the header.
    // This allows the Sequence File to say that the Values are of type BytesWritable (so they can be
    // read via the BytesWritable class) while allowing us to stream the data rather than buffering
    // entire files in memory.
    final byte[] toReplace, replaceWith;
    try {
        toReplace = InputStreamWritable.class.getCanonicalName().getBytes("UTF-8");
        replaceWith = BytesWritable.class.getCanonicalName().getBytes("UTF-8");
    } catch (final UnsupportedEncodingException e) {
        // This won't happen, but if it ever does, keep the original exception as the cause.
        throw new RuntimeException("UTF-8 is not a supported Character Format", e);
    }

    final StopWatch watch = new StopWatch(true);
    FlowFile sfFlowFile = session.write(flowFile, new StreamCallback() {
        @Override
        public void process(InputStream in, OutputStream out) throws IOException {
            // Use a FilterableOutputStream to change 'InputStreamWritable' to 'BytesWritable' - see comment
            // above for an explanation of why we want to do this.
            final ByteFilteringOutputStream bwos = new ByteFilteringOutputStream(out);

            // TODO: Adding this filter could be dangerous... A Sequence File's header contains 3 bytes: "SEQ",
            // followed by 1 byte that is the Sequence File version, followed by 2 "entries." These "entries"
            // contain the size of the Key/Value type and the Key/Value type. So, we will be writing the
            // value type as InputStreamWritable -- which we need to change to BytesWritable. This means that
            // we must also change the "size" that is written, but replacing this single byte could be
            // dangerous. However, we know exactly what will be written to the header, and we limit this at one
            // replacement, so we should be just fine.
            bwos.addFilter(toReplace, replaceWith, 1);
            bwos.addFilter((byte) InputStreamWritable.class.getCanonicalName().length(), (byte) BytesWritable.class.getCanonicalName().length(), 1);

            try (final FSDataOutputStream fsDataOutputStream = new FSDataOutputStream(bwos, new Statistics(""));
                    final SequenceFile.Writer writer = SequenceFile.createWriter(configuration, SequenceFile.Writer.stream(fsDataOutputStream), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(InputStreamWritable.class), SequenceFile.Writer.compression(compressionType, compressionCodec))) {
                // processInputStream is defined elsewhere in this class; presumably it appends the
                // FlowFile content read from 'in' to the writer -- confirm against its definition.
                processInputStream(in, flowFile, writer);
            } finally {
                // Stop timing whether or not the write succeeded.
                watch.stop();
            }
        }
    });
    logger.debug("Wrote Sequence File {} ({}).", new Object[] { sequenceFilename, watch.calculateDataRate(flowFile.getSize()) });
    return sfFlowFile;
}
use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache.
the class TestStandardProcessSession method testStreamAfterSessionClosesStream.
/**
 * Captures the streams handed to a StreamCallback and, once session.write has returned,
 * passes both captured streams to assertDisabled.
 */
@Test
public void testStreamAfterSessionClosesStream() throws IOException {
    // Enqueue a single FlowFile record backed by a newly created content claim.
    final ContentClaim contentClaim = contentRepo.create(false);
    final FlowFileRecord queuedRecord = new StandardFlowFileRecord.Builder()
            .contentClaim(contentClaim)
            .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
            .entryDate(System.currentTimeMillis())
            .build();
    flowFileQueue.put(queuedRecord);

    FlowFile flowFile = session.get();
    assertNotNull(flowFile);

    // The callback does no I/O; it only remembers the streams it was given.
    final AtomicReference<InputStream> capturedInput = new AtomicReference<>(null);
    final AtomicReference<OutputStream> capturedOutput = new AtomicReference<>(null);
    final StreamCallback capturingCallback = new StreamCallback() {
        @Override
        public void process(final InputStream input, final OutputStream output) throws IOException {
            capturedInput.set(input);
            capturedOutput.set(output);
        }
    };
    flowFile = session.write(flowFile, capturingCallback);

    // assertDisabled is defined elsewhere in this class; presumably it verifies that the
    // stream can no longer be used -- confirm against its definition.
    assertDisabled(capturedInput.get());
    assertDisabled(capturedOutput.get());
}
use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache.
the class TestStandardProcessSession method testProcessExceptionThrownIfCallbackThrowsInStreamCallback.
/**
 * Checks how session.write surfaces exceptions thrown from a StreamCallback:
 * a RuntimeException and a ProcessException are expected to come back as the same instance,
 * while an IOException is expected as the cause of a ProcessException.
 */
@Test
public void testProcessExceptionThrownIfCallbackThrowsInStreamCallback() {
    final FlowFile created = session.create();

    // Scenario 1: the callback throws a RuntimeException.
    final RuntimeException expectedRuntime = new RuntimeException();
    final StreamCallback throwsRuntime = new StreamCallback() {
        @Override
        public void process(final InputStream in, final OutputStream out) throws IOException {
            throw expectedRuntime;
        }
    };
    try {
        session.write(created, throwsRuntime);
        Assert.fail("Should have thrown RuntimeException");
    } catch (final RuntimeException caught) {
        assertTrue(expectedRuntime == caught);
    }

    // Scenario 2: the callback throws an IOException.
    final IOException expectedIo = new IOException();
    final StreamCallback throwsIo = new StreamCallback() {
        @Override
        public void process(final InputStream in, OutputStream out) throws IOException {
            throw expectedIo;
        }
    };
    try {
        session.write(created, throwsIo);
        Assert.fail("Should have thrown ProcessException");
    } catch (final ProcessException wrapper) {
        assertTrue(expectedIo == wrapper.getCause());
    }

    // Scenario 3: the callback throws a ProcessException.
    final ProcessException expectedProcess = new ProcessException();
    final StreamCallback throwsProcess = new StreamCallback() {
        @Override
        public void process(final InputStream in, OutputStream out) throws IOException {
            throw expectedProcess;
        }
    };
    try {
        session.write(created, throwsProcess);
        Assert.fail("Should have thrown ProcessException");
    } catch (final ProcessException caught) {
        assertTrue(expectedProcess == caught);
    }
}
use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache.
the class TestStandardProcessSession method testMissingFlowFileExceptionThrownWhenUnableToReadDataStreamCallback.
/**
 * Queues a one-byte FlowFile record whose content claim is built directly in the test, then
 * expects session.write with a StreamCallback to throw MissingFlowFileException.
 */
@Test
public void testMissingFlowFileExceptionThrownWhenUnableToReadDataStreamCallback() {
    final FlowFileRecord queuedRecord = new StandardFlowFileRecord.Builder()
            .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
            .entryDate(System.currentTimeMillis())
            .contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L))
            .size(1L)
            .build();
    flowFileQueue.put(queuedRecord);

    // The callback body is intentionally empty; the failure is expected from the session itself.
    final StreamCallback noOpCallback = new StreamCallback() {
        @Override
        public void process(InputStream in, OutputStream out) throws IOException {
        }
    };

    // attempt to read the data.
    try {
        final FlowFile polled = session.get();
        session.write(polled, noOpCallback);
        Assert.fail("Expected MissingFlowFileException");
    } catch (final MissingFlowFileException expected) {
        // Expected outcome: nothing further to verify.
    }
}
use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache.
the class TestStandardProcessSession method testContentNotFoundExceptionThrownWhenUnableToReadDataStreamCallbackOffsetTooLarge.
/**
 * Commits one FlowFile with an empty write, then queues a second record with a content claim
 * offset of 1000 and a size of 1000 and expects session.write with a StreamCallback to throw
 * MissingFlowFileException.
 *
 * NOTE(review): the test name and the fail message mention ContentNotFoundException, but the
 * exception caught is MissingFlowFileException -- confirm which one is intended.
 */
@Test
public void testContentNotFoundExceptionThrownWhenUnableToReadDataStreamCallbackOffsetTooLarge() {
    // First record: no explicit offset or size.
    final FlowFileRecord firstRecord = new StandardFlowFileRecord.Builder()
            .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
            .entryDate(System.currentTimeMillis())
            .contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L))
            .build();
    flowFileQueue.put(firstRecord);

    // Overwrite its content with a callback that writes nothing, then transfer and commit.
    final OutputStreamCallback writeNothing = new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
        }
    };
    FlowFile firstFlowFile = session.get();
    firstFlowFile = session.write(firstFlowFile, writeNothing);
    session.transfer(firstFlowFile);
    session.commit();

    // Second record: same claim parameters, but with offset 1000 and size 1000.
    final FlowFileRecord secondRecord = new StandardFlowFileRecord.Builder()
            .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
            .entryDate(System.currentTimeMillis())
            .contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L))
            .contentClaimOffset(1000L)
            .size(1000L)
            .build();
    flowFileQueue.put(secondRecord);

    final StreamCallback noOpCallback = new StreamCallback() {
        @Override
        public void process(InputStream in, OutputStream out) throws IOException {
        }
    };

    // attempt to read the data.
    try {
        // The result of this first get() is discarded; only the second one is written to.
        session.get();
        final FlowFile secondFlowFile = session.get();
        session.write(secondFlowFile, noOpCallback);
        Assert.fail("Expected ContentNotFoundException");
    } catch (final MissingFlowFileException expected) {
        // Expected outcome: nothing further to verify.
    }
}
Aggregations