Use of org.apache.nifi.stream.io.NullOutputStream in project nifi by apache.
From the class TestSchemaSwapSerializerDeserializer, method testWritePerformance.
@Test
@Ignore("For manual testing, in order to ensure that changes do not negatively impact performance")
public void testWritePerformance() throws IOException, InterruptedException {
    final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager();

    final List<FlowFileRecord> toSwap = new ArrayList<>(10000);
    final Map<String, String> attrs = new HashMap<>();
    for (int i = 0; i < 10000; i++) {
        attrs.put("i", String.valueOf(i));
        final FlowFileRecord ff = new MockFlowFile(attrs, i, resourceClaimManager);
        toSwap.add(ff);
    }

    final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class);
    Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4");

    final String swapLocation = "target/testRoundTrip.swap";
    final int iterations = 1000;

    final long start = System.nanoTime();
    final SwapSerializer serializer = new SchemaSwapSerializer();
    for (int i = 0; i < iterations; i++) {
        try (final OutputStream out = new NullOutputStream()) {
            serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out);
        }
    }

    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    System.out.println("Wrote " + iterations + " Swap Files in " + millis + " millis");
}
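In this benchmark, NullOutputStream serves as a sink that discards every byte written to it, so the measurement isolates serialization cost from disk I/O. A minimal sketch of such a stream using only java.io (the actual NiFi class may differ in detail):

import java.io.OutputStream;

// Sketch of a "null" output stream: all writes are accepted and discarded.
public class NullOutputStreamSketch extends OutputStream {

    @Override
    public void write(final int b) {
        // discard the single byte
    }

    @Override
    public void write(final byte[] b, final int off, final int len) {
        // discard the whole buffer without copying
    }
}

Overriding the array variant of write matters for a benchmark sink: the default OutputStream implementation would otherwise fall back to a byte-by-byte loop.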
Use of org.apache.nifi.stream.io.NullOutputStream in project nifi by apache.
From the class TestSchemaRecordReaderWriter, method testWritePerformance.
@Test
@Ignore("For local testing only")
public void testWritePerformance() throws IOException {
    // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before
    // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects
    // on performance of the repository.
    final ProvenanceEventRecord event = createEvent();
    final TocWriter tocWriter = new NopTocWriter();

    final int numEvents = 10_000_000;
    final long startNanos = System.nanoTime();
    try (final OutputStream nullOut = new NullOutputStream();
         final RecordWriter writer = new ByteArraySchemaRecordWriter(nullOut, "out", idGenerator, tocWriter, false, 0)) {
        writer.writeHeader(0L);
        for (int i = 0; i < numEvents; i++) {
            writer.writeRecord(event);
        }
    }

    final long nanos = System.nanoTime() - startNanos;
    final long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
    System.out.println("Took " + millis + " millis to write " + numEvents + " events");
}
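Both benchmarks report elapsed millis; a throughput figure is often easier to compare across runs. A small helper for that conversion, hypothetical rather than part of the NiFi source:

// Convert an event count and a duration in nanoseconds to events per second
// (illustrative only, not part of the NiFi test).
static double eventsPerSecond(final long numEvents, final long nanos) {
    return numEvents / (nanos / 1_000_000_000.0);
}

In the test above it could be called as eventsPerSecond(numEvents, nanos) just before the println.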
Use of org.apache.nifi.stream.io.NullOutputStream in project nifi by apache.
From the class TailFile, method recoverRolledFiles.
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * has already been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param tailFile the path of the file being tailed.
 * @param rolledOffFiles the files that have rolled over, oldest first.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through <position>.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this timestamp will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over,
 * <code>false</code> otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles,
        final Long expectedChecksum, final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[] { rolledOffFiles.size() });
        TailFileObject tfo = states.get(tailFile);

        // The first file that we find may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);
            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                     final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[] { firstFile, position });

                        // This is the same file that we were reading when we shut down. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(),
                                "FlowFile contains bytes 0 through " + position + " of source file",
                                TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[] { flowFile, firstFile });

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                            new Object[] { firstFile, checksumResult, expectedChecksum });
                    }
                }
            }
        }

        // Consume any remaining rolled-over files in their entirety. If the first file was already
        // fully processed, it has been removed from the list by the above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[] { e });
        return false;
    }
}
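The checksum comparison above re-reads the first position bytes through a CheckedInputStream, discards them into a NullOutputStream, and compares the resulting CRC32 to the value recorded at shutdown. The same technique as a standalone sketch using only the JDK (class and method names here are illustrative, not NiFi API):

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class ChecksumRecoverySketch {

    // Returns true if the CRC32 of the first 'position' bytes of the file
    // matches the checksum recorded when tailing last left off.
    static boolean matchesExpectedChecksum(final File file, final long position, final long expectedChecksum) throws IOException {
        try (final InputStream fis = new FileInputStream(file);
             final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
            final byte[] buffer = new byte[8192];
            long remaining = position;
            while (remaining > 0) {
                final int read = in.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                if (read < 0) {
                    return false; // the file is shorter than the recorded position
                }
                remaining -= read;
            }
            return in.getChecksum().getValue() == expectedChecksum;
        }
    }
}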
Use of org.apache.nifi.stream.io.NullOutputStream in project nifi by apache.
From the class TestHandleHttpRequest, method testFailToRegister.
@Test(timeout = 10000)
public void testFailToRegister() throws InitializationException, MalformedURLException, IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(HandleHttpRequest.class);
    runner.setProperty(HandleHttpRequest.PORT, "0");

    final MockHttpContextMap contextMap = new MockHttpContextMap();
    runner.addControllerService("http-context-map", contextMap);
    runner.enableControllerService(contextMap);
    runner.setProperty(HandleHttpRequest.HTTP_CONTEXT_MAP, "http-context-map");
    contextMap.setRegisterSuccessfully(false);

    // trigger processor to stop but not shut down.
    runner.run(1, false);

    try {
        final int[] responseCode = new int[1];
        responseCode[0] = 0;
        final Thread httpThread = new Thread(new Runnable() {
            @Override
            public void run() {
                HttpURLConnection connection = null;
                try {
                    final int port = ((HandleHttpRequest) runner.getProcessor()).getPort();
                    connection = (HttpURLConnection) new URL("http://localhost:" + port + "/my/path?query=true&value1=value1&value2=&value3&value4=apple=orange").openConnection();
                    connection.setDoOutput(false);
                    connection.setRequestMethod("GET");
                    connection.setRequestProperty("header1", "value1");
                    connection.setRequestProperty("header2", "");
                    connection.setRequestProperty("header3", "apple=orange");
                    connection.setConnectTimeout(3000);
                    connection.setReadTimeout(3000);

                    StreamUtils.copy(connection.getInputStream(), new NullOutputStream());
                } catch (final Throwable t) {
                    t.printStackTrace();
                    if (connection != null) {
                        try {
                            responseCode[0] = connection.getResponseCode();
                        } catch (IOException e) {
                            responseCode[0] = -1;
                        }
                    } else {
                        responseCode[0] = -2;
                    }
                }
            }
        });
        httpThread.start();

        while (responseCode[0] == 0) {
            // process the request.
            runner.run(1, false, false);
        }

        runner.assertTransferCount(HandleHttpRequest.REL_SUCCESS, 0);
        assertEquals(503, responseCode[0]);
    } finally {
        // shut down the server
        runner.run(1, true);
    }
}
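Here StreamUtils.copy(connection.getInputStream(), new NullOutputStream()) exists only to consume the response body; the assertion is on the status code, not the payload. The same drain can be written as a plain read loop, shown here as a minimal sketch rather than NiFi's utility:

import java.io.IOException;
import java.io.InputStream;

final class DrainUtil {

    // Fully consume an InputStream, discarding the contents.
    static void drain(final InputStream in) throws IOException {
        final byte[] buffer = new byte[4096];
        while (in.read(buffer) >= 0) {
            // keep reading until end of stream
        }
    }
}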
Use of org.apache.nifi.stream.io.NullOutputStream in project nifi by apache.
From the class ClientResponseUtils, method drainClientResponse.
public static void drainClientResponse(final Response response) {
    if (response != null) {
        BufferedInputStream bis = null;
        try {
            bis = new BufferedInputStream(response.readEntity(InputStream.class));
            IOUtils.copy(bis, new NullOutputStream());
        } catch (final IOException ioe) {
            logger.info("Failed clearing out non-client response buffer due to: " + ioe, ioe);
        } finally {
            IOUtils.closeQuietly(bis);
        }
    }
}
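On Java 11 and later, the JDK provides an equivalent sink via OutputStream.nullOutputStream(), so the same drain can be written without a custom stream class. A minimal sketch, assuming the same JAX-RS Response type used above:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import javax.ws.rs.core.Response;

final class ResponseDrainer {

    // Drain a JAX-RS response body into the JDK's built-in null sink (Java 11+).
    static void drain(final Response response) {
        if (response == null) {
            return;
        }
        try (final InputStream in = response.readEntity(InputStream.class)) {
            in.transferTo(OutputStream.nullOutputStream());
        } catch (final IOException ioe) {
            // best-effort: we only want the buffer cleared, as in drainClientResponse above
        }
    }
}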