use of gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome in project beneficiary-fhir-data by CMSgov.
the class RdaServerJobIT method testS3.
/**
 * Runs an {@link RdaServerJob} configured for S3 mode against a freshly created test bucket and
 * checks the claims it serves over in-process gRPC channels.
 *
 * <p>FISS and MCS JSON sources are uploaded to the bucket, the job is started on an executor, and
 * then each service is called with a numeric argument (1098 for FISS, 1099 for MCS; presumably the
 * starting sequence number - confirm against the caller classes). The test asserts the sequence
 * numbers of the returned changes, that each stream ends afterwards, and that the version string
 * matches {@code S3:\d+:.*}. Finally it verifies the job finishes with {@code WORK_DONE}.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void testS3() throws Exception {
  AmazonS3 s3Client = createS3Client(REGION_DEFAULT);
  Bucket bucket = null;
  try {
    bucket = createTestBucket(s3Client);
    final String directoryPath = "files-go-here";
    final RdaServerJob.Config config =
        RdaServerJob.Config.builder()
            .serverMode(RdaServerJob.Config.ServerMode.S3)
            .serverName(SERVER_NAME)
            .s3Bucket(bucket.getName())
            .s3Directory(directoryPath)
            .build();
    final String fissObjectKey = config.getS3Sources().createFissObjectKey();
    final String mcsObjectKey = config.getS3Sources().createMcsObjectKey();
    uploadJsonToBucket(s3Client, bucket.getName(), fissObjectKey, fissClaimsSource);
    uploadJsonToBucket(s3Client, bucket.getName(), mcsObjectKey, mcsClaimsSource);

    final RdaServerJob job = new RdaServerJob(config);
    final ExecutorService exec = Executors.newCachedThreadPool();
    final Future<PipelineJobOutcome> outcome = exec.submit(job);
    try {
      waitForServerToStart(job);

      // FISS claims: expect sequence numbers 1098, 1099, 1100 and then end of stream.
      final ManagedChannel fissChannel = InProcessChannelBuilder.forName(SERVER_NAME).build();
      try {
        final FissClaimStreamCaller fissCaller = new FissClaimStreamCaller();
        final var fissStream = fissCaller.callService(fissChannel, CallOptions.DEFAULT, 1098);
        assertTrue(fissStream.hasNext());
        RdaChange<PreAdjFissClaim> fissChange =
            fissTransformer.transformClaim(fissStream.next());
        assertMatches(
            fissCaller.callVersionService(fissChannel, CallOptions.DEFAULT), "S3:\\d+:.*");
        assertEquals(1098L, fissChange.getSequenceNumber());
        assertTrue(fissStream.hasNext());
        fissChange = fissTransformer.transformClaim(fissStream.next());
        assertEquals(1099L, fissChange.getSequenceNumber());
        assertTrue(fissStream.hasNext());
        fissChange = fissTransformer.transformClaim(fissStream.next());
        assertEquals(1100L, fissChange.getSequenceNumber());
        assertFalse(fissStream.hasNext());
      } finally {
        // Channels hold resources until explicitly shut down; release them even on failure.
        fissChannel.shutdownNow();
      }

      // MCS claims: expect sequence numbers 1099, 1100 and then end of stream.
      final ManagedChannel mcsChannel = InProcessChannelBuilder.forName(SERVER_NAME).build();
      try {
        final McsClaimStreamCaller mcsCaller = new McsClaimStreamCaller();
        final var mcsStream = mcsCaller.callService(mcsChannel, CallOptions.DEFAULT, 1099);
        assertTrue(mcsStream.hasNext());
        RdaChange<PreAdjMcsClaim> mcsChange = mcsTransformer.transformClaim(mcsStream.next());
        assertMatches(
            mcsCaller.callVersionService(mcsChannel, CallOptions.DEFAULT), "S3:\\d+:.*");
        assertEquals(1099L, mcsChange.getSequenceNumber());
        assertTrue(mcsStream.hasNext());
        mcsChange = mcsTransformer.transformClaim(mcsStream.next());
        assertEquals(1100L, mcsChange.getSequenceNumber());
        assertFalse(mcsStream.hasNext());
      } finally {
        mcsChannel.shutdownNow();
      }
    } finally {
      // Stop the server job and confirm that it reported having done work.
      exec.shutdownNow();
      exec.awaitTermination(10, TimeUnit.SECONDS);
      assertEquals(PipelineJobOutcome.WORK_DONE, outcome.get());
    }
  } finally {
    // NOTE(review): bucket is still null here if createTestBucket threw; confirm that
    // deleteTestBucket tolerates a null bucket.
    deleteTestBucket(s3Client, bucket);
  }
}
use of gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome in project beneficiary-fhir-data by CMSgov.
the class AbstractRdaLoadJobTest method enforcesOneCallAtATime.
/**
 * Verifies that a second {@code call()} issued while a first one is still running returns {@code
 * NOTHING_TO_DO}, and that the first call subsequently completes with {@code WORK_DONE}.
 *
 * <p>Two latches coordinate the calls: the source factory of the test job counts down {@code
 * waitForStartup} and then blocks on {@code waitForCompletion}, so the first call stays in
 * progress until the main thread has checked the outcome of the second call. Afterwards the job's
 * meters are expected to show one call, one success, no failures and 100 processed objects.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void enforcesOneCallAtATime() throws Exception {
  // let the source indicate that it did some work to set the first call apart from the second one
  doReturn(100).when(source).retrieveAndProcessObjects(anyInt(), same(sink));

  // Used to allow the second call to happen after the first call has acquired its semaphore
  final CountDownLatch waitForStartup = new CountDownLatch(1);

  // Used to allow the first call to wait until the second call has completed before it proceeds.
  final CountDownLatch waitForCompletion = new CountDownLatch(1);

  // A test job that waits for the second job to complete before doing any work itself.
  job =
      new TestingLoadJob(
          config,
          () -> {
            // lets the main thread know we've acquired the semaphore
            waitForStartup.countDown();
            // waits until the second call is done before returning a source
            waitForCompletion.await();
            return source;
          },
          () -> sink,
          appMetrics);

  final ExecutorService pool = Executors.newCachedThreadPool();
  try {
    // this call will grab the semaphore and hold it until we count down the waitForCompletion
    Future<PipelineJobOutcome> firstCall = pool.submit(() -> job.call());

    // wait for the first call to have grabbed the semaphore before we make the second call
    waitForStartup.await();

    // this job should exit immediately without doing any work
    Future<PipelineJobOutcome> secondCall = pool.submit(() -> job.call());
    assertEquals(PipelineJobOutcome.NOTHING_TO_DO, secondCall.get());

    // now allow the first call to proceed and it should reflect that it has done some work
    waitForCompletion.countDown();
    assertEquals(PipelineJobOutcome.WORK_DONE, firstCall.get());
  } finally {
    // If an assertion above failed before the latch was released, the first call would stay
    // blocked forever (shutdown() does not interrupt running tasks). Counting down a latch that
    // is already at zero has no effect, so this is safe on the success path too.
    waitForCompletion.countDown();
    pool.shutdown();
    pool.awaitTermination(5, TimeUnit.SECONDS);
  }

  assertMeterReading(1, "calls", job.getMetrics().getCalls());
  assertMeterReading(1, "successes", job.getMetrics().getSuccesses());
  assertMeterReading(0, "failures", job.getMetrics().getFailures());
  assertMeterReading(100, "processed", job.getMetrics().getProcessed());
}
use of gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome in project beneficiary-fhir-data by CMSgov.
the class CcwRifLoadJob method call.
/**
 * Looks for a pending data set and, if one exists, waits for it to become available, hands it to
 * the listener for processing, and then cleans up after it.
 *
 * <p>Steps, in order: refresh the data set queue; return early if it is empty; poll once per
 * second until the oldest manifest's data set is available; build a {@link RifFilesEvent} from
 * asynchronously-downloading {@link S3RifFile}s; optionally start downloading the second pending
 * data set when enough temp space is free; block on the listener; then clean up temp files, mark
 * the manifest processed and submit a task to move the data set.
 *
 * @return {@link PipelineJobOutcome#NOTHING_TO_DO} if the queue was empty, otherwise {@link
 *     PipelineJobOutcome#WORK_DONE}
 * @throws RuntimeException if the thread is interrupted while waiting for the data set
 * @throws UncheckedIOException if the free space of the temp directory cannot be determined
 * @see gov.cms.bfd.pipeline.sharedutils.PipelineJob#call()
 */
@Override
public PipelineJobOutcome call() throws Exception {
  LOGGER.debug("Scanning for data sets to process...");

  // Update the queue from S3.
  dataSetQueue.updatePendingDataSets();

  // If no manifest was found, we're done (until next time).
  if (dataSetQueue.isEmpty()) {
    LOGGER.debug(LOG_MESSAGE_NO_DATA_SETS);
    listener.noDataAvailable();
    return PipelineJobOutcome.NOTHING_TO_DO;
  }

  // We've found the oldest manifest.
  DataSetManifest manifestToProcess = dataSetQueue.getNextDataSetToProcess().get();
  LOGGER.info(
      "Found data set to process: '{}'. There were '{}' total pending data sets and '{}' completed ones.",
      manifestToProcess,
      dataSetQueue.getPendingManifestsCount(),
      dataSetQueue.getCompletedManifestsCount().get());

  /*
   * We've got a data set to process. However, it might still be uploading
   * to S3, so we need to wait for that to complete before we start
   * processing it.
   */
  boolean alreadyLoggedWaitingEvent = false;
  while (!dataSetIsAvailable(manifestToProcess)) {
    /*
     * We're very patient here, so we keep looping, but it's prudent to
     * pause between each iteration. TODO should eventually time out,
     * once we know how long transfers might take
     */
    try {
      if (!alreadyLoggedWaitingEvent) {
        LOGGER.info("Data set not ready. Waiting for it to finish uploading...");
        alreadyLoggedWaitingEvent = true;
      }
      Thread.sleep(1000 * 1);
    } catch (InterruptedException e) {
      /*
       * Many Java applications use InterruptedExceptions to signal
       * that a thread should stop what it's doing ASAP. This app
       * doesn't, so this is unexpected, and accordingly, we don't
       * know what to do. Safest bet is to blow up. The interrupt
       * status is restored first so that code further up the stack
       * can still observe that an interrupt was requested.
       */
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }

  /*
   * Huzzah! We've got a data set to process and we've verified it's all there
   * waiting for us in S3. Now convert it into a RifFilesEvent (containing a List
   * of asynchronously-downloading S3RifFiles.
   */
  LOGGER.info(LOG_MESSAGE_DATA_SET_READY);
  List<S3RifFile> rifFiles =
      manifestToProcess.getEntries().stream()
          .map(
              manifestEntry ->
                  new S3RifFile(
                      appMetrics, manifestEntry, s3TaskManager.downloadAsync(manifestEntry)))
          .collect(Collectors.toList());
  RifFilesEvent rifFilesEvent =
      new RifFilesEvent(manifestToProcess.getTimestamp(), new ArrayList<>(rifFiles));

  /*
   * To save time for the next data set, peek ahead at it. If it's available and
   * it looks like there's enough disk space, start downloading it early in the
   * background.
   */
  Optional<DataSetManifest> secondManifestToProcess = dataSetQueue.getSecondDataSetToProcess();
  if (secondManifestToProcess.isPresent()
      && dataSetIsAvailable(secondManifestToProcess.get())) {
    Path tmpdir = Paths.get(System.getProperty("java.io.tmpdir"));
    long usableFreeTempSpace;
    try {
      usableFreeTempSpace = Files.getFileStore(tmpdir).getUsableSpace();
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }

    if (usableFreeTempSpace >= (50 * GIGA)) {
      secondManifestToProcess.get().getEntries().stream()
          .forEach(manifestEntry -> s3TaskManager.downloadAsync(manifestEntry));
    }
  }

  /*
   * Now we hand that off to the DataSetMonitorListener, to do the *real*
   * work of actually processing that data set. It's important that we
   * block until it's completed, in order to ensure that we don't end up
   * processing multiple data sets in parallel (which would lead to data
   * consistency problems).
   */
  listener.dataAvailable(rifFilesEvent);
  LOGGER.info(LOG_MESSAGE_DATA_SET_COMPLETE);

  /*
   * Now that the data set has been processed, we need to ensure that we
   * don't end up processing it again. We ensure this two ways: 1) we keep
   * a list of the data sets most recently processed, and 2) we rename the
   * S3 objects that comprise that data set. (#1 is required as S3
   * deletes/moves are only *eventually* consistent, so #2 may not take
   * effect right away.)
   */
  rifFiles.forEach(f -> f.cleanupTempFile());
  dataSetQueue.markProcessed(manifestToProcess);
  s3TaskManager.submit(new DataSetMoveTask(s3TaskManager, options, manifestToProcess));

  return PipelineJobOutcome.WORK_DONE;
}
use of gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome in project beneficiary-fhir-data by CMSgov.
the class AbstractRdaLoadJobTest method nothingToDo.
/**
 * Verifies that {@code call()} returns {@code NOTHING_TO_DO} when the source reports zero
 * processed objects, that both the source and the sink are closed, and that the meters show one
 * call, one success, no failures and nothing processed.
 *
 * @throws Exception if setting up the mocks fails
 */
@Test
public void nothingToDo() throws Exception {
  doReturn(source).when(sourceFactory).call();
  doReturn(sink).when(sinkFactory).call();
  doReturn(0).when(source).retrieveAndProcessObjects(anyInt(), same(sink));

  try {
    PipelineJobOutcome outcome = job.call();
    assertEquals(PipelineJobOutcome.NOTHING_TO_DO, outcome);
  } catch (Exception ex) {
    // Fail with the same message as before, but keep the cause so the report shows what broke.
    throw new AssertionError("job should NOT have thrown exception", ex);
  }

  verify(source).close();
  verify(sink).close();
  assertMeterReading(1, "calls", job.getMetrics().getCalls());
  assertMeterReading(1, "successes", job.getMetrics().getSuccesses());
  assertMeterReading(0, "failures", job.getMetrics().getFailures());
  assertMeterReading(0, "processed", job.getMetrics().getProcessed());
}
use of gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome in project beneficiary-fhir-data by CMSgov.
the class AbstractRdaLoadJobTest method workDone.
/**
 * Verifies that {@code call()} returns {@code WORK_DONE} when the source reports 25,000 processed
 * objects, that both the source and the sink are closed, and that the meters show one call, one
 * success, no failures and 25,000 processed.
 *
 * @throws Exception if setting up the mocks fails
 */
@Test
public void workDone() throws Exception {
  doReturn(source).when(sourceFactory).call();
  doReturn(sink).when(sinkFactory).call();
  doReturn(25_000).when(source).retrieveAndProcessObjects(anyInt(), same(sink));

  try {
    PipelineJobOutcome outcome = job.call();
    assertEquals(PipelineJobOutcome.WORK_DONE, outcome);
  } catch (Exception ex) {
    // Fail with the same message as before, but keep the cause so the report shows what broke.
    throw new AssertionError("job should NOT have thrown exception", ex);
  }

  verify(source).close();
  verify(sink).close();
  assertMeterReading(1, "calls", job.getMetrics().getCalls());
  assertMeterReading(1, "successes", job.getMetrics().getSuccesses());
  assertMeterReading(0, "failures", job.getMetrics().getFailures());
  assertMeterReading(25_000, "processed", job.getMetrics().getProcessed());
}
Aggregations