Usage of bio.terra.service.dataset.Dataset in the jade-data-repo project (DataBiosphere): class IngestCleanupStep, method doStep.
@Override
public StepResult doStep(FlightContext context) {
  // Best-effort cleanup: we do not want to fail the ingest just because we
  // failed to clean up the staging table, so any error is logged and the
  // step still reports success.
  String stagingTableName = "<unknown>";
  try {
    Dataset dataset = IngestUtils.getDataset(context, datasetService);
    stagingTableName = IngestUtils.getStagingTableName(context);
    bigQueryPdao.deleteDatasetTable(dataset, stagingTableName);
  } catch (InterruptedException ex) {
    // Restore the interrupt status so the caller (e.g. Stairway) can observe it;
    // swallowing it here would silently discard the interruption.
    Thread.currentThread().interrupt();
    logger.error("Failure deleting ingest staging table: " + stagingTableName, ex);
  } catch (Exception ex) {
    logger.error("Failure deleting ingest staging table: " + stagingTableName, ex);
  }
  return StepResult.getStepResultSuccess();
}
Usage of bio.terra.service.dataset.Dataset in the jade-data-repo project (DataBiosphere): class IngestInsertIntoDatasetTableStep, method doStep.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
  // Gather everything this step needs from the flight context.
  Dataset dataset = IngestUtils.getDataset(context, datasetService);
  DatasetTable targetTable = IngestUtils.getDatasetTable(context, dataset);
  String stagingTableName = IngestUtils.getStagingTableName(context);
  IngestRequestModel ingestRequest = IngestUtils.getIngestRequestModel(context);
  PdaoLoadStatistics loadStatistics = IngestUtils.getIngestStatistics(context);

  // Build the ingest response: dataset identity, request echo, and the
  // row/bad-record counts collected by the staging load.
  IngestResponseModel ingestResponse = new IngestResponseModel();
  ingestResponse.dataset(dataset.getName());
  ingestResponse.datasetId(dataset.getId().toString());
  ingestResponse.table(ingestRequest.getTable());
  ingestResponse.path(ingestRequest.getPath());
  ingestResponse.loadTag(ingestRequest.getLoadTag());
  ingestResponse.badRowCount(loadStatistics.getBadRecords());
  ingestResponse.rowCount(loadStatistics.getRowCount());

  // Stash the response for the job, then move the staged rows into the target table.
  context.getWorkingMap().put(JobMapKeys.RESPONSE.getKeyName(), ingestResponse);
  bigQueryPdao.insertIntoDatasetTable(dataset, targetTable, stagingTableName);
  return StepResult.getStepResultSuccess();
}
Usage of bio.terra.service.dataset.Dataset in the jade-data-repo project (DataBiosphere): class IngestLoadTableStep, method undoStep.
@Override
public StepResult undoStep(FlightContext context) throws InterruptedException {
  // Undo the staging load by dropping the staging table this flight created.
  String stagingTableName = IngestUtils.getStagingTableName(context);
  Dataset dataset = IngestUtils.getDataset(context, datasetService);
  bigQueryPdao.deleteDatasetTable(dataset, stagingTableName);
  return StepResult.getStepResultSuccess();
}
Usage of bio.terra.service.dataset.Dataset in the jade-data-repo project (DataBiosphere): class IngestLoadTableStep, method doStep.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
  // Resolve the dataset, target table, and staging table for this ingest.
  Dataset dataset = IngestUtils.getDataset(context, datasetService);
  DatasetTable targetTable = IngestUtils.getDatasetTable(context, dataset);
  String stagingTableName = IngestUtils.getStagingTableName(context);
  IngestRequestModel ingestRequest = IngestUtils.getIngestRequestModel(context);

  // Load the request's data into the staging table and capture the load stats.
  PdaoLoadStatistics ingestStatistics =
      bigQueryPdao.loadToStagingTable(dataset, targetTable, stagingTableName, ingestRequest);

  // Save away the stats in the working map. We will use some of them later
  // when we make the annotations. Others are returned on the ingest response.
  IngestUtils.putIngestStatistics(context, ingestStatistics);
  return StepResult.getStepResultSuccess();
}
Usage of bio.terra.service.dataset.Dataset in the jade-data-repo project (DataBiosphere): class IngestCopyLoadHistoryToBQStep, method doStep.
@Override
public StepResult doStep(FlightContext context) {
  // Best-effort copy of the load history into BigQuery: failures are logged
  // and the step still returns success so the flight is not failed by
  // bookkeeping problems.
  FlightMap workingMap = context.getWorkingMap();
  String loadIdString = workingMap.get(LoadMapKeys.LOAD_ID, String.class);
  UUID loadId = UUID.fromString(loadIdString);
  UUID datasetId = UUID.fromString(datasetIdString);
  Dataset dataset = datasetService.retrieve(datasetId);
  int chunkNum = 0;
  List<BulkLoadHistoryModel> loadHistoryArray = null;
  String flightId = context.getFlightId();
  // Sanitize the flight id into a BigQuery-safe table name (alphanumerics and
  // underscores only).
  String tableName_FlightId = flightId.replaceAll("[^a-zA-Z0-9]", "_");
  try {
    Instant loadTime = context.getStairway().getFlightState(flightId).getSubmitted();
    bigQueryPdao.createStagingLoadHistoryTable(dataset, tableName_FlightId);
    TimeUnit.SECONDS.sleep(waitSeconds);
    // Page through the load history fileChunkSize entries at a time; a chunk
    // smaller than fileChunkSize signals the last page.
    while (loadHistoryArray == null || loadHistoryArray.size() == fileChunkSize) {
      loadHistoryArray = loadService.makeLoadHistoryArray(loadId, fileChunkSize, (chunkNum * fileChunkSize));
      chunkNum++;
      // send list plus load_tag, load_time to BQ to be put in a staging table
      if (!loadHistoryArray.isEmpty()) {
        bigQueryPdao.loadHistoryToStagingTable(dataset, tableName_FlightId, loadTag, loadTime, loadHistoryArray);
        // Sleep to avoid BQ rate limit error
        // From quick survey of logs, longest time to complete load query: 3 seconds
        TimeUnit.SECONDS.sleep(waitSeconds);
      }
    }
    // copy from staging to actual BQ table, then drop the staging table
    bigQueryPdao.mergeStagingLoadHistoryTable(dataset, tableName_FlightId);
    bigQueryPdao.deleteStagingLoadHistoryTable(dataset, tableName_FlightId);
  } catch (InterruptedException ex) {
    // Restore the interrupt status so the caller (e.g. Stairway) can observe
    // the interruption instead of it being silently swallowed here.
    Thread.currentThread().interrupt();
    logger.error("Failed during copy of load history to BQ for flight: " + flightId, ex);
  } catch (Exception ex) {
    logger.error("Failed during copy of load history to BQ for flight: " + flightId, ex);
  }
  return StepResult.getStepResultSuccess();
}
Aggregations