Search in sources :

Example 11 with Dataset

use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere.

The doStep method of the class IngestCleanupStep.

/**
 * Best-effort removal of the ingest staging table.
 *
 * <p>Any exception raised while looking up the dataset, resolving the staging table name, or
 * deleting the table is logged and otherwise ignored, so this step always reports success.
 *
 * @param context flight context passed to the IngestUtils lookups
 * @return always the success step result
 */
@Override
public StepResult doStep(FlightContext context) {
    // Placeholder that shows up in the log when the failure happens before the real
    // staging table name has been resolved.
    String tableToDrop = "<unknown>";
    try {
        final Dataset owningDataset = IngestUtils.getDataset(context, datasetService);
        tableToDrop = IngestUtils.getStagingTableName(context);
        bigQueryPdao.deleteDatasetTable(owningDataset, tableToDrop);
    } catch (Exception cleanupFailure) {
        // A cleanup problem must not fail the insert: record it and carry on.
        logger.error("Failure deleting ingest staging table: " + tableToDrop, cleanupFailure);
    }
    return StepResult.getStepResultSuccess();
}
Also used : Dataset(bio.terra.service.dataset.Dataset)

Example 12 with Dataset

use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere.

The doStep method of the class IngestInsertIntoDatasetTableStep.

/**
 * Publishes the ingest response into the flight's working map and then inserts the staged
 * rows into the target dataset table.
 *
 * @param context flight context supplying the dataset, target table, staging table name,
 *     ingest request and load statistics through IngestUtils
 * @return the success step result once the insert call has returned
 * @throws InterruptedException declared by the step signature; propagated from the calls made
 *     here if they raise it
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    final Dataset targetDataset = IngestUtils.getDataset(context, datasetService);
    final DatasetTable destinationTable = IngestUtils.getDatasetTable(context, targetDataset);
    final String stagingName = IngestUtils.getStagingTableName(context);
    final IngestRequestModel request = IngestUtils.getIngestRequestModel(context);
    final PdaoLoadStatistics stats = IngestUtils.getIngestStatistics(context);

    // The response combines identifying data from the dataset and the request with the
    // row counts collected in the load statistics.
    final IngestResponseModel response = new IngestResponseModel()
        .dataset(targetDataset.getName())
        .datasetId(targetDataset.getId().toString())
        .table(request.getTable())
        .path(request.getPath())
        .loadTag(request.getLoadTag())
        .badRowCount(stats.getBadRecords())
        .rowCount(stats.getRowCount());

    // The response is stored in the working map before the insert is attempted.
    context.getWorkingMap().put(JobMapKeys.RESPONSE.getKeyName(), response);

    bigQueryPdao.insertIntoDatasetTable(targetDataset, destinationTable, stagingName);
    return StepResult.getStepResultSuccess();
}
Also used : Dataset(bio.terra.service.dataset.Dataset) IngestRequestModel(bio.terra.model.IngestRequestModel) DatasetTable(bio.terra.service.dataset.DatasetTable) PdaoLoadStatistics(bio.terra.common.PdaoLoadStatistics) IngestResponseModel(bio.terra.model.IngestResponseModel)

Example 13 with Dataset

use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere.

The undoStep method of the class IngestLoadTableStep.

/**
 * Undo action for the staging load: deletes the staging table from the dataset.
 *
 * @param context flight context used to resolve the dataset and the staging table name
 * @return the success step result once the delete call has returned
 * @throws InterruptedException declared by the step signature; propagated from the calls made
 *     here if they raise it
 */
@Override
public StepResult undoStep(FlightContext context) throws InterruptedException {
    final Dataset owningDataset = IngestUtils.getDataset(context, datasetService);
    final String tableToDrop = IngestUtils.getStagingTableName(context);

    bigQueryPdao.deleteDatasetTable(owningDataset, tableToDrop);

    return StepResult.getStepResultSuccess();
}
Also used : Dataset(bio.terra.service.dataset.Dataset)

Example 14 with Dataset

use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere.

The doStep method of the class IngestLoadTableStep.

/**
 * Loads the data described by the ingest request into the staging table and stores the
 * resulting load statistics in the flight context.
 *
 * @param context flight context used to resolve the dataset, target table, staging table
 *     name and ingest request, and to receive the load statistics
 * @return the success step result once the statistics have been stored
 * @throws InterruptedException declared by the step signature; propagated from the calls made
 *     here if they raise it
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    final Dataset targetDataset = IngestUtils.getDataset(context, datasetService);
    final DatasetTable destinationTable = IngestUtils.getDatasetTable(context, targetDataset);
    final String stagingName = IngestUtils.getStagingTableName(context);
    final IngestRequestModel request = IngestUtils.getIngestRequestModel(context);

    final PdaoLoadStatistics loadStats =
        bigQueryPdao.loadToStagingTable(targetDataset, destinationTable, stagingName, request);

    // Keep the statistics with the flight: some feed the annotations made later,
    // the rest are reported back on the ingest response.
    IngestUtils.putIngestStatistics(context, loadStats);

    return StepResult.getStepResultSuccess();
}
Also used : Dataset(bio.terra.service.dataset.Dataset) IngestRequestModel(bio.terra.model.IngestRequestModel) DatasetTable(bio.terra.service.dataset.DatasetTable) PdaoLoadStatistics(bio.terra.common.PdaoLoadStatistics)

Example 15 with Dataset

use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere.

The doStep method of the class IngestCopyLoadHistoryToBQStep.

/**
 * Copies the load history of a bulk load into BigQuery on a best-effort basis.
 *
 * <p>The history is read in chunks of {@code fileChunkSize} entries, each non-empty chunk is
 * written to a staging table named after the flight id, and the staging table is finally
 * merged into the load history table and deleted. Any failure inside that sequence is logged
 * and the step still reports success. If the thread is interrupted while waiting, the
 * interrupt flag is restored before returning so that callers can still observe it.
 *
 * <p>Failures while parsing the ids or retrieving the dataset happen outside the guarded
 * section and propagate to the caller.
 *
 * @param context flight context providing the working map, the flight id and the stairway
 * @return always the success step result when the method returns normally
 */
@Override
public StepResult doStep(FlightContext context) {
    FlightMap workingMap = context.getWorkingMap();
    String loadIdString = workingMap.get(LoadMapKeys.LOAD_ID, String.class);
    UUID loadId = UUID.fromString(loadIdString);
    UUID datasetId = UUID.fromString(datasetIdString);
    Dataset dataset = datasetService.retrieve(datasetId);
    String flightId = context.getFlightId();
    // Every character of the flight id other than an ASCII letter or digit becomes '_';
    // the result is used as the staging table name (presumably to satisfy BigQuery table
    // naming rules -- confirm against BigQueryPdao).
    String stagingTableName = flightId.replaceAll("[^a-zA-Z0-9]", "_");
    try {
        Instant loadTime = context.getStairway().getFlightState(flightId).getSubmitted();
        bigQueryPdao.createStagingLoadHistoryTable(dataset, stagingTableName);
        TimeUnit.SECONDS.sleep(waitSeconds);

        int chunkNum = 0;
        List<BulkLoadHistoryModel> loadHistoryArray;
        // A chunk shorter than fileChunkSize means the history has been read completely.
        do {
            loadHistoryArray =
                loadService.makeLoadHistoryArray(loadId, fileChunkSize, (chunkNum * fileChunkSize));
            chunkNum++;
            // send list plus load_tag, load_time to BQ to be put in a staging table
            if (!loadHistoryArray.isEmpty()) {
                bigQueryPdao.loadHistoryToStagingTable(
                    dataset, stagingTableName, loadTag, loadTime, loadHistoryArray);
                // Sleep to avoid BQ rate limit error
                // From quick survey of logs, longest time to complete load query: 3 seconds
                TimeUnit.SECONDS.sleep(waitSeconds);
            }
        } while (loadHistoryArray.size() == fileChunkSize);

        // copy from staging to actual BQ table
        bigQueryPdao.mergeStagingLoadHistoryTable(dataset, stagingTableName);
        bigQueryPdao.deleteStagingLoadHistoryTable(dataset, stagingTableName);
    } catch (InterruptedException ex) {
        // Catching InterruptedException clears the thread's interrupt status; restore it so
        // the interruption is not lost, while keeping this step best-effort.
        Thread.currentThread().interrupt();
        logger.error("Failed during copy of load history to BQ for flight: " + flightId, ex);
    } catch (Exception ex) {
        // Copying the load history is best-effort: log the problem and report success anyway.
        logger.error("Failed during copy of load history to BQ for flight: " + flightId, ex);
    }
    return StepResult.getStepResultSuccess();
}
Also used : Dataset(bio.terra.service.dataset.Dataset) Instant(java.time.Instant) BulkLoadHistoryModel(bio.terra.model.BulkLoadHistoryModel) UUID(java.util.UUID)

Aggregations

Dataset (bio.terra.service.dataset.Dataset)49 AssetSpecification (bio.terra.service.dataset.AssetSpecification)9 Snapshot (bio.terra.service.snapshot.Snapshot)9 FlightMap (bio.terra.stairway.FlightMap)9 UUID (java.util.UUID)9 DatasetSummaryModel (bio.terra.model.DatasetSummaryModel)8 DatasetTable (bio.terra.service.dataset.DatasetTable)8 Test (org.junit.Test)8 SpringBootTest (org.springframework.boot.test.context.SpringBootTest)8 IngestRequestModel (bio.terra.model.IngestRequestModel)7 SnapshotSource (bio.terra.service.snapshot.SnapshotSource)7 StepResult (bio.terra.stairway.StepResult)5 ArrayList (java.util.ArrayList)5 Column (bio.terra.common.Column)4 Table (bio.terra.common.Table)4 DataDeletionTableModel (bio.terra.model.DataDeletionTableModel)4 SnapshotRequestContentsModel (bio.terra.model.SnapshotRequestContentsModel)4 ValidationException (bio.terra.app.controller.exception.ValidationException)3 PdaoLoadStatistics (bio.terra.common.PdaoLoadStatistics)3 PdaoException (bio.terra.common.exception.PdaoException)3