Use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere:
the doStep method of class IngestSetupStep.
/**
 * Prepares the flight for a tabular ingest: validates the source blob URI,
 * records the dataset name, and generates a unique staging table name in the
 * flight's working map for later steps.
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
// Test hook: when the STOP fault is set, hold here until the CONTINUE fault is raised,
// re-checking every five seconds. Used to force lock-conflict scenarios in tests.
if (configService.testInsertFault(ConfigEnum.TABLE_INGEST_LOCK_CONFLICT_STOP_FAULT)) {
logger.info("TABLE_INGEST_LOCK_CONFLICT_STOP_FAULT");
for (;;) {
if (configService.testInsertFault(ConfigEnum.TABLE_INGEST_LOCK_CONFLICT_CONTINUE_FAULT)) {
break;
}
logger.info("Sleeping for CONTINUE FAULT");
TimeUnit.SECONDS.sleep(5);
}
logger.info("TABLE_INGEST_LOCK_CONFLICT_CONTINUE_FAULT");
}
IngestRequestModel ingestRequest = IngestUtils.getIngestRequestModel(context);
// The parsed URI is discarded on purpose: BigQuery consumes the raw "gs://" string,
// so parsing here serves only as up-front validation of the path.
IngestUtils.parseBlobUri(ingestRequest.getPath());
Dataset dataset = IngestUtils.getDataset(context, datasetService);
IngestUtils.putDatasetName(context, dataset.getName());
DatasetTable targetTable = IngestUtils.getDatasetTable(context, dataset);
// "st" tags the generated auxiliary table as a staging table.
String stagingTableName = DatasetUtils.generateAuxTableName(targetTable, "st");
IngestUtils.putStagingTableName(context, stagingTableName);
return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere:
the doStep method of class IngestValidateRefsStep.
/**
 * Validates every FILEREF value loaded into the staging table: each referenced
 * file id must exist and belong to this dataset. All columns are checked so one
 * complete error can be reported, rather than failing on the first bad id.
 *
 * @throws InvalidFileRefException if any invalid file ids are found; the details
 *     list is capped at MAX_ERROR_REF_IDS entries while the message carries the
 *     total invalid count.
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
Dataset dataset = IngestUtils.getDataset(context, datasetService);
Table table = IngestUtils.getDatasetTable(context, dataset);
String stagingTableName = IngestUtils.getStagingTableName(context);
// For each fileref column, scan the staging table and build an array of file ids.
// Then probe the file system to validate that the file exists and is part
// of this dataset. We check all ids and return one complete error.
List<String> invalidRefIds = new ArrayList<>();
for (Column column : table.getColumns()) {
if (StringUtils.equalsIgnoreCase(column.getType(), "FILEREF")) {
List<String> refIdArray = bigQueryPdao.getRefIds(dataset, stagingTableName, column);
List<String> badRefIds = fileDao.validateRefIds(dataset, refIdArray);
if (badRefIds != null) {
invalidRefIds.addAll(badRefIds);
}
}
}
int invalidIdCount = invalidRefIds.size();
if (invalidIdCount != 0) {
// StringBuilder (not StringBuffer: no synchronization needed) appeases findbugs,
// which flags string concatenation in loops.
StringBuilder errorMessage = new StringBuilder("Invalid file ids found during ingest (");
List<String> errorDetails = new ArrayList<>();
for (String badId : invalidRefIds) {
// Cap the details at exactly MAX_ERROR_REF_IDS entries. The previous
// post-increment check included one extra id beyond the stated cap.
if (errorDetails.size() >= MAX_ERROR_REF_IDS) {
// Fixed: a space was missing after the cap, producing e.g. "5out of".
errorMessage.append(MAX_ERROR_REF_IDS).append(" out of ");
break;
}
errorDetails.add(badId);
}
errorMessage.append(invalidIdCount).append(" returned in details)");
throw new InvalidFileRefException(errorMessage.toString(), errorDetails);
}
return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere:
the doStep method of class CreateDatasetAssetStep.
/**
 * Creates the requested asset specification on the dataset. An asset name must
 * be unique within a dataset; a collision fails the step fatally with a
 * BAD_REQUEST response and marks the collision in the working map.
 */
@Override
public StepResult doStep(FlightContext context) {
// TODO: Asset columns and tables need to match things in the dataset schema
Dataset dataset = getDataset(context);
FlightMap workingMap = context.getWorkingMap();
// Build the new spec from the request; name uniqueness is enforced by the DAO below.
AssetSpecification newAssetSpecification = getNewAssetSpec(context, dataset);
// Test hook: this fault deliberately throws so the undo path can be exercised.
try {
configService.fault(ConfigEnum.CREATE_ASSET_FAULT, () -> {
throw new RuntimeException("fault insertion");
});
} catch (Exception faultException) {
throw new RuntimeException(faultException);
}
try {
assetDao.create(newAssetSpecification, dataset.getId());
} catch (InvalidAssetException collision) {
// Name collision: surface BAD_REQUEST and flag the collision for the undo step.
FlightUtils.setErrorResponse(context, collision.getMessage(), HttpStatus.BAD_REQUEST);
workingMap.put(DatasetWorkingMapKeys.ASSET_NAME_COLLISION, true);
return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL, collision);
}
workingMap.put(JobMapKeys.STATUS_CODE.getKeyName(), HttpStatus.CREATED);
return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere:
the doStep method of class CreateDatasetAuthzBqJobUserStep.
/**
 * Grants the BigQuery job-user role on the dataset's data project to the
 * policy group emails recorded earlier in the flight's working map.
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
FlightMap workingMap = context.getWorkingMap();
UUID datasetId = workingMap.get(DatasetWorkingMapKeys.DATASET_ID, UUID.class);
Map<IamRole, String> policies = workingMap.get(DatasetWorkingMapKeys.POLICY_EMAILS, Map.class);
DatasetModel datasetModel = datasetService.retrieveModel(datasetService.retrieve(datasetId));
// The underlying service provides retries, so no retry logic is needed here.
resourceService.grantPoliciesBqJobUser(datasetModel.getDataProject(), policies.values());
return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in project jade-data-repo by DataBiosphere:
the doStep method of class CreateDatasetMetadataStep.
/**
 * Creates and flight-locks the dataset metadata row, then stashes the new
 * dataset id and the summary response in the working map. Any failure is
 * reported as a fatal step result rather than an escaping exception.
 */
@Override
public StepResult doStep(FlightContext context) {
try {
Dataset newDataset = DatasetUtils.convertRequestWithGeneratedNames(datasetRequest);
// Lock the row under this flight id so concurrent flights cannot touch it.
UUID datasetId = datasetDao.createAndLock(newDataset, context.getFlightId());
FlightMap workingMap = context.getWorkingMap();
workingMap.put(DatasetWorkingMapKeys.DATASET_ID, datasetId);
DatasetSummaryModel summaryModel =
    DatasetJsonConversion.datasetSummaryModelFromDatasetSummary(newDataset.getDatasetSummary());
workingMap.put(JobMapKeys.RESPONSE.getKeyName(), summaryModel);
return StepResult.getStepResultSuccess();
} catch (InvalidDatasetException invalidEx) {
return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL, invalidEx);
} catch (Exception ex) {
// Wrap any unexpected failure so callers see a consistent exception type.
InvalidDatasetException wrapped =
    new InvalidDatasetException("Cannot create dataset: " + datasetRequest.getName(), ex);
return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL, wrapped);
}
}
Aggregations