Use of bio.terra.service.dataset.Dataset in the jade-data-repo project by DataBiosphere.
The matchRowIds method of the BigQueryPdao class.
// For each table in a dataset (source), collect row id matches ON the row id.
// Renders the mapValuesToRows template against the source dataset's table and splits the
// input row ids into matched (exist in the source table) and mismatched (not found) sets.
public RowIdMatch matchRowIds(Snapshot snapshot, SnapshotSource source, String tableName, List<String> rowIds) throws InterruptedException {
    // One source: grab it and navigate to the relevant parts
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    // Fail with a descriptive message rather than the bare NoSuchElementException an
    // unchecked Optional.get() would produce when no map table matches the name.
    SnapshotMapTable mapTable = source.getSnapshotMapTables().stream()
        .filter(table -> table.getFromTable().getName().equals(tableName))
        .findFirst()
        .orElseThrow(() -> new NoSuchElementException(
            "No snapshot map table found for table name: " + tableName));
    // create a column to point to the row id column in the source table to check that passed row ids exist in it
    Column rowIdColumn = new Column().table(mapTable.getFromTable()).name(PDAO_ROW_ID_COLUMN);
    ST sqlTemplate = new ST(mapValuesToRowsTemplate);
    sqlTemplate.add("project", bigQueryProject.getProjectId());
    sqlTemplate.add("dataset", prefixName(source.getDataset().getName()));
    sqlTemplate.add("table", tableName);
    sqlTemplate.add("column", rowIdColumn.getName());
    sqlTemplate.add("inputVals", rowIds);
    // Execute the query building the row id match structure that tracks the matching
    // ids and the mismatched ids
    RowIdMatch rowIdMatch = new RowIdMatch();
    String sql = sqlTemplate.render();
    // parameterized logging avoids eager string concatenation when debug is disabled
    logger.debug("mapValuesToRows sql: {}", sql);
    TableResult result = bigQueryProject.query(sql);
    for (FieldValueList row : result.iterateAll()) {
        // Query returns (rowId, inputValue) pairs; a NULL rowId means the input value
        // did not match any row in the source table.
        FieldValue rowId = row.get(0);
        FieldValue inputValue = row.get(1);
        if (rowId.isNull()) {
            rowIdMatch.addMismatch(inputValue.getStringValue());
            logger.debug("rowId=<NULL> inVal={}", inputValue.getStringValue());
        } else {
            rowIdMatch.addMatch(inputValue.getStringValue(), rowId.getStringValue());
            logger.debug("rowId={} inVal={}", rowId.getStringValue(), inputValue.getStringValue());
        }
    }
    return rowIdMatch;
}
Use of bio.terra.service.dataset.Dataset in the jade-data-repo project by DataBiosphere.
The doStep method of the DeleteSnapshotPrimaryDataStep class.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    try {
        // this fault is used by the SnapshotConnectedTest > testOverlappingDeletes:
        // pause here until the test injects the CONTINUE fault, so the test can force
        // a lock conflict window while this flight is mid-delete.
        if (configService.testInsertFault(ConfigEnum.SNAPSHOT_DELETE_LOCK_CONFLICT_STOP_FAULT)) {
            logger.info("SNAPSHOT_DELETE_LOCK_CONFLICT_STOP_FAULT");
            while (!configService.testInsertFault(ConfigEnum.SNAPSHOT_DELETE_LOCK_CONFLICT_CONTINUE_FAULT)) {
                logger.info("Sleeping for CONTINUE FAULT");
                TimeUnit.SECONDS.sleep(5);
            }
            logger.info("SNAPSHOT_DELETE_LOCK_CONFLICT_CONTINUE_FAULT");
        }
        Snapshot snapshot = snapshotService.retrieve(snapshotId);
        bigQueryPdao.deleteSnapshot(snapshot);
        // Remove snapshot file references from the underlying datasets
        for (SnapshotSource snapshotSource : snapshot.getSnapshotSources()) {
            Dataset dataset = datasetService.retrieve(snapshotSource.getDataset().getId());
            dependencyDao.deleteSnapshotFileDependencies(dataset, snapshotId.toString());
        }
        fileDao.deleteFilesFromSnapshot(snapshot);
    } catch (SnapshotNotFoundException | DatasetNotFoundException nfe) {
        // If we do not find the snapshot or dataset, we assume things are already clean.
        // Log rather than swallow silently so the skipped cleanup is visible in traces.
        logger.debug("Snapshot or dataset already gone; primary data delete treated as complete", nfe);
    }
    return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in the jade-data-repo project by DataBiosphere.
The doStep method of the CreateSnapshotPrimaryDataFullViewStep class.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // A full-view snapshot is built directly from the dataset tables' live views.
    SnapshotRequestContentsModel requestContents = snapshotReq.getContents().get(0);
    // Look up the source dataset named in the request and the snapshot created earlier
    // in this flight, then hand both to BigQuery to materialize the live-view snapshot.
    Dataset sourceDataset = datasetservice.retrieveByName(requestContents.getDatasetName());
    Snapshot targetSnapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());
    bigQueryPdao.createSnapshotWithLiveViews(targetSnapshot, sourceDataset);
    return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in the jade-data-repo project by DataBiosphere.
The undoStep method of the SnapshotAuthzFileAclStep class.
@Override
public StepResult undoStep(FlightContext context) throws InterruptedException {
    // Undo of ACL grant: remove the snapshot readers policy email from the ACLs of
    // every file the snapshot references in its source dataset.
    FlightMap workingMap = context.getWorkingMap();
    UUID snapshotId = workingMap.get(SnapshotWorkingMapKeys.SNAPSHOT_ID, UUID.class);
    Snapshot snapshot = snapshotService.retrieve(snapshotId);
    Map<IamRole, String> policies = workingMap.get(SnapshotWorkingMapKeys.POLICY_MAP, Map.class);
    String readersPolicyEmail = policies.get(IamRole.READER);
    // TODO: when we support multiple datasets, we can generate more than one copy of this
    // step: one for each dataset. That is because each dataset keeps its file dependencies
    // in its own scope. For now, we know there is exactly one dataset and we take shortcuts.
    SnapshotSource snapshotSource = snapshot.getSnapshotSources().get(0);
    // getId() is already a UUID; no need to round-trip through a String
    Dataset dataset = datasetService.retrieve(snapshotSource.getDataset().getId());
    List<String> fileIds = fireStoreDao.getDatasetSnapshotFileIds(dataset, snapshotId.toString());
    try {
        gcsPdao.removeAclOnFiles(dataset, fileIds, readersPolicyEmail);
    } catch (StorageException ex) {
        // We don't let the exception stop us from continuing to remove the rest of the snapshot parts.
        // TODO: change this to whatever our alert-a-human log message is.
        logger.warn("NEEDS CLEANUP: Failed to remove snapshot reader ACLs from files", ex);
    }
    return StepResult.getStepResultSuccess();
}
Use of bio.terra.service.dataset.Dataset in the jade-data-repo project by DataBiosphere.
The doStep method of the SnapshotAuthzFileAclStep class.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // Grant the snapshot readers policy email read ACLs on every file the snapshot
    // references in its source dataset.
    FlightMap workingMap = context.getWorkingMap();
    UUID snapshotId = workingMap.get(SnapshotWorkingMapKeys.SNAPSHOT_ID, UUID.class);
    Snapshot snapshot = snapshotService.retrieve(snapshotId);
    Map<IamRole, String> policies = workingMap.get(SnapshotWorkingMapKeys.POLICY_MAP, Map.class);
    String readersPolicyEmail = policies.get(IamRole.READER);
    // TODO: when we support multiple datasets, we can generate more than one copy of this
    // step: one for each dataset. That is because each dataset keeps its file dependencies
    // in its own scope. For now, we know there is exactly one dataset and we take shortcuts.
    SnapshotSource snapshotSource = snapshot.getSnapshotSources().get(0);
    // getId() is already a UUID; no need to round-trip through a String
    Dataset dataset = datasetService.retrieve(snapshotSource.getDataset().getId());
    List<String> fileIds = fireStoreDao.getDatasetSnapshotFileIds(dataset, snapshotId.toString());
    try {
        // test-only fault: simulates the transient 400/badRequest IAM propagation error
        if (configService.testInsertFault(SNAPSHOT_GRANT_FILE_ACCESS_FAULT)) {
            throw new StorageException(400, "Fake IAM failure", "badRequest", null);
        }
        gcsPdao.setAclOnFiles(dataset, fileIds, readersPolicyEmail);
    } catch (StorageException ex) {
        // A 400/badRequest is likely transient ACL propagation: log a lot and retry.
        if (ex.getCode() == 400 && StringUtils.equals(ex.getReason(), "badRequest")) {
            logger.info("Maybe caught an ACL propagation error: " + ex.getMessage() + " reason: " + ex.getReason(), ex);
            return new StepResult(StepStatus.STEP_RESULT_FAILURE_RETRY, ex);
        }
        // Any other StorageException is a real failure: previously this was silently
        // swallowed and the step returned success. Rethrow so the flight fails and undoes.
        throw ex;
    }
    return StepResult.getStepResultSuccess();
}
Aggregations