Use of bio.terra.model.DatasetModel in project jade-data-repo by DataBiosphere.
The class CreateSnapshotPrimaryDataQueryStep, method doStep.
/**
 * Runs the snapshot request's query to select the row ids that seed the snapshot.
 *
 * <p>Only the first entry of the request contents and the first dataset named in the query are
 * used; the TODOs below note that multi-dataset snapshots will need a loop here.
 *
 * @param context the flight context (not read by this step)
 * @return a success step result once the row-id query has been handed to {@code bigQueryPdao}
 * @throws AssetNotFoundException if the dataset has no asset specification with the requested name
 * @throws InvalidQueryException if the asset's root table is not among the tables named by the query
 * @throws InterruptedException as declared by the overridden method
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // TODO: this assumes single-dataset snapshots, will need to add a loop for multiple
    // (based on the validation flight step that already occurred.)
    Snapshot targetSnapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());

    // The first contents entry supplies both the asset name and the query text.
    SnapshotRequestQueryModel querySpec = snapshotReq.getContents().get(0).getQuerySpec();
    String requestedAssetName = querySpec.getAssetName();
    Query parsedQuery = Query.parse(querySpec.getQuery());

    // TODO this makes the assumption that there is only one dataset
    // (based on the validation flight step that already occurred.)
    // This will change when more than 1 dataset is allowed
    String sourceDatasetName = parsedQuery.getDatasetNames().get(0);
    Dataset sourceDataset = datasetService.retrieveByName(sourceDatasetName);
    DatasetModel sourceDatasetModel = datasetService.retrieveModel(sourceDataset);

    // The asset's root table is what the filtered row ids are applied to.
    AssetSpecification asset = sourceDataset
        .getAssetSpecificationByName(requestedAssetName)
        .orElseThrow(() -> new AssetNotFoundException("Expected asset specification"));

    // Translate the parsed query to SQL, resolving the dataset name through a one-entry map.
    String translatedSql = parsedQuery.translateSql(
        new BigQueryVisitor(Collections.singletonMap(sourceDatasetName, sourceDatasetModel)));

    // The grammar only picks up table names in the FROM clause (there may be more than one);
    // the asset's root table has to be one of them.
    List<String> queriedTables = parsedQuery.getTableNames();
    String rootTableName = asset.getRootTable().getTable().getName();
    if (queriedTables.contains(rootTableName)) {
        // Insert the selected row ids into the snapshot row ids table; the rest of the
        // relationship walking is kicked off from there.
        bigQueryPdao.queryForRowIds(asset, targetSnapshot, translatedSql);
        return StepResult.getStepResultSuccess();
    }
    throw new InvalidQueryException("The root table of the selected asset is not present in this query");
}
Use of bio.terra.model.DatasetModel in project jade-data-repo by DataBiosphere.
The class DatasetIntegrationTest, method testSoftDeleteNotInFullView.
/**
 * Checks that a full-view snapshot created after a soft delete reports the reduced row counts,
 * while a full-view snapshot created before the delete keeps its original counts.
 */
@Test
public void testSoftDeleteNotInFullView() throws Exception {
    datasetId = ingestedDataset();

    // Pick the rows that will be soft deleted later on.
    // NOTE(review): the trailing 3L / 2L presumably cap how many row ids come back - confirm in getRowIds.
    DatasetModel ingested = dataRepoFixtures.getDataset(steward(), datasetId);
    String ingestedName = ingested.getName();
    BigQuery datasetBigQuery = BigQueryFixtures.getBigQuery(ingested.getDataProject(), stewardToken);
    List<String> participantIdsToDelete = getRowIds(datasetBigQuery, ingested, "participant", 3L);
    List<String> sampleIdsToDelete = getRowIds(datasetBigQuery, ingested, "sample", 2L);

    // Full-view snapshot taken BEFORE the soft delete; point the request at this dataset.
    SnapshotRequestModel requestBefore =
        jsonLoader.loadObject("ingest-test-snapshot-fullviews.json", SnapshotRequestModel.class);
    requestBefore.getContents().get(0).datasetName(ingestedName);
    SnapshotSummaryModel summaryBefore =
        dataRepoFixtures.createSnapshotWithRequest(steward(), ingestedName, requestBefore);
    snapshotIds.add(summaryBefore.getId());
    SnapshotModel snapshotBefore = dataRepoFixtures.getSnapshot(steward(), summaryBefore.getId());

    // The steward is the custodian in this case, so is a reader in big query.
    BigQuery bigQueryBefore = BigQueryFixtures.getBigQuery(snapshotBefore.getDataProject(), stewardToken);
    assertSnapshotTableCount(bigQueryBefore, snapshotBefore, "participant", 5L);
    assertSnapshotTableCount(bigQueryBefore, snapshotBefore, "sample", 7L);

    // Write the row ids to GCS, one file per table.
    String participantFile = writeListToScratch("softDel", participantIdsToDelete);
    String sampleFile = writeListToScratch("softDel", sampleIdsToDelete);

    // Build the deletion request pointing at the two row-id files and send it off.
    List<DataDeletionTableModel> tablesToDelete = Arrays.asList(
        deletionTableFile("participant", participantFile),
        deletionTableFile("sample", sampleFile));
    DataDeletionRequest softDelete = dataDeletionRequest().tables(tablesToDelete);
    dataRepoFixtures.deleteData(steward(), datasetId, softDelete);

    // The dataset itself should now report the reduced counts.
    assertTableCount(datasetBigQuery, ingested, "participant", 2L);
    assertTableCount(datasetBigQuery, ingested, "sample", 5L);

    // Full-view snapshot taken AFTER the soft delete.
    SnapshotRequestModel requestAfter =
        jsonLoader.loadObject("ingest-test-snapshot-fullviews.json", SnapshotRequestModel.class);
    requestAfter.getContents().get(0).datasetName(ingestedName);
    SnapshotSummaryModel summaryAfter =
        dataRepoFixtures.createSnapshotWithRequest(steward(), ingestedName, requestAfter);
    snapshotIds.add(summaryAfter.getId());
    SnapshotModel snapshotAfter = dataRepoFixtures.getSnapshot(steward(), summaryAfter.getId());
    BigQuery bigQueryAfter = BigQueryFixtures.getBigQuery(snapshotAfter.getDataProject(), stewardToken);

    // The earlier snapshot must be unaffected by the delete.
    assertSnapshotTableCount(bigQueryBefore, snapshotBefore, "participant", 5L);
    assertSnapshotTableCount(bigQueryBefore, snapshotBefore, "sample", 7L);

    // The later snapshot must reflect the delete.
    assertSnapshotTableCount(bigQueryAfter, snapshotAfter, "participant", 2L);
    assertSnapshotTableCount(bigQueryAfter, snapshotAfter, "sample", 5L);
}
Use of bio.terra.model.DatasetModel in project jade-data-repo by DataBiosphere.
The class GrammarTest, method setup.
/**
 * Rebuilds {@code datasetMap} before each test so that the dataset names "dataset", "foo" and
 * "baz" all map to the same model, whose data project is "a-data-project".
 */
@Before
public void setup() {
    DatasetModel sharedModel = new DatasetModel().dataProject("a-data-project");
    datasetMap = new HashMap<>();
    // Every known dataset name points at the one shared model instance.
    for (String knownName : new String[] {"dataset", "foo", "baz"}) {
        datasetMap.put(knownName, sharedModel);
    }
}
Aggregations