use of bio.terra.service.dataset.AssetSpecification in project jade-data-repo by DataBiosphere.
the class CreateDatasetAssetStep method undoStep.
@Override
public StepResult undoStep(FlightContext context) {
    FlightMap map = context.getWorkingMap();
    if (map.get(DatasetWorkingMapKeys.ASSET_NAME_COLLISION, Boolean.class) == null) {
        Dataset dataset = getDataset(context);
        // Search the asset list in the dataset object to see if the asset you were trying to
        // create got created by the time the undoStep is run.
        AssetSpecification newAssetSpecification = getNewAssetSpec(context, dataset);
        Optional<AssetSpecification> assetSpecificationToDelete =
            dataset.getAssetSpecificationByName(newAssetSpecification.getName());
        if (assetSpecificationToDelete.isPresent()) {
            // If the asset is found, get its id and delete it.
            assetDao.delete(assetSpecificationToDelete.get().getId());
        }
        // Otherwise the asset is not found and there is nothing to undo: it never got created.
    }
    return StepResult.getStepResultSuccess();
}
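For context, a minimal sketch of what the matching doStep could look like, assuming the ASSET_NAME_COLLISION flag is written to the working map when the requested asset name already exists. The collision check, the exception type, and the assetDao.create signature are assumptions for illustration, not the actual jade-data-repo implementation.

// Hypothetical doStep sketch: record a name collision in the working map so that the
// undoStep above knows not to delete an asset that existed before this flight ran.
@Override
public StepResult doStep(FlightContext context) {
    Dataset dataset = getDataset(context);
    AssetSpecification newAssetSpecification = getNewAssetSpec(context, dataset);
    if (dataset.getAssetSpecificationByName(newAssetSpecification.getName()).isPresent()) {
        context.getWorkingMap().put(DatasetWorkingMapKeys.ASSET_NAME_COLLISION, true);
        return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL,
            new IllegalStateException("Asset name already exists: " + newAssetSpecification.getName()));
    }
    assetDao.create(newAssetSpecification, dataset.getId()); // assumed DAO signature
    return StepResult.getStepResultSuccess();
}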
use of bio.terra.service.dataset.AssetSpecification in project jade-data-repo by DataBiosphere.
the class SnapshotService method makeSnapshotFromSnapshotRequest.
/**
 * Make a Snapshot structure with all of its parts from an incoming snapshot request.
 * Note that the structure does not have UUIDs or created dates filled in. Those are
 * updated by the DAO when it stores the snapshot in the repository metadata.
 *
 * @param snapshotRequestModel the incoming snapshot request
 * @return the new Snapshot structure
 */
public Snapshot makeSnapshotFromSnapshotRequest(SnapshotRequestModel snapshotRequestModel) {
    // Make this early so we can hook up back links to it
    Snapshot snapshot = new Snapshot();
    List<SnapshotRequestContentsModel> requestContentsList = snapshotRequestModel.getContents();
    // TODO: for MVM we only allow one source list
    if (requestContentsList.size() > 1) {
        throw new ValidationException("Only a single snapshot contents entry is currently allowed.");
    }
    SnapshotRequestContentsModel requestContents = requestContentsList.get(0);
    Dataset dataset = datasetService.retrieveByName(requestContents.getDatasetName());
    SnapshotSource snapshotSource = new SnapshotSource().snapshot(snapshot).dataset(dataset);
    switch (snapshotRequestModel.getContents().get(0).getMode()) {
        case BYASSET:
            // TODO: When we implement explicit definition of snapshot tables, we will handle that here.
            // For now, we generate the snapshot tables directly from the asset tables of the one source
            // allowed in a snapshot.
            AssetSpecification assetSpecification = getAssetSpecificationFromRequest(requestContents);
            snapshotSource.assetSpecification(assetSpecification);
            conjureSnapshotTablesFromAsset(snapshotSource.getAssetSpecification(), snapshot, snapshotSource);
            break;
        case BYFULLVIEW:
            conjureSnapshotTablesFromDatasetTables(snapshot, snapshotSource);
            break;
        case BYQUERY:
            SnapshotRequestQueryModel queryModel = requestContents.getQuerySpec();
            String assetName = queryModel.getAssetName();
            String snapshotQuery = queryModel.getQuery();
            Query query = Query.parse(snapshotQuery);
            List<String> datasetNames = query.getDatasetNames();
            // TODO: this assumes that there is only one dataset
            // (based on the validation flight step that already occurred).
            // This will change when more than one dataset is allowed.
            String datasetName = datasetNames.get(0);
            Dataset queryDataset = datasetService.retrieveByName(datasetName);
            AssetSpecification queryAssetSpecification = queryDataset
                .getAssetSpecificationByName(assetName)
                .orElseThrow(() -> new AssetNotFoundException(
                    "This dataset does not have an asset specification with name: " + assetName));
            snapshotSource.assetSpecification(queryAssetSpecification);
            // TODO: is this wrong? Why don't we just pass the assetSpecification?
            conjureSnapshotTablesFromAsset(snapshotSource.getAssetSpecification(), snapshot, snapshotSource);
            break;
        case BYROWID:
            SnapshotRequestRowIdModel requestRowIdModel = requestContents.getRowIdSpec();
            conjureSnapshotTablesFromRowIds(requestRowIdModel, snapshot, snapshotSource);
            break;
        default:
            throw new InvalidSnapshotException("Snapshot does not have required mode information");
    }
    return snapshot
        .name(snapshotRequestModel.getName())
        .description(snapshotRequestModel.getDescription())
        .snapshotSources(Collections.singletonList(snapshotSource))
        .profileId(UUID.fromString(snapshotRequestModel.getProfileId()))
        .relationships(createSnapshotRelationships(dataset.getRelationships(), snapshotSource));
}
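As a usage illustration, here is a hedged caller sketch that builds a full-view snapshot request and converts it into a Snapshot. The fluent setters and the ModeEnum name on the generated request models are assumptions inferred from the getters used above, and my_dataset, my_snapshot, and billingProfileId are placeholders.

// Hypothetical caller sketch; setter and enum names are assumed, not verified.
SnapshotRequestContentsModel contents = new SnapshotRequestContentsModel()
    .datasetName("my_dataset")                                  // placeholder dataset name
    .mode(SnapshotRequestContentsModel.ModeEnum.BYFULLVIEW);
SnapshotRequestModel request = new SnapshotRequestModel()
    .name("my_snapshot")                                        // placeholder snapshot name
    .description("Full view snapshot of my_dataset")
    .profileId(billingProfileId.toString())                     // placeholder billing profile UUID
    .contents(Collections.singletonList(contents));
Snapshot snapshot = snapshotService.makeSnapshotFromSnapshotRequest(request);
// The returned Snapshot has its source and tables wired up, but no UUIDs or created
// dates; those are filled in by the DAO when the snapshot is stored.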
use of bio.terra.service.dataset.AssetSpecification in project jade-data-repo by DataBiosphere.
the class BigQueryPdao method mapValuesToRows.
// Compute the row ids from the input ids and validate that all inputs have matches.
// Returns a structure with the matching row ids (suitable for calling create snapshot)
// and any mismatched input values that don't have corresponding rows.
// NOTE: In the fullness of time, we may not do this and instead kick the function up to the UI.
// So this code assumes there is one source and one set of input values.
// The query it builds embeds the data values into the query in an array. I think it will
// support about 25,000 input values. If that is not enough, there is another, more
// complicated alternative:
// - create a scratch table at snapshot creation time
// - truncate before we start
// - load the values in
// - do the query
// - truncate (even tidier...)
// So if we need to make this work in the long term, we can take that approach.
@Override
public RowIdMatch mapValuesToRows(Snapshot snapshot, SnapshotSource source, List<String> inputValues)
        throws InterruptedException {
    // One source: grab it and navigate to the relevant parts
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    AssetSpecification asset = source.getAssetSpecification();
    Column column = asset.getRootColumn().getDatasetColumn();

    ST sqlTemplate = new ST(mapValuesToRowsTemplate);
    sqlTemplate.add("project", bigQueryProject.getProjectId());
    sqlTemplate.add("dataset", prefixName(source.getDataset().getName()));
    sqlTemplate.add("table", column.getTable().getName());
    sqlTemplate.add("column", column.getName());
    sqlTemplate.add("inputVals", inputValues);

    // Execute the query, building the row id match structure that tracks the matching
    // ids and the mismatched ids.
    RowIdMatch rowIdMatch = new RowIdMatch();
    String sql = sqlTemplate.render();
    logger.debug("mapValuesToRows sql: " + sql);
    TableResult result = bigQueryProject.query(sql);
    for (FieldValueList row : result.iterateAll()) {
        // Test getting these by name
        FieldValue rowId = row.get(0);
        FieldValue inputValue = row.get(1);
        if (rowId.isNull()) {
            rowIdMatch.addMismatch(inputValue.getStringValue());
            logger.debug("rowId=<NULL>" + " inVal=" + inputValue.getStringValue());
        } else {
            rowIdMatch.addMatch(inputValue.getStringValue(), rowId.getStringValue());
            logger.debug("rowId=" + rowId.getStringValue() + " inVal=" + inputValue.getStringValue());
        }
    }
    return rowIdMatch;
}
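A hedged sketch of how a caller might consume the returned RowIdMatch before creating the snapshot. The accessor names getUnmatchedInputValues and getMatchingRowIds, and the choice of exception, are assumptions for illustration.

// Hypothetical caller sketch: resolve input values to row ids, fail fast on mismatches,
// then create the snapshot from the matched row ids.
RowIdMatch rowIdMatch = bigQueryPdao.mapValuesToRows(snapshot, source, inputValues);
List<String> unmatched = rowIdMatch.getUnmatchedInputValues();   // assumed accessor
if (!unmatched.isEmpty()) {
    throw new IllegalArgumentException("Input values not found in the root column: " + unmatched);
}
bigQueryPdao.createSnapshot(snapshot, rowIdMatch.getMatchingRowIds()); // assumed accessor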
use of bio.terra.service.dataset.AssetSpecification in project jade-data-repo by DataBiosphere.
the class CreateSnapshotPrimaryDataQueryStep method doStep.
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // TODO: this assumes single-dataset snapshots; we will need to add a loop for multiple
    // (based on the validation flight step that already occurred).
    /*
     * get dataset and assetName
     * get asset from dataset
     * which gives the root table
     * to use in conjunction with the filtered row ids to create this snapshot
     */
    Snapshot snapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());
    SnapshotRequestQueryModel snapshotQuerySpec = snapshotReq.getContents().get(0).getQuerySpec();
    String snapshotAssetName = snapshotQuerySpec.getAssetName();
    String snapshotQuery = snapshotReq.getContents().get(0).getQuerySpec().getQuery();
    Query query = Query.parse(snapshotQuery);
    List<String> datasetNames = query.getDatasetNames();
    // TODO: this assumes that there is only one dataset
    // (based on the validation flight step that already occurred).
    // This will change when more than one dataset is allowed.
    String datasetName = datasetNames.get(0);
    Dataset dataset = datasetService.retrieveByName(datasetName);
    DatasetModel datasetModel = datasetService.retrieveModel(dataset);
    // get the asset out of the dataset
    Optional<AssetSpecification> assetSpecOp = dataset.getAssetSpecificationByName(snapshotAssetName);
    AssetSpecification assetSpec = assetSpecOp.orElseThrow(() ->
        new AssetNotFoundException("Expected asset specification"));
    Map<String, DatasetModel> datasetMap = Collections.singletonMap(datasetName, datasetModel);
    BigQueryVisitor bqVisitor = new BigQueryVisitor(datasetMap);
    String sqlQuery = query.translateSql(bqVisitor);
    // Validate that the root table is actually a table being queried in the query;
    // the grammar only picks up table names in the FROM clause (though there may be more than one).
    List<String> tableNames = query.getTableNames();
    String rootTablename = assetSpec.getRootTable().getTable().getName();
    if (!tableNames.contains(rootTablename)) {
        throw new InvalidQueryException("The root table of the selected asset is not present in this query");
    }
    // Now use the query to get the row ids, insert the row ids into the snapshot row ids table,
    // and then kick off the rest of the relationship walking.
    bigQueryPdao.queryForRowIds(assetSpec, snapshot, sqlQuery);
    return StepResult.getStepResultSuccess();
}
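For completeness, a hypothetical sketch of the matching undoStep, assuming cleanup amounts to deleting whatever snapshot primary data was created; the cleanup call written here as bigQueryPdao.deleteSnapshot is an assumption, and the real step may differ.

// Hypothetical undoStep sketch: roll back by deleting the snapshot's primary data.
@Override
public StepResult undoStep(FlightContext context) throws InterruptedException {
    Snapshot snapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());
    bigQueryPdao.deleteSnapshot(snapshot); // assumed cleanup entry point
    return StepResult.getStepResultSuccess();
}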
use of bio.terra.service.dataset.AssetSpecification in project jade-data-repo by DataBiosphere.
the class BigQueryPdao method createSnapshot.
@Override
public void createSnapshot(Snapshot snapshot, List<String> rowIds) throws InterruptedException {
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    String projectId = bigQueryProject.getProjectId();
    String snapshotName = snapshot.getName();
    BigQuery bigQuery = bigQueryProject.getBigQuery();

    // create snapshot BQ dataset
    snapshotCreateBQDataset(bigQueryProject, snapshot);

    // create the row id table
    bigQueryProject.createTable(snapshotName, PDAO_ROW_ID_TABLE, rowIdTableSchema());

    // populate root row ids. Must happen before the relationship walk.
    // NOTE: when we have multiple sources, we can put this into a loop
    SnapshotSource source = snapshot.getSnapshotSources().get(0);
    String datasetBqDatasetName = prefixName(source.getDataset().getName());
    AssetSpecification asset = source.getAssetSpecification();
    Table rootTable = asset.getRootTable().getTable();
    String rootTableId = rootTable.getId().toString();
    if (rowIds.size() > 0) {
        ST sqlTemplate = new ST(loadRootRowIdsTemplate);
        sqlTemplate.add("project", projectId);
        sqlTemplate.add("snapshot", snapshotName);
        sqlTemplate.add("dataset", datasetBqDatasetName);
        sqlTemplate.add("tableId", rootTableId);
        sqlTemplate.add("rowIds", rowIds);
        bigQueryProject.query(sqlTemplate.render());
    }

    ST sqlTemplate = new ST(validateRowIdsForRootTemplate);
    sqlTemplate.add("project", projectId);
    sqlTemplate.add("snapshot", snapshotName);
    sqlTemplate.add("dataset", datasetBqDatasetName);
    sqlTemplate.add("table", rootTable.getName());
    TableResult result = bigQueryProject.query(sqlTemplate.render());
    FieldValueList row = result.iterateAll().iterator().next();
    FieldValue countValue = row.get(0);
    if (countValue.getLongValue() != rowIds.size()) {
        logger.error("Invalid row ids supplied: rowIds=" + rowIds.size()
            + " count=" + countValue.getLongValue());
        for (String rowId : rowIds) {
            logger.error("  rowIdIn: " + rowId);
        }
        throw new PdaoException("Invalid row ids supplied");
    }

    // walk and populate relationship table row ids
    List<WalkRelationship> walkRelationships = WalkRelationship.ofAssetSpecification(asset);
    walkRelationships(datasetBqDatasetName, snapshotName, walkRelationships, rootTableId, projectId, bigQuery);
    snapshotViewCreation(datasetBqDatasetName, snapshotName, snapshot, projectId, bigQuery, bigQueryProject);
}
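To illustrate the row id table created above, here is a minimal sketch of what a helper like rowIdTableSchema() could return, using the com.google.cloud.bigquery Field, LegacySQLTypeName, and Schema types. The column names and the exact column set are assumptions; the actual jade-data-repo schema may differ.

// Minimal sketch (assumed, not the actual implementation): a two-column schema for
// the snapshot row id table, keyed by source table id and row id.
private Schema rowIdTableSchema() {
    Field tableIdField = Field.newBuilder("datarepo_table_id", LegacySQLTypeName.STRING)
        .setMode(Field.Mode.REQUIRED)
        .build();
    Field rowIdField = Field.newBuilder("datarepo_row_id", LegacySQLTypeName.STRING)
        .setMode(Field.Mode.REQUIRED)
        .build();
    return Schema.of(tableIdField, rowIdField);
}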