use of bio.terra.service.snapshot.RowIdMatch in project jade-data-repo by DataBiosphere.
the class BigQueryPdao method matchRowIds.
/**
 * Checks which of the supplied row ids exist in one source table of a snapshot.
 *
 * <p>Renders {@code mapValuesToRowsTemplate} against the row id column of the named source
 * table, runs the query, and records each returned row as either a match (row id present) or
 * a mismatch (row id is NULL).
 *
 * @param snapshot snapshot used to select the BigQuery project the query runs in
 * @param source snapshot source supplying the map tables and the dataset name
 * @param tableName name of the source ("from") table the row ids are checked against
 * @param rowIds row ids to look up; passed to the template as its input values
 * @return the matched and mismatched input values
 * @throws java.util.NoSuchElementException if {@code source} has no map table whose source
 *     table is named {@code tableName}
 * @throws InterruptedException declared for the BigQuery query call
 */
public RowIdMatch matchRowIds(Snapshot snapshot, SnapshotSource source, String tableName, List<String> rowIds) throws InterruptedException {
    // One source: grab it and navigate to the relevant parts
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);

    // Fail with a message that names the table instead of the bare "No value present"
    // that an unchecked Optional.get() produces. The exception type is unchanged.
    SnapshotMapTable mapTable = source.getSnapshotMapTables().stream()
        .filter(table -> table.getFromTable().getName().equals(tableName))
        .findFirst()
        .orElseThrow(() -> new java.util.NoSuchElementException(
            "No snapshot map table found for source table '" + tableName + "'"));

    // create a column to point to the row id column in the source table to check that
    // passed row ids exist in it
    Column rowIdColumn = new Column().table(mapTable.getFromTable()).name(PDAO_ROW_ID_COLUMN);

    ST sqlTemplate = new ST(mapValuesToRowsTemplate);
    sqlTemplate.add("project", bigQueryProject.getProjectId());
    sqlTemplate.add("dataset", prefixName(source.getDataset().getName()));
    sqlTemplate.add("table", tableName);
    sqlTemplate.add("column", rowIdColumn.getName());
    sqlTemplate.add("inputVals", rowIds);

    // Execute the query building the row id match structure that tracks the matching
    // ids and the mismatched ids
    RowIdMatch rowIdMatch = new RowIdMatch();
    String sql = sqlTemplate.render();
    logger.debug("matchRowIds sql: " + sql);
    TableResult result = bigQueryProject.query(sql);
    for (FieldValueList row : result.iterateAll()) {
        // Columns are read by position: 0 is the row id, 1 is the input value.
        FieldValue rowId = row.get(0);
        String inputValue = row.get(1).getStringValue();
        if (rowId.isNull()) {
            // No row id came back for this input value, so it has no matching row.
            rowIdMatch.addMismatch(inputValue);
            logger.debug("rowId=<NULL>" + " inVal=" + inputValue);
        } else {
            String matchedRowId = rowId.getStringValue();
            rowIdMatch.addMatch(inputValue, matchedRowId);
            logger.debug("rowId=" + matchedRowId + " inVal=" + inputValue);
        }
    }
    return rowIdMatch;
}
use of bio.terra.service.snapshot.RowIdMatch in project jade-data-repo by DataBiosphere.
the class CreateSnapshotPrimaryDataRowIdsStep method doStep.
/**
 * Validates the row ids requested for each table and, when every one of them matches,
 * creates the snapshot from the provided ids.
 *
 * <p>Tables with an empty row id list are skipped. On the first table with unmatched row ids
 * the step sets a BAD_REQUEST error response on the flight context and returns a fatal
 * failure carrying a {@code MismatchedValueException}.
 *
 * @param context flight context that receives the error response on a mismatch
 * @return a success result, or a fatal failure when any requested row id is unmatched
 * @throws InterruptedException declared for the BigQuery calls
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // TODO: this assumes single-dataset snapshots, will need to add a loop for multiple
    SnapshotRequestContentsModel requestContents = snapshotReq.getContents().get(0);
    Snapshot snapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());
    SnapshotSource firstSource = snapshot.getSnapshotSources().get(0);

    // every table's row ids must all be found before the snapshot is created
    for (SnapshotRequestRowIdTableModel tableModel : requestContents.getRowIdSpec().getTables()) {
        List<String> requestedIds = tableModel.getRowIds();
        if (requestedIds.isEmpty()) {
            continue;
        }

        RowIdMatch match =
            bigQueryPdao.matchRowIds(snapshot, firstSource, tableModel.getTableName(), requestedIds);
        if (match.getUnmatchedInputValues().isEmpty()) {
            continue;
        }

        String message = String.format(
            "Mismatched row ids: '%s'",
            String.join("', '", match.getUnmatchedInputValues()));
        FlightUtils.setErrorResponse(context, message, HttpStatus.BAD_REQUEST);
        return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL, new MismatchedValueException(message));
    }

    bigQueryPdao.createSnapshotWithProvidedIds(snapshot, requestContents);
    return StepResult.getStepResultSuccess();
}
use of bio.terra.service.snapshot.RowIdMatch in project jade-data-repo by DataBiosphere.
the class CreateSnapshotPrimaryDataAssetStep method doStep.
/**
 * Maps the asset's root values to row ids and creates the snapshot from the matching rows.
 *
 * <p>If any root value has no matching row, the step sets a BAD_REQUEST error response on
 * the flight context and returns a fatal failure carrying a {@code MismatchedValueException}.
 *
 * @param context flight context that receives the error response on a mismatch
 * @return a success result, or a fatal failure when any input value is unmatched
 * @throws InterruptedException declared for the BigQuery calls
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // Only the first contents entry and the first snapshot source are used.
    SnapshotRequestContentsModel requestContents = snapshotReq.getContents().get(0);
    SnapshotRequestAssetModel assetModel = requestContents.getAssetSpec();
    Snapshot snapshot = snapshotDao.retrieveSnapshotByName(snapshotReq.getName());
    SnapshotSource firstSource = snapshot.getSnapshotSources().get(0);

    // map field ids into row ids and validate
    RowIdMatch match = bigQueryPdao.mapValuesToRows(snapshot, firstSource, assetModel.getRootValues());

    if (match.getUnmatchedInputValues().isEmpty()) {
        // everything matched: pass the row ids into create snapshot
        bigQueryPdao.createSnapshot(snapshot, match.getMatchingRowIds());
        return StepResult.getStepResultSuccess();
    }

    String message = String.format(
        "Mismatched input values: '%s'",
        String.join("', '", match.getUnmatchedInputValues()));
    FlightUtils.setErrorResponse(context, message, HttpStatus.BAD_REQUEST);
    return new StepResult(StepStatus.STEP_RESULT_FAILURE_FATAL, new MismatchedValueException(message));
}
use of bio.terra.service.snapshot.RowIdMatch in project jade-data-repo by DataBiosphere.
the class BigQueryPdao method mapValuesToRows.
/**
 * Computes the row ids for the input values and validates that every input has a match.
 *
 * <p>Returns a structure with the matching row ids (suitable for calling create snapshot)
 * and any mismatched input values that don't have corresponding rows.
 *
 * <p>NOTE: In the fullness of time, we may not do this and kick the function into the UI.
 * So this code assumes there is one source and one set of input values.
 *
 * <p>The query it builds embeds data values into the query in an array. I think it will
 * support about 25,000 input values. If that is not enough there is another, more
 * complicated alternative:
 * <ul>
 *   <li>create a scratch table at snapshot creation time</li>
 *   <li>truncate before we start</li>
 *   <li>load the values in</li>
 *   <li>do the query</li>
 *   <li>truncate (even tidier...)</li>
 * </ul>
 * So if we need to make this work in the long term, we can take that approach.
 *
 * @param snapshot snapshot used to select the BigQuery project the query runs in
 * @param source snapshot source supplying the asset specification and the dataset name
 * @param inputValues values to look up in the asset's root column
 * @return the matched row ids and the mismatched input values
 * @throws InterruptedException declared for the BigQuery query call
 */
@Override
public RowIdMatch mapValuesToRows(Snapshot snapshot, SnapshotSource source, List<String> inputValues) throws InterruptedException {
    // One source: grab it and navigate to the relevant parts
    BigQueryProject project = bigQueryProjectForSnapshot(snapshot);
    AssetSpecification assetSpec = source.getAssetSpecification();
    Column rootColumn = assetSpec.getRootColumn().getDatasetColumn();

    ST template = new ST(mapValuesToRowsTemplate);
    template.add("project", project.getProjectId());
    template.add("dataset", prefixName(source.getDataset().getName()));
    template.add("table", rootColumn.getTable().getName());
    template.add("column", rootColumn.getName());
    template.add("inputVals", inputValues);

    // Run the query and sort every returned row into the match structure, which tracks
    // both the matching ids and the mismatched ids
    RowIdMatch matches = new RowIdMatch();
    String renderedSql = template.render();
    logger.debug("mapValuesToRows sql: " + renderedSql);
    TableResult queryResult = project.query(renderedSql);
    for (FieldValueList resultRow : queryResult.iterateAll()) {
        // Columns are read by position: 0 is the row id, 1 is the input value.
        FieldValue rowIdField = resultRow.get(0);
        String inVal = resultRow.get(1).getStringValue();
        if (!rowIdField.isNull()) {
            String foundRowId = rowIdField.getStringValue();
            matches.addMatch(inVal, foundRowId);
            logger.debug("rowId=" + foundRowId + " inVal=" + inVal);
        } else {
            matches.addMismatch(inVal);
            logger.debug("rowId=<NULL>" + " inVal=" + inVal);
        }
    }
    return matches;
}
Aggregations