Use of bio.terra.common.exception.PdaoException in the project jade-data-repo by DataBiosphere.
From the class BigQueryPdao, the method queryForRowIds:
// Insert the row ids into the snapshot row ids table and then kick off the rest of the relationship walking.
// Once we have the row ids in addition to the asset spec, this follows the same flow as the asset-based snapshot create.
public void queryForRowIds(AssetSpecification assetSpecification, Snapshot snapshot, String sqlQuery)
        throws InterruptedException {
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    BigQuery bigQuery = bigQueryProject.getBigQuery();
    String snapshotName = snapshot.getName();
    Dataset dataset = snapshot.getSnapshotSources().get(0).getDataset();
    String datasetBqDatasetName = prefixName(dataset.getName());
    String projectId = bigQueryProject.getProjectId();
    try {
        // create the snapshot BQ dataset
        snapshotCreateBQDataset(bigQueryProject, snapshot);
        // now create a temp table holding all the row ids selected by the query
        bigQueryProject.createTable(snapshotName, PDAO_TEMP_TABLE, tempTableSchema());
        QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(sqlQuery)
            .setDestinationTable(TableId.of(snapshotName, PDAO_TEMP_TABLE))
            .setWriteDisposition(JobInfo.WriteDisposition.WRITE_APPEND)
            .build();
        try {
            final TableResult query = bigQuery.query(queryConfig);
            // get the results and validate that the query returned at least one row
            if (query.getTotalRows() < 1) {
                // should this be a different error?
                throw new InvalidQueryException("Query returned 0 results");
            }
        } catch (InterruptedException ie) {
            throw new PdaoException("Append query unexpectedly interrupted", ie);
        }
        // join on the root table to validate that the dataset's rootTable.rowid is never null
        // and thus matches the PDAO_ROW_ID_COLUMN
        AssetTable rootAssetTable = assetSpecification.getRootTable();
        Table rootTable = rootAssetTable.getTable();
        String datasetTableName = rootTable.getName();
        String rootTableId = rootTable.getId().toString();
        ST sqlTemplate = new ST(joinTablesToTestForMissingRowIds);
        sqlTemplate.add("snapshotDatasetName", snapshotName);
        sqlTemplate.add("tempTable", PDAO_TEMP_TABLE);
        sqlTemplate.add("datasetDatasetName", datasetBqDatasetName);
        sqlTemplate.add("datasetTable", datasetTableName);
        sqlTemplate.add("commonColumn", PDAO_ROW_ID_COLUMN);
        TableResult result = bigQueryProject.query(sqlTemplate.render());
        FieldValueList mismatchedCount = result.getValues().iterator().next();
        Long mismatchedCountLong = mismatchedCount.get(0).getLongValue();
        if (mismatchedCountLong > 0) {
            throw new MismatchedValueException("Query results did not match dataset root row ids");
        }
        // TODO: should this be pulled up to the top of queryForRowIds() / added to the snapshotCreateBQDataset() helper?
        bigQueryProject.createTable(snapshotName, PDAO_ROW_ID_TABLE, rowIdTableSchema());
        // populate root row ids. Must happen before the relationship walk.
        // NOTE: when we have multiple sources, we can put this into a loop
        // insert into the PDAO_ROW_ID_TABLE the literal that is the table id
        // and then all the row ids from the temp table
        ST sqlLoadTemplate = new ST(loadRootRowIdsFromTempTableTemplate);
        sqlLoadTemplate.add("project", projectId);
        sqlLoadTemplate.add("snapshot", snapshotName);
        sqlLoadTemplate.add("dataset", datasetBqDatasetName);
        sqlLoadTemplate.add("tableId", rootTableId);
        // this is the difference from the classic asset flow
        sqlLoadTemplate.add("commonColumn", PDAO_ROW_ID_COLUMN);
        sqlLoadTemplate.add("tempTable", PDAO_TEMP_TABLE);
        bigQueryProject.query(sqlLoadTemplate.render());
        // ST sqlValidateTemplate = new ST(validateRowIdsForRootTemplate);
        // TODO: do we want to reuse this validation? If yes, mismatchedCount / query may need updating.
        // walk and populate relationship table row ids
        List<WalkRelationship> walkRelationships = WalkRelationship.ofAssetSpecification(assetSpecification);
        walkRelationships(datasetBqDatasetName, snapshotName, walkRelationships, rootTableId, projectId, bigQuery);
        // create the snapshot views over the dataset tables
        snapshotViewCreation(datasetBqDatasetName, snapshotName, snapshot, projectId, bigQuery, bigQueryProject);
    } catch (PdaoException ex) {
        // TODO: what if the query is invalid? Seems like there might be more to catch here.
        throw new PdaoException("createSnapshot failed", ex);
    }
}
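The SQL behind joinTablesToTestForMissingRowIds is defined elsewhere in BigQueryPdao and is not shown in this snippet. Given the attributes the method supplies and the greater-than-zero check on the result, a minimal sketch of what such a template could look like follows; the template text, the helper class, and the omission of the project qualifier are illustrative assumptions, not the repo's actual SQL.

import org.stringtemplate.v4.ST;

public class JoinTemplateSketch {
    // Hypothetical template text: count temp-table row ids that have no match in the
    // dataset root table, so the caller can fail when the count is greater than zero.
    static final String JOIN_TABLES_TO_TEST_FOR_MISSING_ROW_IDS =
        "SELECT COUNT(*) FROM `<snapshotDatasetName>.<tempTable>` T "
            + "LEFT JOIN `<datasetDatasetName>.<datasetTable>` D "
            + "ON T.<commonColumn> = D.<commonColumn> "
            + "WHERE D.<commonColumn> IS NULL";

    // Render the sketch with the same attribute names used in queryForRowIds above.
    static String render(String snapshotName, String tempTable, String datasetBqDatasetName,
                         String datasetTable, String rowIdColumn) {
        ST sqlTemplate = new ST(JOIN_TABLES_TO_TEST_FOR_MISSING_ROW_IDS);
        sqlTemplate.add("snapshotDatasetName", snapshotName);
        sqlTemplate.add("tempTable", tempTable);
        sqlTemplate.add("datasetDatasetName", datasetBqDatasetName);
        sqlTemplate.add("datasetTable", datasetTable);
        sqlTemplate.add("commonColumn", rowIdColumn);
        return sqlTemplate.render();
    }
}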
Use of bio.terra.common.exception.PdaoException in the project jade-data-repo by DataBiosphere.
From the class BigQueryPdao, the method createDataset:
@Override
public void createDataset(Dataset dataset) throws InterruptedException {
    BigQueryProject bigQueryProject = bigQueryProjectForDataset(dataset);
    BigQuery bigQuery = bigQueryProject.getBigQuery();
    // Keep the dataset name from colliding with a snapshot name by prefixing it.
    // TODO: validate against people using the prefix for snapshots
    String datasetName = prefixName(dataset.getName());
    try {
        // The dataset may already exist if an earlier attempt created it and then failed partway
        // through. Delete it and re-create it from scratch.
        if (bigQueryProject.datasetExists(datasetName)) {
            bigQueryProject.deleteDataset(datasetName);
        }
        bigQueryProject.createDataset(datasetName, dataset.getDescription());
        bigQueryProject.createTable(datasetName, PDAO_LOAD_HISTORY_TABLE, buildLoadDatasetSchema());
        for (DatasetTable table : dataset.getTables()) {
            bigQueryProject.createTable(
                datasetName, table.getRawTableName(), buildSchema(table, true), table.getBigQueryPartitionConfig());
            bigQueryProject.createTable(datasetName, table.getSoftDeleteTableName(), buildSoftDeletesSchema());
            bigQuery.create(buildLiveView(bigQueryProject.getProjectId(), datasetName, table));
        }
    } catch (Exception ex) {
        throw new PdaoException("create dataset failed for " + datasetName, ex);
    }
}
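createDataset relies on schema builders (buildLoadDatasetSchema, buildSchema, buildSoftDeletesSchema) that are not shown in this snippet. As one illustration of the shape such a builder takes, here is a hedged sketch of a soft-delete schema using the BigQuery client library; it assumes the soft-delete table only records the datarepo row id, and the column-name constant's value is assumed rather than taken from the repo.

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;

public class SoftDeleteSchemaSketch {
    // Assumed value of the shared PDAO constant; the real constant is defined elsewhere in the repo.
    static final String PDAO_ROW_ID_COLUMN = "datarepo_row_id";

    // Hypothetical stand-in for buildSoftDeletesSchema(): a single required string column
    // holding the row id of each soft-deleted row.
    static Schema buildSoftDeletesSchemaSketch() {
        return Schema.of(
            Field.newBuilder(PDAO_ROW_ID_COLUMN, LegacySQLTypeName.STRING)
                .setMode(Field.Mode.REQUIRED)
                .build());
    }
}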
Use of bio.terra.common.exception.PdaoException in the project jade-data-repo by DataBiosphere.
From the class BigQueryPdao, the method createSnapshotWithProvidedIds:
public void createSnapshotWithProvidedIds(Snapshot snapshot, SnapshotRequestContentsModel contentsModel)
        throws InterruptedException {
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    String projectId = bigQueryProject.getProjectId();
    String snapshotName = snapshot.getName();
    BigQuery bigQuery = bigQueryProject.getBigQuery();
    SnapshotRequestRowIdModel rowIdModel = contentsModel.getRowIdSpec();

    // create snapshot BQ dataset
    snapshotCreateBQDataset(bigQueryProject, snapshot);

    // create the row id table
    bigQueryProject.createTable(snapshotName, PDAO_ROW_ID_TABLE, rowIdTableSchema());

    // populate root row ids. Must happen before the relationship walk.
    // NOTE: when we have multiple sources, we can put this into a loop
    SnapshotSource source = snapshot.getSnapshotSources().get(0);
    String datasetBqDatasetName = prefixName(source.getDataset().getName());

    for (SnapshotRequestRowIdTableModel table : rowIdModel.getTables()) {
        String tableName = table.getTableName();
        Table sourceTable = source.reverseTableLookup(tableName)
            .orElseThrow(() -> new CorruptMetadataException("cannot find destination table: " + tableName));
        List<String> rowIds = table.getRowIds();

        if (rowIds.size() > 0) {
            ST sqlTemplate = new ST(loadRootRowIdsTemplate);
            sqlTemplate.add("project", projectId);
            sqlTemplate.add("snapshot", snapshotName);
            sqlTemplate.add("dataset", datasetBqDatasetName);
            sqlTemplate.add("tableId", sourceTable.getId().toString());
            sqlTemplate.add("rowIds", rowIds);
            bigQueryProject.query(sqlTemplate.render());
        }

        ST sqlTemplate = new ST(validateRowIdsForRootTemplate);
        sqlTemplate.add("project", projectId);
        sqlTemplate.add("snapshot", snapshotName);
        sqlTemplate.add("dataset", datasetBqDatasetName);
        sqlTemplate.add("table", sourceTable.getName());
        TableResult result = bigQueryProject.query(sqlTemplate.render());
        FieldValueList row = result.iterateAll().iterator().next();
        FieldValue countValue = row.get(0);
        if (countValue.getLongValue() != rowIds.size()) {
            logger.error("Invalid row ids supplied: rowIds=" + rowIds.size()
                + " count=" + countValue.getLongValue());
            for (String rowId : rowIds) {
                logger.error("  rowIdIn: " + rowId);
            }
            throw new PdaoException("Invalid row ids supplied");
        }
    }
    snapshotViewCreation(datasetBqDatasetName, snapshotName, snapshot, projectId, bigQuery, bigQueryProject);
}
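Both row-id-based create paths render validateRowIdsForRootTemplate and require the returned count to equal the number of supplied row ids. The template's SQL is not part of this snippet; below is a hedged sketch consistent with that check, with the row-id table and column names assumed for illustration only.

public class ValidateRowIdsSketch {
    // Hypothetical rendering of validateRowIdsForRootTemplate: count how many of the snapshot's
    // stored row ids actually exist in the source dataset table. The caller requires this count
    // to equal the number of row ids supplied in the request. The row-id table name
    // (datarepo_row_ids) and row-id column (datarepo_row_id) are assumptions, not repo values.
    static final String VALIDATE_ROW_IDS_FOR_ROOT_SKETCH =
        "SELECT COUNT(*) FROM `<project>.<dataset>.<table>` T "
            + "JOIN `<project>.<snapshot>.datarepo_row_ids` R "
            + "ON T.datarepo_row_id = R.datarepo_row_id";
}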
Use of bio.terra.common.exception.PdaoException in the project jade-data-repo by DataBiosphere.
From the class BigQueryPdao, the method createSnapshot:
@Override
public void createSnapshot(Snapshot snapshot, List<String> rowIds) throws InterruptedException {
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
    String projectId = bigQueryProject.getProjectId();
    String snapshotName = snapshot.getName();
    BigQuery bigQuery = bigQueryProject.getBigQuery();

    // create snapshot BQ dataset
    snapshotCreateBQDataset(bigQueryProject, snapshot);

    // create the row id table
    bigQueryProject.createTable(snapshotName, PDAO_ROW_ID_TABLE, rowIdTableSchema());

    // populate root row ids. Must happen before the relationship walk.
    // NOTE: when we have multiple sources, we can put this into a loop
    SnapshotSource source = snapshot.getSnapshotSources().get(0);
    String datasetBqDatasetName = prefixName(source.getDataset().getName());
    AssetSpecification asset = source.getAssetSpecification();
    Table rootTable = asset.getRootTable().getTable();
    String rootTableId = rootTable.getId().toString();

    if (rowIds.size() > 0) {
        ST sqlTemplate = new ST(loadRootRowIdsTemplate);
        sqlTemplate.add("project", projectId);
        sqlTemplate.add("snapshot", snapshotName);
        sqlTemplate.add("dataset", datasetBqDatasetName);
        sqlTemplate.add("tableId", rootTableId);
        sqlTemplate.add("rowIds", rowIds);
        bigQueryProject.query(sqlTemplate.render());
    }

    ST sqlTemplate = new ST(validateRowIdsForRootTemplate);
    sqlTemplate.add("project", projectId);
    sqlTemplate.add("snapshot", snapshotName);
    sqlTemplate.add("dataset", datasetBqDatasetName);
    sqlTemplate.add("table", rootTable.getName());
    TableResult result = bigQueryProject.query(sqlTemplate.render());
    FieldValueList row = result.iterateAll().iterator().next();
    FieldValue countValue = row.get(0);
    if (countValue.getLongValue() != rowIds.size()) {
        logger.error("Invalid row ids supplied: rowIds=" + rowIds.size()
            + " count=" + countValue.getLongValue());
        for (String rowId : rowIds) {
            logger.error("  rowIdIn: " + rowId);
        }
        throw new PdaoException("Invalid row ids supplied");
    }

    // walk and populate relationship table row ids
    List<WalkRelationship> walkRelationships = WalkRelationship.ofAssetSpecification(asset);
    walkRelationships(datasetBqDatasetName, snapshotName, walkRelationships, rootTableId, projectId, bigQuery);

    snapshotViewCreation(datasetBqDatasetName, snapshotName, snapshot, projectId, bigQuery, bigQueryProject);
}
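The count check, error logging, and PdaoException throw at the end of createSnapshot are identical to the block inside the loop of createSnapshotWithProvidedIds above. If that duplication were factored out, a shared private helper might look like the following sketch; the helper name is hypothetical and not part of BigQueryPdao.

// Hypothetical helper mirroring the duplicated validation above: run the already-rendered
// validateRowIdsForRoot query and require that the matched-row count equals the number of
// row ids the caller supplied.
private void validateRootRowIdCount(BigQueryProject bigQueryProject, String renderedSql, List<String> rowIds)
        throws InterruptedException {
    TableResult result = bigQueryProject.query(renderedSql);
    FieldValue countValue = result.iterateAll().iterator().next().get(0);
    if (countValue.getLongValue() != rowIds.size()) {
        logger.error("Invalid row ids supplied: rowIds=" + rowIds.size() + " count=" + countValue.getLongValue());
        for (String rowId : rowIds) {
            logger.error("  rowIdIn: " + rowId);
        }
        throw new PdaoException("Invalid row ids supplied");
    }
}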