Use of bio.terra.common.Table in project jade-data-repo by DataBiosphere.
Class IngestValidateRefsStep, method doStep.
/**
 * Validates the file references found in the staging table for the table being ingested.
 *
 * <p>Every column of the target table whose type is {@code FILEREF} (compared case-insensitively)
 * has its ref ids read from the staging table and passed to {@code fileDao.validateRefIds}. The
 * ids reported back for all such columns are accumulated so that a single, complete error is raised.
 *
 * @param context flight context used to look up the dataset, the target table and the staging table name
 * @return a success step result when no invalid file ids were reported
 * @throws InvalidFileRefException if at least one invalid file id was reported; the exception details
 *         carry at most {@code MAX_ERROR_REF_IDS} of the offending ids
 * @throws InterruptedException propagated if one of the delegated calls is interrupted
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    Dataset dataset = IngestUtils.getDataset(context, datasetService);
    Table table = IngestUtils.getDatasetTable(context, dataset);
    String stagingTableName = IngestUtils.getStagingTableName(context);

    // For each fileref column, scan the staging table and build an array of file ids.
    // Then probe the file system to validate that the file exists and is part
    // of this dataset. We check all ids and return one complete error.
    List<String> invalidRefIds = new ArrayList<>();
    for (Column column : table.getColumns()) {
        if (StringUtils.equalsIgnoreCase(column.getType(), "FILEREF")) {
            List<String> refIdArray = bigQueryPdao.getRefIds(dataset, stagingTableName, column);
            List<String> badRefIds = fileDao.validateRefIds(dataset, refIdArray);
            // Tolerate a null result from the DAO; only a non-null list contributes ids.
            if (badRefIds != null) {
                invalidRefIds.addAll(badRefIds);
            }
        }
    }

    int invalidIdCount = invalidRefIds.size();
    if (invalidIdCount != 0) {
        // A builder (rather than + in the loop) keeps static analysis quiet about
        // string concatenation inside a loop.
        StringBuilder errorMessage = new StringBuilder("Invalid file ids found during ingest (");
        List<String> errorDetails = new ArrayList<>();
        for (String badId : invalidRefIds) {
            // Check the cap BEFORE adding, so the details never exceed the limit and the
            // "N out of M" prefix only appears when ids were actually left out.
            if (errorDetails.size() >= MAX_ERROR_REF_IDS) {
                errorMessage.append(MAX_ERROR_REF_IDS).append(" out of ");
                break;
            }
            errorDetails.add(badId);
        }
        errorMessage.append(invalidIdCount).append(" returned in details)");
        throw new InvalidFileRefException(errorMessage.toString(), errorDetails);
    }
    return StepResult.getStepResultSuccess();
}
Use of bio.terra.common.Table in project jade-data-repo by DataBiosphere.
Class BigQueryPdao, method matchRowIds.
/**
 * For one table of a snapshot source, checks which of the supplied row ids exist in that table.
 *
 * <p>The {@code mapValuesToRowsTemplate} query is rendered with the project, the prefixed dataset
 * name, the table name, the row id column and the input values, then executed. Each result row is
 * read positionally: field 0 is the row id found in the table and field 1 is the input value.
 * A null row id is recorded as a mismatch; otherwise the pair is recorded as a match.
 *
 * @param snapshot  snapshot used to select the BigQuery project
 * @param source    snapshot source whose map tables and dataset are consulted
 * @param tableName name of the source (from) table to probe
 * @param rowIds    row ids to look for
 * @return the matched and mismatched ids
 * @throws java.util.NoSuchElementException if the source has no map table whose from-table is named
 *         {@code tableName}
 * @throws InterruptedException propagated if one of the delegated calls is interrupted
 */
public RowIdMatch matchRowIds(Snapshot snapshot, SnapshotSource source, String tableName, List<String> rowIds) throws InterruptedException {
    // One source: grab it and navigate to the relevant parts
    BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);

    // Fail with a message naming the table instead of a bare Optional.get() failure.
    // The exception type is the one Optional.get() would have thrown.
    SnapshotMapTable mapTable = source.getSnapshotMapTables().stream()
        .filter(table -> table.getFromTable().getName().equals(tableName))
        .findFirst()
        .orElseThrow(() -> new java.util.NoSuchElementException(
            "No snapshot map table found for source table: " + tableName));

    // create a column to point to the row id column in the source table to check that passed row ids exist in it
    Column rowIdColumn = new Column().table(mapTable.getFromTable()).name(PDAO_ROW_ID_COLUMN);

    ST sqlTemplate = new ST(mapValuesToRowsTemplate);
    sqlTemplate.add("project", bigQueryProject.getProjectId());
    sqlTemplate.add("dataset", prefixName(source.getDataset().getName()));
    sqlTemplate.add("table", tableName);
    sqlTemplate.add("column", rowIdColumn.getName());
    sqlTemplate.add("inputVals", rowIds);

    // Execute the query building the row id match structure that tracks the matching
    // ids and the mismatched ids
    RowIdMatch rowIdMatch = new RowIdMatch();
    String sql = sqlTemplate.render();
    logger.debug("mapValuesToRows sql: " + sql);
    TableResult result = bigQueryProject.query(sql);
    for (FieldValueList row : result.iterateAll()) {
        // Fields are read by position. NOTE(review): the original comment suggested trying
        // access by field name instead.
        FieldValue rowId = row.get(0);
        FieldValue inputValue = row.get(1);
        // Both branches need the input value, so read it once.
        String inputVal = inputValue.getStringValue();
        if (rowId.isNull()) {
            rowIdMatch.addMismatch(inputVal);
            logger.debug("rowId=<NULL>" + " inVal=" + inputVal);
        } else {
            rowIdMatch.addMatch(inputVal, rowId.getStringValue());
            logger.debug("rowId=" + rowId.getStringValue() + " inVal=" + inputVal);
        }
    }
    return rowIdMatch;
}
Use of bio.terra.common.Table in project jade-data-repo by DataBiosphere.
Class SnapshotService, method createSnapshotRelationships.
/**
 * Re-express source relationships (from a dataset or asset) in terms of snapshot tables and columns.
 *
 * <p>A source relationship is carried over only when both of its tables and both of its columns
 * have an entry in the snapshot source's mapping; any other relationship is left out of the result.
 *
 * @param sourceRelationships relationships from a dataset or asset
 * @param snapshotSource source with mapping between dataset tables and columns -> snapshot tables and columns
 * @return a list of relationships tied to the snapshot tables
 */
public List<Relationship> createSnapshotRelationships(List<Relationship> sourceRelationships, SnapshotSource snapshotSource) {
    // Index the snapshot tables/columns by the id of the dataset table/column they are mapped from.
    Map<UUID, Table> snapshotTableBySourceId = new HashMap<>();
    Map<UUID, Column> snapshotColumnBySourceId = new HashMap<>();
    for (SnapshotMapTable tableMapping : snapshotSource.getSnapshotMapTables()) {
        snapshotTableBySourceId.put(tableMapping.getFromTable().getId(), tableMapping.getToTable());
        for (SnapshotMapColumn columnMapping : tableMapping.getSnapshotMapColumns()) {
            snapshotColumnBySourceId.put(columnMapping.getFromColumn().getId(), columnMapping.getToColumn());
        }
    }

    // Copy across each relationship whose four endpoints are all mapped into the snapshot.
    List<Relationship> mappedRelationships = new ArrayList<>();
    for (Relationship original : sourceRelationships) {
        UUID sourceFromTableId = original.getFromTable().getId();
        UUID sourceFromColumnId = original.getFromColumn().getId();
        UUID sourceToTableId = original.getToTable().getId();
        UUID sourceToColumnId = original.getToColumn().getId();

        boolean fullyMapped = snapshotTableBySourceId.containsKey(sourceFromTableId)
            && snapshotTableBySourceId.containsKey(sourceToTableId)
            && snapshotColumnBySourceId.containsKey(sourceFromColumnId)
            && snapshotColumnBySourceId.containsKey(sourceToColumnId);
        if (!fullyMapped) {
            continue;
        }

        Table snapshotFromTable = snapshotTableBySourceId.get(sourceFromTableId);
        Column snapshotFromColumn = snapshotColumnBySourceId.get(sourceFromColumnId);
        Table snapshotToTable = snapshotTableBySourceId.get(sourceToTableId);
        Column snapshotToColumn = snapshotColumnBySourceId.get(sourceToColumnId);

        Relationship mapped = new Relationship()
            .name(original.getName())
            .fromTable(snapshotFromTable)
            .fromColumn(snapshotFromColumn)
            .toTable(snapshotToTable)
            .toColumn(snapshotToColumn);
        mappedRelationships.add(mapped);
    }
    return mappedRelationships;
}
Use of bio.terra.common.Table in project jade-data-repo by DataBiosphere.
Class SnapshotDaoTest, method happyInOutTest.
/**
 * Round-trips a snapshot through the DAO: create-and-lock, unlock, retrieve, then compare the
 * retrieved snapshot's name, description, tables, source mapping and relationship with what was stored.
 */
@Test
public void happyInOutTest() throws Exception {
    final String mappedTableName = "thetable";
    final String lockingFlightId = "happyInOutTest_flightId";

    // Give the request a unique name, then store the snapshot and release its lock.
    snapshotRequest.name(snapshotRequest.getName() + UUID.randomUUID().toString());
    Snapshot expected = snapshotService.makeSnapshotFromSnapshotRequest(snapshotRequest);
    snapshotId = snapshotDao.createAndLock(expected, lockingFlightId);
    snapshotDao.unlock(snapshotId, lockingFlightId);

    Snapshot stored = snapshotDao.retrieveSnapshot(snapshotId);
    assertThat("snapshot name set correctly",
        stored.getName(), equalTo(expected.getName()));
    assertThat("snapshot description set correctly",
        stored.getDescription(), equalTo(expected.getDescription()));
    assertThat("correct number of tables created",
        stored.getTables().size(), equalTo(2));
    assertThat("correct number of sources created",
        stored.getSnapshotSources().size(), equalTo(1));

    // verify source and map
    SnapshotSource storedSource = stored.getSnapshotSources().get(0);
    assertThat("source points back to snapshot",
        storedSource.getSnapshot().getId(), equalTo(expected.getId()));
    assertThat("source points to the asset spec",
        storedSource.getAssetSpecification().getId(),
        equalTo(dataset.getAssetSpecifications().get(0).getId()));
    assertThat("correct number of map tables",
        storedSource.getSnapshotMapTables().size(), equalTo(2));

    // Verify map table
    SnapshotMapTable tableMapping = storedSource.getSnapshotMapTables().stream()
        .filter(candidate -> candidate.getFromTable().getName().equals(mappedTableName))
        .findFirst()
        .orElseThrow(() -> new AssertionError());
    Table expectedDatasetTable = dataset.getTables().stream()
        .filter(candidate -> candidate.getName().equals(mappedTableName))
        .findFirst()
        .orElseThrow(() -> new AssertionError());
    Table expectedSnapshotTable = expected.getTables().stream()
        .filter(candidate -> candidate.getName().equals(mappedTableName))
        .findFirst()
        .orElseThrow(() -> new AssertionError());
    assertThat("correct map table dataset table",
        tableMapping.getFromTable().getId(), equalTo(expectedDatasetTable.getId()));
    assertThat("correct map table snapshot table",
        tableMapping.getToTable().getId(), equalTo(expectedSnapshotTable.getId()));
    assertThat("correct number of map columns",
        tableMapping.getSnapshotMapColumns().size(), equalTo(1));

    // Verify map columns
    SnapshotMapColumn columnMapping = tableMapping.getSnapshotMapColumns().get(0);
    // Dataset columns are reached through an iterator while snapshot columns are indexed;
    // the original author questioned why the former is a Collection and not a List.
    Column expectedDatasetColumn = expectedDatasetTable.getColumns().iterator().next();
    Column expectedSnapshotColumn = expectedSnapshotTable.getColumns().get(0);
    assertThat("correct map column dataset column",
        columnMapping.getFromColumn().getId(), equalTo(expectedDatasetColumn.getId()));
    assertThat("correct map column snapshot column",
        columnMapping.getToColumn().getId(), equalTo(expectedSnapshotColumn.getId()));

    // Verify the single relationship and that it is expressed against the snapshot table
    List<Relationship> storedRelationships = stored.getRelationships();
    assertThat("a relationship comes back", storedRelationships.size(), equalTo(1));
    Relationship storedRelationship = storedRelationships.get(0);
    Table relFromTable = storedRelationship.getFromTable();
    Column relFromColumn = storedRelationship.getFromColumn();
    Table relToTable = storedRelationship.getToTable();
    Column relToColumn = storedRelationship.getToColumn();
    assertThat("from table name matches", relFromTable.getName(), equalTo("thetable"));
    assertThat("from column name matches", relFromColumn.getName(), equalTo("thecolumn"));
    assertThat("to table name matches", relToTable.getName(), equalTo("anothertable"));
    assertThat("to column name matches", relToColumn.getName(), equalTo("anothercolumn"));
    assertThat("relationship points to the snapshot table",
        relFromTable.getId(), equalTo(expectedSnapshotTable.getId()));
}
Use of bio.terra.common.Table in project jade-data-repo by DataBiosphere.
Class DatasetJsonConversion, method relationshipModelToDatasetRelationship.
/**
 * Builds a {@code Relationship} from a relationship model by resolving the model's from/to table
 * references against {@code tables} and its from/to column references against each table's
 * column map.
 *
 * <p>Column references are not validated here: whatever the table's column map returns for the
 * referenced column is passed straight to the relationship.
 *
 * @param relationshipModel model naming the relationship and its from/to table and column
 * @param tables lookup of dataset tables, keyed by the value the model uses to reference a table
 *        (presumably the table name; confirm against the caller)
 * @return the relationship wired to the resolved tables and columns
 * @throws NullPointerException if the from or to table referenced by the model is not present in
 *         {@code tables}; the message names the relationship and the missing table
 */
public static Relationship relationshipModelToDatasetRelationship(RelationshipModel relationshipModel, Map<String, DatasetTable> tables) {
    // Resolve both tables up front and fail with a descriptive message; without the check a
    // missing table only surfaces as a bare NullPointerException on getColumnsMap().
    Table fromTable = java.util.Objects.requireNonNull(
        tables.get(relationshipModel.getFrom().getTable()),
        () -> "Relationship '" + relationshipModel.getName()
            + "' references unknown from table: " + relationshipModel.getFrom().getTable());
    Table toTable = java.util.Objects.requireNonNull(
        tables.get(relationshipModel.getTo().getTable()),
        () -> "Relationship '" + relationshipModel.getName()
            + "' references unknown to table: " + relationshipModel.getTo().getTable());

    return new Relationship()
        .name(relationshipModel.getName())
        .fromTable(fromTable)
        .fromColumn(fromTable.getColumnsMap().get(relationshipModel.getFrom().getColumn()))
        .toTable(toTable)
        .toColumn(toTable.getColumnsMap().get(relationshipModel.getTo().getColumn()));
}
Aggregations