Usage of bio.terra.common.Column in the project jade-data-repo by DataBiosphere: class DatasetJsonConversion, method tableModelToTable.
/**
 * Converts an API {@code TableModel} into a {@code DatasetTable} domain object.
 *
 * <p>Builds the table's columns (each back-referencing the new table), resolves the
 * primary-key column names to the same {@code Column} instances, and translates the
 * optional partition configuration.
 *
 * @param tableModel API model to convert; a null primary key or null partition mode is
 *     treated as "none"
 * @return the populated {@code DatasetTable}
 */
public static DatasetTable tableModelToTable(TableModel tableModel) {
Map<String, Column> columnMap = new HashMap<>();
List<Column> columns = new ArrayList<>();
DatasetTable datasetTable = new DatasetTable().name(tableModel.getName());
for (ColumnModel columnModel : tableModel.getColumns()) {
// Link each column back to its owning table so later lookups can navigate both ways.
Column column = columnModelToDatasetColumn(columnModel).table(datasetTable);
columnMap.put(column.getName(), column);
columns.add(column);
}
// Primary-key entries must reference the exact Column instances built above, not copies.
List<Column> primaryKeyColumns = Optional.ofNullable(tableModel.getPrimaryKey()).orElse(Collections.emptyList()).stream().map(columnMap::get).collect(Collectors.toList());
datasetTable.primaryKey(primaryKeyColumns);
BigQueryPartitionConfigV1 partitionConfig;
// Guard against a null partition mode: switching on a null enum throws NullPointerException.
if (tableModel.getPartitionMode() == null) {
partitionConfig = BigQueryPartitionConfigV1.none();
} else {
switch(tableModel.getPartitionMode()) {
case DATE:
String column = tableModel.getDatePartitionOptions().getColumn();
// The ingest-date alias selects BigQuery's implicit _PARTITIONDATE pseudo-column.
boolean useIngestDate = column.equals(PdaoConstant.PDAO_INGEST_DATE_COLUMN_ALIAS);
partitionConfig = useIngestDate ? BigQueryPartitionConfigV1.ingestDate() : BigQueryPartitionConfigV1.date(column);
break;
case INT:
IntPartitionOptionsModel options = tableModel.getIntPartitionOptions();
partitionConfig = BigQueryPartitionConfigV1.intRange(options.getColumn(), options.getMin(), options.getMax(), options.getInterval());
break;
default:
partitionConfig = BigQueryPartitionConfigV1.none();
break;
}
}
return datasetTable.bigQueryPartitionConfig(partitionConfig).columns(columns);
}
Usage of bio.terra.common.Column in the project jade-data-repo by DataBiosphere: class IngestValidateRefsStep, method doStep.
/**
 * Validates that every file id referenced by FILEREF columns in the staging table exists
 * and belongs to this dataset.
 *
 * <p>All columns are scanned before failing so a single exception reports the complete set
 * of invalid ids (truncated to {@code MAX_ERROR_REF_IDS} in the error details).
 *
 * @param context flight context carrying the dataset and staging table name
 * @return success if all file references are valid
 * @throws InvalidFileRefException if any referenced file id is invalid
 * @throws InterruptedException if the BigQuery query is interrupted
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
Dataset dataset = IngestUtils.getDataset(context, datasetService);
Table table = IngestUtils.getDatasetTable(context, dataset);
String stagingTableName = IngestUtils.getStagingTableName(context);
// For each fileref column, scan the staging table and build an array of file ids.
// Then probe the file system to validate that the file exists and is part
// of this dataset. We check all ids and return one complete error.
List<String> invalidRefIds = new ArrayList<>();
for (Column column : table.getColumns()) {
if (StringUtils.equalsIgnoreCase(column.getType(), "FILEREF")) {
List<String> refIdArray = bigQueryPdao.getRefIds(dataset, stagingTableName, column);
List<String> badRefIds = fileDao.validateRefIds(dataset, refIdArray);
if (badRefIds != null) {
invalidRefIds.addAll(badRefIds);
}
}
}
int invalidIdCount = invalidRefIds.size();
if (invalidIdCount != 0) {
// Cap the detail list at MAX_ERROR_REF_IDS entries; the message reports the true total.
// (Previously the loop emitted MAX_ERROR_REF_IDS + 1 entries and the message was
// missing a space: "50out of".)
List<String> errorDetails = invalidRefIds.stream().limit(MAX_ERROR_REF_IDS).collect(Collectors.toList());
// StringBuilder (not concatenation in a loop) to appease findbugs; no thread-safety
// needed here, so StringBuilder rather than StringBuffer.
StringBuilder errorMessage = new StringBuilder("Invalid file ids found during ingest (");
if (invalidIdCount > MAX_ERROR_REF_IDS) {
errorMessage.append(MAX_ERROR_REF_IDS).append(" out of ");
}
errorMessage.append(invalidIdCount).append(" returned in details)");
throw new InvalidFileRefException(errorMessage.toString(), errorDetails);
}
return StepResult.getStepResultSuccess();
}
Usage of bio.terra.common.Column in the project jade-data-repo by DataBiosphere: class BigQueryPdao, method matchRowIds.
// For each table in a dataset (source), collect row id matches ON the row id.
/**
 * Checks which of the supplied row ids exist in the named source table.
 *
 * @param snapshot snapshot whose BigQuery project is queried
 * @param source snapshot source providing the map tables and underlying dataset
 * @param tableName name of the source dataset table to probe
 * @param rowIds candidate row ids to look up
 * @return a {@code RowIdMatch} partitioning the inputs into matched and mismatched ids
 * @throws IllegalStateException if the source has no map table for {@code tableName}
 * @throws InterruptedException if the BigQuery query is interrupted
 */
public RowIdMatch matchRowIds(Snapshot snapshot, SnapshotSource source, String tableName, List<String> rowIds) throws InterruptedException {
// One source: grab it and navigate to the relevant parts.
BigQueryProject bigQueryProject = bigQueryProjectForSnapshot(snapshot);
// Fail with a descriptive message instead of a bare NoSuchElementException from
// Optional.get() when the table name has no matching map table.
SnapshotMapTable mapTable = source.getSnapshotMapTables().stream()
    .filter(table -> table.getFromTable().getName().equals(tableName))
    .findFirst()
    .orElseThrow(() -> new IllegalStateException(
        "No snapshot map table found for source table: " + tableName));
// Create a column pointing at the row id column in the source table so the query can
// verify that the passed row ids exist in it.
Column rowIdColumn = new Column().table(mapTable.getFromTable()).name(PDAO_ROW_ID_COLUMN);
ST sqlTemplate = new ST(mapValuesToRowsTemplate);
sqlTemplate.add("project", bigQueryProject.getProjectId());
sqlTemplate.add("dataset", prefixName(source.getDataset().getName()));
sqlTemplate.add("table", tableName);
sqlTemplate.add("column", rowIdColumn.getName());
sqlTemplate.add("inputVals", rowIds);
// Execute the query, building the row id match structure that tracks the matching
// ids and the mismatched ids.
RowIdMatch rowIdMatch = new RowIdMatch();
String sql = sqlTemplate.render();
// Parameterized logging avoids string construction when debug is disabled.
logger.debug("mapValuesToRows sql: {}", sql);
TableResult result = bigQueryProject.query(sql);
for (FieldValueList row : result.iterateAll()) {
// Query returns (rowId, inputValue) pairs; a NULL rowId means the input had no match.
FieldValue rowId = row.get(0);
FieldValue inputValue = row.get(1);
if (rowId.isNull()) {
rowIdMatch.addMismatch(inputValue.getStringValue());
logger.debug("rowId=<NULL> inVal={}", inputValue.getStringValue());
} else {
rowIdMatch.addMatch(inputValue.getStringValue(), rowId.getStringValue());
logger.debug("rowId={} inVal={}", rowId.getStringValue(), inputValue.getStringValue());
}
}
return rowIdMatch;
}
Usage of bio.terra.common.Column in the project jade-data-repo by DataBiosphere: class BigQueryPdao, method buildSchema.
/**
 * Builds the BigQuery schema for a dataset table.
 *
 * <p>Primary-key columns become REQUIRED fields, array columns become REPEATED, and all
 * others NULLABLE. When requested, the internal row id column is prepended as a STRING.
 *
 * @param table dataset table whose columns define the schema
 * @param addRowIdColumn whether to prepend the {@code PDAO_ROW_ID_COLUMN} field
 * @return the assembled BigQuery {@code Schema}
 */
private Schema buildSchema(DatasetTable table, boolean addRowIdColumn) {
List<String> primaryKeyNames =
    table.getPrimaryKey().stream().map(Column::getName).collect(Collectors.toList());
List<Field> fields = new ArrayList<>();
if (addRowIdColumn) {
fields.add(Field.of(PDAO_ROW_ID_COLUMN, LegacySQLTypeName.STRING));
}
for (Column tableColumn : table.getColumns()) {
// Mode precedence: primary key wins over array-ness; everything else is nullable.
Field.Mode fieldMode =
    primaryKeyNames.contains(tableColumn.getName())
        ? Field.Mode.REQUIRED
        : (tableColumn.isArrayOf() ? Field.Mode.REPEATED : Field.Mode.NULLABLE);
fields.add(
    Field.newBuilder(tableColumn.getName(), translateType(tableColumn.getType()))
        .setMode(fieldMode)
        .build());
}
return Schema.of(fields);
}
Usage of bio.terra.common.Column in the project jade-data-repo by DataBiosphere: class BigQueryPdao, method buildLiveView.
/**
 * Builds the BigQuery view definition exposing the "live" (not soft-deleted) rows of a
 * dataset table.
 *
 * <p>The view selects the row id column plus every table column from the raw table,
 * excluding rows present in the soft-delete table. For ingest-date-partitioned tables the
 * implicit {@code _PARTITIONDATE} pseudo-column is additionally exposed under its alias.
 *
 * @param bigQueryProject BigQuery project id for the view SQL
 * @param datasetName BigQuery dataset name containing the raw and soft-delete tables
 * @param table dataset table the view projects
 * @return table info describing the view (named after the dataset table)
 */
private TableInfo buildLiveView(String bigQueryProject, String datasetName, DatasetTable table) {
// "columns" is a multi-valued template attribute: each add() appends to the select list.
ST viewSql = new ST(liveViewTemplate)
    .add("project", bigQueryProject)
    .add("dataset", datasetName)
    .add("rawTable", table.getRawTableName())
    .add("sdTable", table.getSoftDeleteTableName())
    .add("columns", PDAO_ROW_ID_COLUMN)
    .add("columns", table.getColumns().stream().map(Column::getName).collect(Collectors.toList()));
boolean partitionedByIngestDate =
    table.getBigQueryPartitionConfig().getMode() == BigQueryPartitionConfigV1.Mode.INGEST_DATE;
if (partitionedByIngestDate) {
viewSql.add("columns", "_PARTITIONDATE AS " + PdaoConstant.PDAO_INGEST_DATE_COLUMN_ALIAS);
}
return TableInfo.of(TableId.of(datasetName, table.getName()), ViewDefinition.of(viewSql.render()));
}
Aggregations