use of org.apache.gobblin.source.extractor.schema.ColumnAttributes in project incubator-gobblin by apache.
the class JdbcExtractor method extractMetadata.
/**
 * Builds the output schema, header record, column projection and extract SQL for an entity
 * using metadata fetched over JDBC.
 *
 * <p>The input query and delta (watermark) fields are read from {@code workUnitState}. The
 * sample clause is stripped from the query before it is parsed. When
 * {@code hasMultipleWatermarkColumns} reports more than one watermark column, the default
 * watermark column is appended to the projection, header and schema, and the delta-fields
 * property is rewritten to that derived column name.
 *
 * @param schema schema name forwarded to the metadata and extract-query builders
 * @param entity entity name forwarded to the metadata and extract-query builders
 * @param workUnit not read by this method; settings are taken from {@code workUnitState}
 * @throws SchemaException wrapping any {@link RuntimeException}, {@link IOException} or
 *         {@code SchemaException} raised while the metadata is being assembled
 * @throws IOException declared by the overridden signature
 * @throws RuntimeException if {@code hasJoinOperation} reports a join in the input query
 */
@Override
public void extractMetadata(String schema, String entity, WorkUnit workUnit) throws SchemaException, IOException {
  this.log.info("Extract metadata using JDBC");

  String query = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);
  if (hasJoinOperation(query)) {
    throw new RuntimeException("Query across multiple tables not supported");
  }

  final String deltaFields = workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
  this.enableDelimitedIdentifier = workUnitState.getPropAsBoolean(
      ConfigurationKeys.ENABLE_DELIMITED_IDENTIFIER,
      ConfigurationKeys.DEFAULT_ENABLE_DELIMITED_IDENTIFIER);

  final JsonObject watermarkSchema = this.getDefaultWatermark();
  String derivedColumn = watermarkSchema.get("columnName").getAsString();

  // Record the sample size first, then strip the sample clause so it is not parsed as SQL.
  this.setSampleRecordCount(this.exractSampleRecordCountFromQuery(query));
  query = this.removeSampleClauseFromQuery(query);

  final JsonArray outputSchema = new JsonArray();
  final List<String> header = new ArrayList<>();

  try {
    List<Command> metadataCommands = this.getSchemaMetadata(schema, entity);
    CommandOutput<?, ?> metadataResponse = this.executePreparedSql(metadataCommands);
    JsonArray rawSchema = this.getSchema(metadataResponse);
    this.buildMetadataColumnMap(rawSchema);
    this.parseInputQuery(query);

    List<String> metadataColumns = this.getMetadataColumnList();
    for (ColumnAttributes attributes : this.columnAliasMap) {
      String aliasName = attributes.getAliasName();
      String name = attributes.getColumnName();
      String sourceName = attributes.getSourceColumnName();

      // Projected columns that are not part of the source metadata are left out.
      if (!this.isMetadataColumn(name, metadataColumns)) {
        continue;
      }

      String targetName = this.getTargetColumnName(name, aliasName);
      Schema columnSchema = this.getUpdatedSchemaObject(name, aliasName, targetName);
      // Round-trip through JSON text to obtain a JsonObject view of the schema bean.
      String serialized = gson.toJson(columnSchema);
      outputSchema.add(gson.fromJson(serialized, JsonObject.class).getAsJsonObject());
      header.add(targetName);

      String delimitedSource = getLeftDelimitedIdentifier() + sourceName + getRightDelimitedIdentifier();
      this.columnList.add(delimitedSource);
    }

    if (this.hasMultipleWatermarkColumns(deltaFields)) {
      derivedColumn = getLeftDelimitedIdentifier() + derivedColumn + getRightDelimitedIdentifier();
      this.columnList.add(derivedColumn);
      header.add(derivedColumn);
      outputSchema.add(watermarkSchema);
      this.workUnitState.setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, derivedColumn);
    }

    String joinedColumns = Joiner.on(",").useForNull("null").join(this.columnList);
    // Swap the derived watermark column for a coalesce expression aliased to that name.
    String coalesceExpression = Utils.getCoalesceColumnNames(deltaFields) + " AS " + derivedColumn;
    String projection = joinedColumns.replace(derivedColumn, coalesceExpression);
    this.setOutputColumnProjection(projection);

    String extractSql = this.getExtractQuery(schema, entity, query);
    this.setHeaderRecord(header);
    this.setOutputSchema(outputSchema);
    this.setExtractSql(extractSql);

    this.log.info("Schema:" + outputSchema);
    this.log.info("Extract query: " + this.getExtractSql());
  } catch (RuntimeException | IOException | SchemaException e) {
    throw new SchemaException("Failed to get metadata using JDBC; error - " + e.getMessage(), e);
  }
}
use of org.apache.gobblin.source.extractor.schema.ColumnAttributes in project incubator-gobblin by apache.
the class JdbcExtractor method parseInputQuery.
/**
 * Parses the query provided in the pull file.
 *
 * <p>When the query contains {@code "select "} followed later by {@code " from "}, the text
 * between them is stored as the input column projection and split on commas that are not
 * inside parentheses. Each projected column is then added to the column alias map as a
 * {@link ColumnAttributes} holding its column name, alias and source column. When
 * {@code isSelectAllColumns()} is true, every metadata column is added instead, using its own
 * name as the alias.
 *
 * <p>An alias is recognised when it follows a case-insensitive {@code " as "} outside
 * parentheses, or when the column has exactly one whitespace-separated part outside
 * parentheses after the expression (for example {@code col alias}). Any other column is kept
 * whole with a {@code null} alias.
 *
 * @param query input query; a {@code null} or blank query yields no projected columns
 */
private void parseInputQuery(String query) {
  List<String> projectedColumns = new ArrayList<>();
  if (StringUtils.isNotBlank(query)) {
    String queryLowerCase = query.toLowerCase();
    // Check the raw indexOf result before adding the keyword length; otherwise a missing
    // "select " (-1) turns into offset 6 and slips past the bounds check.
    int selectIndex = queryLowerCase.indexOf("select ");
    int startIndex = selectIndex + 7;
    int endIndex = selectIndex < 0 ? -1 : queryLowerCase.indexOf(" from ", startIndex);
    if (selectIndex >= 0 && endIndex >= startIndex) {
      String columnProjection = query.substring(startIndex, endIndex);
      this.setInputColumnProjection(columnProjection);
      // Split the select list on commas, ignoring commas nested inside parentheses.
      StringBuilder current = new StringBuilder();
      int bracketCount = 0;
      for (int i = 0; i < columnProjection.length(); i++) {
        char c = columnProjection.charAt(i);
        if (c == '(') {
          bracketCount++;
        }
        if (c == ')') {
          bracketCount--;
        }
        if (bracketCount == 0 && c == ',') {
          projectedColumns.add(current.toString());
          current = new StringBuilder();
        } else {
          current.append(c);
        }
      }
      projectedColumns.add(current.toString());
    }
  }

  if (this.isSelectAllColumns()) {
    List<String> columnList = this.getMetadataColumnList();
    for (String columnName : columnList) {
      ColumnAttributes col = new ColumnAttributes();
      col.setColumnName(columnName);
      col.setAliasName(columnName);
      col.setSourceColumnName(columnName);
      this.addToColumnAliasMap(col);
    }
    return;
  }

  for (String projectedColumn : projectedColumns) {
    String column = projectedColumn.trim();
    String alias = null;
    String sourceColumn = column;

    int asIndex = lastTopLevelIndexOf(column, " as ");
    if (asIndex >= 0) {
      // "<expr> as <alias>"; an " as " inside parentheses (e.g. cast(a as int)) is ignored.
      sourceColumn = column.substring(0, asIndex);
      alias = column.substring(asIndex + 4);
    } else {
      // "<expr> <alias>": only accepted when the expression itself has no space outside
      // parentheses, so expressions such as "a + b" are not split.
      int spaceIndex = lastTopLevelIndexOf(column, " ");
      if (spaceIndex >= 0) {
        String head = column.substring(0, spaceIndex).trim();
        if (!head.isEmpty() && lastTopLevelIndexOf(head, " ") < 0) {
          sourceColumn = head;
          alias = column.substring(spaceIndex + 1);
        }
      }
    }

    // Extract the column name if the projection has a table name in it.
    String columnName = sourceColumn;
    if (sourceColumn.contains(".")) {
      columnName = sourceColumn.substring(sourceColumn.indexOf(".") + 1);
    }

    ColumnAttributes col = new ColumnAttributes();
    col.setColumnName(columnName);
    col.setAliasName(alias);
    col.setSourceColumnName(sourceColumn);
    this.addToColumnAliasMap(col);
  }
}

/**
 * Returns the last index at which {@code token} occurs in {@code text} outside any
 * parentheses, compared case-insensitively, or {@code -1} if there is none.
 */
private static int lastTopLevelIndexOf(String text, String token) {
  int depth = 0;
  int found = -1;
  for (int i = 0; i < text.length(); i++) {
    char c = text.charAt(i);
    if (c == '(') {
      depth++;
    } else if (c == ')') {
      depth--;
    } else if (depth == 0 && text.regionMatches(true, i, token, 0, token.length())) {
      found = i;
    }
  }
  return found;
}
Aggregations