Search in sources :

Example 1 with ColumnAttributes

use of org.apache.gobblin.source.extractor.schema.ColumnAttributes in project incubator-gobblin by apache.

the class JdbcExtractor method extractMetadata.

/**
 * Derives the output schema, header record, output column projection and extract SQL
 * for the given entity from JDBC metadata and the configured input query.
 *
 * <p>The input query is read from {@code ConfigurationKeys.SOURCE_QUERYBASED_QUERY}; queries
 * for which {@code hasJoinOperation} is true are rejected before any metadata is fetched.
 * Every column registered in {@code columnAliasMap} that passes {@code isMetadataColumn}
 * contributes one entry to the output schema, the header and {@code columnList}. When
 * {@code hasMultipleWatermarkColumns} is true for the configured delta fields, the default
 * watermark column is appended as well and {@code EXTRACT_DELTA_FIELDS_KEY} is overwritten
 * with its (delimited) name.
 *
 * @param schema   source schema name, forwarded to {@code getSchemaMetadata} and {@code getExtractQuery}
 * @param entity   source entity name, forwarded to {@code getSchemaMetadata} and {@code getExtractQuery}
 * @param workUnit not read by this method; configuration comes from the {@code workUnitState} field
 * @throws SchemaException  wrapping any {@code RuntimeException}, {@code IOException} or
 *                          {@code SchemaException} raised while building the metadata
 * @throws RuntimeException if the input query joins multiple tables (thrown before the wrapping try block)
 */
@Override
public void extractMetadata(String schema, String entity, WorkUnit workUnit) throws SchemaException, IOException {
    this.log.info("Extract metadata using JDBC");

    String query = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);
    if (hasJoinOperation(query)) {
        throw new RuntimeException("Query across multiple tables not supported");
    }

    final String deltaFields = workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
    this.enableDelimitedIdentifier = workUnitState.getPropAsBoolean(
        ConfigurationKeys.ENABLE_DELIMITED_IDENTIFIER,
        ConfigurationKeys.DEFAULT_ENABLE_DELIMITED_IDENTIFIER);
    final JsonObject watermarkSchema = this.getDefaultWatermark();
    String derivedWatermark = watermarkSchema.get("columnName").getAsString();

    // The sample clause is recorded first and then stripped so later parsing sees a plain query.
    this.setSampleRecordCount(this.exractSampleRecordCountFromQuery(query));
    query = this.removeSampleClauseFromQuery(query);

    final JsonArray outputSchema = new JsonArray();
    final List<String> header = new ArrayList<>();
    try {
        final List<Command> metadataCommands = this.getSchemaMetadata(schema, entity);
        final CommandOutput<?, ?> metadataResponse = this.executePreparedSql(metadataCommands);
        this.buildMetadataColumnMap(this.getSchema(metadataResponse));
        this.parseInputQuery(query);

        final List<String> metadataColumns = this.getMetadataColumnList();
        for (ColumnAttributes attributes : this.columnAliasMap) {
            final String alias = attributes.getAliasName();
            final String columnName = attributes.getColumnName();
            final String sourceColumnName = attributes.getSourceColumnName();
            if (!this.isMetadataColumn(columnName, metadataColumns)) {
                // Columns rejected by isMetadataColumn are left out of schema and projection.
                continue;
            }
            final String targetColumnName = this.getTargetColumnName(columnName, alias);
            final Schema columnSchema = this.getUpdatedSchemaObject(columnName, alias, targetColumnName);
            // Round-trip through JSON text to obtain a JsonObject view of the schema entry.
            final String columnSchemaJson = gson.toJson(columnSchema);
            outputSchema.add(gson.fromJson(columnSchemaJson, JsonObject.class).getAsJsonObject());
            header.add(targetColumnName);
            this.columnList.add(getLeftDelimitedIdentifier() + sourceColumnName + getRightDelimitedIdentifier());
        }

        if (this.hasMultipleWatermarkColumns(deltaFields)) {
            derivedWatermark = getLeftDelimitedIdentifier() + derivedWatermark + getRightDelimitedIdentifier();
            this.columnList.add(derivedWatermark);
            header.add(derivedWatermark);
            outputSchema.add(watermarkSchema);
            this.workUnitState.setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, derivedWatermark);
        }

        // Join the projection, then swap the derived watermark name for its
        // "<coalesced columns> AS <derived name>" expression.
        final String joinedColumns = Joiner.on(",").useForNull("null").join(this.columnList);
        final String watermarkExpression = Utils.getCoalesceColumnNames(deltaFields) + " AS " + derivedWatermark;
        this.setOutputColumnProjection(joinedColumns.replace(derivedWatermark, watermarkExpression));

        final String extractQuery = this.getExtractQuery(schema, entity, query);
        this.setHeaderRecord(header);
        this.setOutputSchema(outputSchema);
        this.setExtractSql(extractQuery);

        this.log.info("Schema:" + outputSchema);
        this.log.info("Extract query: " + this.getExtractSql());
    } catch (RuntimeException | IOException | SchemaException e) {
        throw new SchemaException("Failed to get metadata using JDBC; error - " + e.getMessage(), e);
    }
}
Also used : SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) ColumnAttributes(org.apache.gobblin.source.extractor.schema.ColumnAttributes) Schema(org.apache.gobblin.source.extractor.schema.Schema) ArrayList(java.util.ArrayList) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException) JsonArray(com.google.gson.JsonArray) Command(org.apache.gobblin.source.extractor.extract.Command)

Example 2 with ColumnAttributes

use of org.apache.gobblin.source.extractor.schema.ColumnAttributes in project incubator-gobblin by apache.

the class JdbcExtractor method parseInputQuery.

/**
 * Parses the query provided in the pull file.
 *
 * <p>Two things are derived from the query:
 * <ul>
 *   <li>the input column projection: the text between the first {@code select} keyword and
 *       the first {@code from} keyword, passed to {@code setInputColumnProjection};</li>
 *   <li>the column alias map: one {@link ColumnAttributes} entry per projected column (or
 *       per metadata column when {@code isSelectAllColumns()} is true), registered through
 *       {@code addToColumnAliasMap}.</li>
 * </ul>
 *
 * <p>The select list is split on commas outside parentheses. An alias is recognised only
 * when it is introduced by the {@code AS} keyword outside parentheses; a projection without
 * such a keyword is kept whole as the source column with a {@code null} alias.
 *
 * @param query input query; when blank, or when no {@code select ... from} range can be
 *              located, no projection is parsed
 */
private void parseInputQuery(String query) {
    List<String> projectedColumns = new ArrayList<>();
    if (StringUtils.isNotBlank(query)) {
        String searchable = toKeywordSearchForm(query);
        int selectIndex = searchable.indexOf("select ");
        int endIndex = searchable.indexOf(" from ");
        int startIndex = selectIndex + "select ".length();
        // Test the raw indexOf result: once the keyword length is added, a "not found" -1
        // becomes 6 and can no longer be detected. Also reject a reversed range, which
        // would make substring throw.
        if (selectIndex >= 0 && endIndex >= startIndex) {
            String columnProjection = query.substring(startIndex, endIndex);
            this.setInputColumnProjection(columnProjection);
            // Split the select list on commas that are not nested inside parentheses,
            // so function calls such as COALESCE(a, b) stay in one piece.
            StringBuilder current = new StringBuilder();
            int bracketCount = 0;
            for (int i = 0; i < columnProjection.length(); i++) {
                char c = columnProjection.charAt(i);
                if (c == '(') {
                    bracketCount++;
                } else if (c == ')') {
                    bracketCount--;
                }
                if (c == ',' && bracketCount == 0) {
                    projectedColumns.add(current.toString());
                    current.setLength(0);
                } else {
                    current.append(c);
                }
            }
            projectedColumns.add(current.toString());
        }
    }
    if (this.isSelectAllColumns()) {
        List<String> columnList = this.getMetadataColumnList();
        for (String columnName : columnList) {
            ColumnAttributes col = new ColumnAttributes();
            col.setColumnName(columnName);
            col.setAliasName(columnName);
            col.setSourceColumnName(columnName);
            this.addToColumnAliasMap(col);
        }
    } else {
        for (String projectedColumn : projectedColumns) {
            String column = projectedColumn.trim();
            String alias = null;
            String sourceColumn = column;
            // Separate column and alias only when an " as " keyword is actually present;
            // a projection that merely contains spaces (e.g. "COALESCE(a, b)") has no alias
            // and must not be cut at index -1.
            int aliasSeparator = lastTopLevelAsIndex(column);
            if (aliasSeparator >= 0) {
                sourceColumn = column.substring(0, aliasSeparator).trim();
                alias = column.substring(aliasSeparator + " as ".length()).trim();
            }
            // extract column name if projection has table name in it
            String columnName = sourceColumn;
            if (sourceColumn.contains(".")) {
                columnName = sourceColumn.substring(sourceColumn.indexOf(".") + 1);
            }
            ColumnAttributes col = new ColumnAttributes();
            col.setColumnName(columnName);
            col.setAliasName(alias);
            col.setSourceColumnName(sourceColumn);
            this.addToColumnAliasMap(col);
        }
    }
}

/**
 * Returns a same-length copy of {@code text} with every character lower-cased and every
 * whitespace character replaced by a plain space, so that indices found in the copy are
 * valid in the original and keywords followed by a tab or newline are still matched.
 */
private static String toKeywordSearchForm(String text) {
    StringBuilder normalized = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        normalized.append(Character.isWhitespace(c) ? ' ' : Character.toLowerCase(c));
    }
    return normalized.toString();
}

/**
 * Returns the index of the last {@code " as "} keyword in {@code column} that is not nested
 * inside parentheses, or -1 when there is none (e.g. for {@code CAST(a AS int)}).
 */
private static int lastTopLevelAsIndex(String column) {
    String searchable = toKeywordSearchForm(column);
    int depth = 0;
    int found = -1;
    for (int i = 0; i < searchable.length(); i++) {
        char c = searchable.charAt(i);
        if (c == '(') {
            depth++;
        } else if (c == ')') {
            depth--;
        } else if (depth == 0 && searchable.startsWith(" as ", i)) {
            found = i;
        }
    }
    return found;
}
Also used : ColumnAttributes(org.apache.gobblin.source.extractor.schema.ColumnAttributes) ArrayList(java.util.ArrayList)

Aggregations

ArrayList (java.util.ArrayList)2 ColumnAttributes (org.apache.gobblin.source.extractor.schema.ColumnAttributes)2 JsonArray (com.google.gson.JsonArray)1 JsonObject (com.google.gson.JsonObject)1 IOException (java.io.IOException)1 SchemaException (org.apache.gobblin.source.extractor.exception.SchemaException)1 Command (org.apache.gobblin.source.extractor.extract.Command)1 Schema (org.apache.gobblin.source.extractor.schema.Schema)1