Search in sources :

Example 6 with Schema

use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.

the class SalesforceExtractor method getSchema.

/**
 * Builds the column schema from the Salesforce REST response.
 *
 * <p>The first result value of the response is parsed as a JSON document and its
 * {@code "fields"} array is turned into one JSON object per column.
 *
 * @param response command output whose first result value is read as the JSON text
 * @return a JSON array holding one serialized {@link Schema} per entry of {@code "fields"}
 * @throws SchemaException if the response has no output, the output is not a JSON object
 *         containing a {@code "fields"} array, or any field cannot be converted
 */
@Override
public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException {
    log.info("Get schema from salesforce");
    Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator();
    if (!itr.hasNext()) {
        throw new SchemaException("Failed to get schema from salesforce; REST response has no output");
    }
    String output = itr.next();

    JsonArray fieldJsonArray = new JsonArray();
    try {
        // Parse inside the try so malformed or empty payloads are reported as SchemaException
        // instead of escaping as an unchecked parser exception or NullPointerException.
        JsonObject jsonObject = GSON.fromJson(output, JsonObject.class);
        JsonArray array = (jsonObject == null) ? null : jsonObject.getAsJsonArray("fields");
        if (array == null) {
            throw new SchemaException("Failed to get schema from salesforce; REST response has no fields");
        }
        for (JsonElement columnElement : array) {
            JsonObject field = columnElement.getAsJsonObject();
            String columnName = field.get("name").getAsString();
            String dataType = field.get("type").getAsString();
            String elementDataType = "string";
            List<String> mapSymbols = null;
            JsonObject newDataType = this.convertDataType(columnName, dataType, elementDataType, mapSymbols);
            log.debug("ColumnName:" + columnName + ";   old datatype:" + dataType + ";   new datatype:" + newDataType);

            Schema schema = new Schema();
            schema.setColumnName(columnName);
            schema.setDataType(newDataType);
            schema.setLength(field.get("length").getAsLong());
            schema.setPrecision(field.get("precision").getAsInt());
            schema.setScale(field.get("scale").getAsInt());
            schema.setNullable(field.get("nillable").getAsBoolean());
            schema.setFormat(null);
            // "label" and "defaultValue" may be JSON null; map that to a Java null.
            schema.setComment((field.get("label").isJsonNull() ? null : field.get("label").getAsString()));
            schema.setDefaultValue((field.get("defaultValue").isJsonNull() ? null : field.get("defaultValue").getAsString()));
            schema.setUnique(field.get("unique").getAsBoolean());

            // Round-trip through JSON text to obtain the schema as a JsonObject.
            String jsonStr = GSON.toJson(schema);
            fieldJsonArray.add(GSON.fromJson(jsonStr, JsonObject.class));
        }
    } catch (SchemaException e) {
        // Already carries a specific message; do not wrap it a second time.
        throw e;
    } catch (Exception e) {
        throw new SchemaException("Failed to get schema from salesforce; error - " + e.getMessage(), e);
    }
    return fieldJsonArray;
}
Also used : JsonArray(com.google.gson.JsonArray) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) JsonElement(com.google.gson.JsonElement) Schema(org.apache.gobblin.source.extractor.schema.Schema) ListIterator(java.util.ListIterator) Iterator(java.util.Iterator) JsonObject(com.google.gson.JsonObject) ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) AsyncApiException(com.sforce.async.AsyncApiException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) RestApiClientException(org.apache.gobblin.source.extractor.exception.RestApiClientException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RestApiConnectionException(org.apache.gobblin.source.extractor.exception.RestApiConnectionException)

Example 7 with Schema

use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.

the class JdbcExtractor method getTargetColumnName.

/**
 * Resolves the name a column will carry in the target.
 *
 * <p>When the source column is absent from the metadata column map, the alias is used if it
 * is non-null, otherwise a generated {@code "unknown" + counter} name; the unknown-column
 * counter is incremented in either case. When the source column is present, a non-blank
 * alias wins and the source column name is the fallback. The chosen name is passed through
 * {@code toCase} and then has special characters escaped.
 *
 * @param sourceColumnName column name as it appears in the source
 * @param alias alias given to the column, may be null
 * @return targetColumnName
 */
private String getTargetColumnName(String sourceColumnName, String alias) {
    boolean presentInMetadata = this.getMetadataColumnMap().get(sourceColumnName.toLowerCase()) != null;

    String resolvedName;
    if (presentInMetadata) {
        resolvedName = StringUtils.isNotBlank(alias) ? alias : sourceColumnName;
    } else {
        // The counter value is read before it is advanced.
        resolvedName = (alias != null) ? alias : "unknown" + this.unknownColumnCounter;
        this.unknownColumnCounter++;
    }

    String casedName = this.toCase(resolvedName);
    return Utils.escapeSpecialCharacters(casedName, ConfigurationKeys.ESCAPE_CHARS_IN_COLUMN_NAME, "_");
}
Also used : Schema(org.apache.gobblin.source.extractor.schema.Schema)

Example 8 with Schema

use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.

the class JdbcExtractor method getUpdatedSchemaObject.

/**
 * Returns the schema of a column, renamed to the target column name and with its
 * watermark, nullable and primary key flags refreshed.
 *
 * <p>The metadata column map is searched by source column name first and by alias second.
 * If neither is found, a custom column schema is created for the target column name.
 *
 * @param sourceColumnName column name as it appears in the source
 * @param alias alias given to the column, may be null
 * @param targetColumnName name the column carries in the target
 * @return schema object of a column
 */
private Schema getUpdatedSchemaObject(String sourceColumnName, String alias, String targetColumnName) {
    // Look the column up by source name, falling back to the alias.
    Schema columnSchema = this.getMetadataColumnMap().get(sourceColumnName.toLowerCase());
    if (columnSchema == null && alias != null) {
        columnSchema = this.getMetadataColumnMap().get(alias.toLowerCase());
    }
    if (columnSchema == null) {
        return getCustomColumnSchema(targetColumnName);
    }

    String deltaFields = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
    String primaryKeyFields = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY);
    boolean multiColumnWatermark = this.hasMultipleWatermarkColumns(deltaFields);

    columnSchema.setColumnName(targetColumnName);

    boolean watermark = this.isWatermarkColumn(deltaFields, sourceColumnName);
    if (watermark) {
        this.updateDeltaFieldConfig(sourceColumnName, targetColumnName);
    } else if (alias != null) {
        // Source name did not match; test the alias and update the delta field config for it.
        watermark = this.isWatermarkColumn(deltaFields, alias);
        this.updateDeltaFieldConfig(alias, targetColumnName);
    }

    // The watermark flag is only written when a single watermark column is configured.
    if (!multiColumnWatermark) {
        columnSchema.setWaterMark(watermark);
    }

    // A single-column watermark or a primary key column is marked non-nullable.
    boolean nonNullable = (watermark && !multiColumnWatermark)
            || this.getPrimarykeyIndex(primaryKeyFields, sourceColumnName) > 0;
    columnSchema.setNullable(!nonNullable);

    // Record the primary key index; rewrite the key config when the column was renamed.
    int keyIndex = this.getPrimarykeyIndex(primaryKeyFields, sourceColumnName);
    boolean renamed = !sourceColumnName.equalsIgnoreCase(targetColumnName);
    if (keyIndex > 0 && renamed) {
        this.updatePrimaryKeyConfig(sourceColumnName, targetColumnName);
    }
    columnSchema.setPrimaryKey(keyIndex);
    return columnSchema;
}
Also used : Schema(org.apache.gobblin.source.extractor.schema.Schema)

Example 9 with Schema

use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.

the class JdbcExtractor method getCustomColumnSchema.

/**
 * Creates the schema used for a column that was not found in the metadata.
 *
 * <p>The data type is whatever {@code convertDataType} yields for the {@code "string"}
 * type. The column is nullable, not unique, not a watermark, has primary key index 0 and
 * zero length, precision and scale, and is commented as {@code "Custom column"}.
 *
 * @param columnName name to assign to the generated column schema
 * @return column schema
 */
private Schema getCustomColumnSchema(String columnName) {
    final String stringType = "string";
    final List<String> noMapSymbols = null;
    JsonObject convertedType = this.convertDataType(columnName, stringType, stringType, noMapSymbols);

    Schema customSchema = new Schema();
    customSchema.setColumnName(columnName);
    customSchema.setDataType(convertedType);

    // Key and watermark flags
    customSchema.setWaterMark(false);
    customSchema.setPrimaryKey(0);

    // Size attributes
    customSchema.setLength(0);
    customSchema.setPrecision(0);
    customSchema.setScale(0);

    // Remaining descriptive attributes
    customSchema.setNullable(true);
    customSchema.setFormat(null);
    customSchema.setComment("Custom column");
    customSchema.setDefaultValue(null);
    customSchema.setUnique(false);
    return customSchema;
}
Also used : Schema(org.apache.gobblin.source.extractor.schema.Schema) JsonObject(com.google.gson.JsonObject)

Example 10 with Schema

use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.

the class TeradataExtractor method getSchema.

/**
 * Builds the column schema from the metadata of the schema result set.
 *
 * <p>The first result value of the response is taken as the {@link ResultSet}; one JSON
 * object is produced per column reported by its {@link ResultSetMetaData}.
 *
 * @param response command output whose first result value is read as the schema result set
 * @return a JSON array holding one serialized {@link Schema} per result set column
 * @throws SchemaException if the response has no results or reading the metadata fails
 * @throws IOException declared by the signature; failures raised while reading the
 *         metadata are wrapped in {@link SchemaException}
 */
@Override
public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException, IOException {
    log.debug("Extract schema from resultset");
    Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator();
    if (!itr.hasNext()) {
        throw new SchemaException("Failed to get schema from Teradata - empty schema resultset");
    }
    ResultSet resultset = itr.next();

    JsonArray fieldJsonArray = new JsonArray();
    try {
        ResultSetMetaData rsmd = resultset.getMetaData();
        // Column count is fixed for the result set, so read it once.
        int columnCount = rsmd.getColumnCount();
        // JDBC column indexes are 1-based.
        for (int i = 1; i <= columnCount; i++) {
            String columnName = rsmd.getColumnName(i);
            String columnTypeName = rsmd.getColumnTypeName(i);
            List<String> mapSymbols = null;
            JsonObject newDataType = this.convertDataType(columnName, columnTypeName, ELEMENT_DATA_TYPE, mapSymbols);

            // A fresh Schema per column so no attribute can carry over from the previous one.
            Schema schema = new Schema();
            schema.setColumnName(columnName);
            schema.setDataType(newDataType);
            schema.setLength(rsmd.getColumnDisplaySize(i));
            schema.setPrecision(rsmd.getPrecision(i));
            schema.setScale(rsmd.getScale(i));
            schema.setNullable(rsmd.isNullable(i) == ResultSetMetaData.columnNullable);
            schema.setComment(rsmd.getColumnLabel(i));

            // Round-trip through JSON text to obtain the schema as a JsonObject.
            String jsonStr = gson.toJson(schema);
            JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject();
            fieldJsonArray.add(obj);
        }
    } catch (Exception e) {
        throw new SchemaException("Failed to get schema from Teradata; error - " + e.getMessage(), e);
    }
    return fieldJsonArray;
}
Also used : JsonArray(com.google.gson.JsonArray) ResultSetMetaData(java.sql.ResultSetMetaData) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) Schema(org.apache.gobblin.source.extractor.schema.Schema) ResultSet(java.sql.ResultSet) Iterator(java.util.Iterator) JsonObject(com.google.gson.JsonObject) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Aggregations

Schema (org.apache.gobblin.source.extractor.schema.Schema)10 JsonObject (com.google.gson.JsonObject)7 JsonArray (com.google.gson.JsonArray)5 IOException (java.io.IOException)5 SchemaException (org.apache.gobblin.source.extractor.exception.SchemaException)5 JsonElement (com.google.gson.JsonElement)3 Iterator (java.util.Iterator)3 DataRecordException (org.apache.gobblin.source.extractor.DataRecordException)3 HighWatermarkException (org.apache.gobblin.source.extractor.exception.HighWatermarkException)3 RecordCountException (org.apache.gobblin.source.extractor.exception.RecordCountException)3 ResultSet (java.sql.ResultSet)2 ParseException (java.text.ParseException)2 RestApiConnectionException (org.apache.gobblin.source.extractor.exception.RestApiConnectionException)2 Command (org.apache.gobblin.source.extractor.extract.Command)2 Splitter (com.google.common.base.Splitter)1 AsyncApiException (com.sforce.async.AsyncApiException)1 ResultSetMetaData (java.sql.ResultSetMetaData)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 ListIterator (java.util.ListIterator)1