use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.
the class SalesforceExtractor method getSchema.
/**
 * Builds the column schema array from the Salesforce REST response.
 *
 * <p>The first result value of {@code response} is parsed as a JSON object, and each
 * entry of its {@code "fields"} array is translated into a {@link Schema} which is then
 * serialized back to a {@link JsonObject}. The members read from each field are
 * {@code name}, {@code type}, {@code length}, {@code precision}, {@code scale},
 * {@code nillable}, {@code label}, {@code defaultValue} and {@code unique}.
 *
 * @param response command output whose first result value is the JSON response body
 * @return one JSON object per entry of the response's {@code "fields"} array
 * @throws SchemaException if the response has no output, or if the body cannot be
 *         parsed or converted (the underlying exception is kept as the cause)
 */
@Override
public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException {
  log.info("Get schema from salesforce");

  Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator();
  if (!itr.hasNext()) {
    throw new SchemaException("Failed to get schema from salesforce; REST response has no output");
  }
  String output = itr.next();

  JsonArray fieldJsonArray = new JsonArray();
  try {
    // Parse inside the try block so that a malformed or empty body is reported as a
    // SchemaException instead of escaping as an unchecked parser exception.
    JsonObject jsonObject = GSON.fromJson(output, JsonObject.class);
    JsonArray array = jsonObject.getAsJsonArray("fields");

    for (JsonElement columnElement : array) {
      JsonObject field = columnElement.getAsJsonObject();
      String columnName = field.get("name").getAsString();
      String dataType = field.get("type").getAsString();
      String elementDataType = "string";
      List<String> mapSymbols = null;

      Schema schema = new Schema();
      schema.setColumnName(columnName);

      JsonObject newDataType = this.convertDataType(columnName, dataType, elementDataType, mapSymbols);
      log.debug("ColumnName:" + columnName + "; old datatype:" + dataType + "; new datatype:" + newDataType);
      schema.setDataType(newDataType);

      schema.setLength(field.get("length").getAsLong());
      schema.setPrecision(field.get("precision").getAsInt());
      schema.setScale(field.get("scale").getAsInt());
      schema.setNullable(field.get("nillable").getAsBoolean());
      schema.setFormat(null);

      // label and defaultValue may be JSON null; map that to a Java null.
      JsonElement label = field.get("label");
      schema.setComment(label.isJsonNull() ? null : label.getAsString());
      JsonElement defaultValue = field.get("defaultValue");
      schema.setDefaultValue(defaultValue.isJsonNull() ? null : defaultValue.getAsString());

      schema.setUnique(field.get("unique").getAsBoolean());

      // Round-trip through JSON text to obtain the Schema as a JsonObject.
      String jsonStr = GSON.toJson(schema);
      JsonObject obj = GSON.fromJson(jsonStr, JsonObject.class).getAsJsonObject();
      fieldJsonArray.add(obj);
    }
  } catch (Exception e) {
    throw new SchemaException("Failed to get schema from salesforce; error - " + e.getMessage(), e);
  }
  return fieldJsonArray;
}
use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.
the class JdbcExtractor method getTargetColumnName.
/**
 * Resolves the output (target) name for a selected column.
 *
 * <p>When the source column has an entry in the metadata column map, the alias is used
 * if it is not blank, otherwise the source column name itself. When the column has no
 * entry, the alias is used if it is non-null, otherwise a generated name of the form
 * {@code "unknown" + unknownColumnCounter}; the counter is incremented in this case
 * whether or not the generated name was used. The chosen name is passed through
 * {@code toCase} and then has special characters escaped.
 *
 * @param sourceColumnName column name as it appears in the source query
 * @param alias alias given to the column, may be {@code null}
 * @return targetColumnName
 */
private String getTargetColumnName(String sourceColumnName, String alias) {
  final boolean foundInMetadata =
      this.getMetadataColumnMap().get(sourceColumnName.toLowerCase()) != null;

  String resolvedName;
  if (foundInMetadata) {
    resolvedName = StringUtils.isNotBlank(alias) ? alias : sourceColumnName;
  } else {
    resolvedName = (alias != null) ? alias : "unknown" + this.unknownColumnCounter;
    this.unknownColumnCounter++;
  }

  final String casedName = this.toCase(resolvedName);
  return Utils.escapeSpecialCharacters(casedName, ConfigurationKeys.ESCAPE_CHARS_IN_COLUMN_NAME, "_");
}
use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.
the class JdbcExtractor method getUpdatedSchemaObject.
/**
 * Returns the schema entry for a selected column, updated for its target name.
 *
 * <p>The column is looked up in the metadata column map by source name and then, if
 * not found, by alias. If neither is present a custom column schema is returned.
 * Otherwise the object obtained from the map is modified in place: its column name is
 * set to the target name and its watermark, nullable and primary key attributes are
 * updated from the work unit configuration.
 *
 * @param sourceColumnName column name as it appears in the source query
 * @param alias alias given to the column, may be {@code null}
 * @param targetColumnName name the column should carry in the output schema
 * @return schema object of a column
 */
private Schema getUpdatedSchemaObject(String sourceColumnName, String alias, String targetColumnName) {
  // Look up by source column first, then fall back to the alias.
  Schema columnSchema = this.getMetadataColumnMap().get(sourceColumnName.toLowerCase());
  if (columnSchema == null && alias != null) {
    columnSchema = this.getMetadataColumnMap().get(alias.toLowerCase());
  }

  // Not in the metadata under either name: fall back to a custom column schema.
  if (columnSchema == null) {
    return getCustomColumnSchema(targetColumnName);
  }

  final String deltaFields = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
  final String primaryKeyFields = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY);
  final boolean multiColumnWatermark = this.hasMultipleWatermarkColumns(deltaFields);
  columnSchema.setColumnName(targetColumnName);

  // Check the source name against the watermark config first, then the alias.
  boolean watermark = this.isWatermarkColumn(deltaFields, sourceColumnName);
  if (watermark) {
    this.updateDeltaFieldConfig(sourceColumnName, targetColumnName);
  } else if (alias != null) {
    watermark = this.isWatermarkColumn(deltaFields, alias);
    this.updateDeltaFieldConfig(alias, targetColumnName);
  }

  // The watermark flag is only written when there is not a multi-column watermark.
  if (!multiColumnWatermark) {
    columnSchema.setWaterMark(watermark);
  }

  // Single-column watermark and primary key columns are marked non-nullable.
  final boolean nonNullable = (watermark && !multiColumnWatermark)
      || this.getPrimarykeyIndex(primaryKeyFields, sourceColumnName) > 0;
  columnSchema.setNullable(!nonNullable);

  // Set primary key index; update the key config when the column has been renamed.
  final int keyIndex = this.getPrimarykeyIndex(primaryKeyFields, sourceColumnName);
  if (keyIndex > 0 && !sourceColumnName.equalsIgnoreCase(targetColumnName)) {
    this.updatePrimaryKeyConfig(sourceColumnName, targetColumnName);
  }
  columnSchema.setPrimaryKey(keyIndex);

  return columnSchema;
}
use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.
the class JdbcExtractor method getCustomColumnSchema.
/**
 * Creates the schema for a custom column, i.e. one that was not found in the metadata.
 *
 * <p>The column is typed as {@code "string"} (converted through {@code convertDataType}),
 * is nullable, is neither a watermark nor unique, has primary key index, length,
 * precision and scale all set to 0, and carries the comment {@code "Custom column"}.
 *
 * @param columnName name to assign to the custom column
 * @return column schema
 */
private Schema getCustomColumnSchema(String columnName) {
  final String stringType = "string";
  final String elementType = "string";
  final List<String> noMapSymbols = null;

  Schema customColumn = new Schema();
  customColumn.setColumnName(columnName);
  customColumn.setDataType(this.convertDataType(columnName, stringType, elementType, noMapSymbols));

  // Key and watermark attributes: a custom column is neither.
  customColumn.setWaterMark(false);
  customColumn.setPrimaryKey(0);

  // Size attributes are zeroed.
  customColumn.setLength(0);
  customColumn.setPrecision(0);
  customColumn.setScale(0);

  customColumn.setNullable(true);
  customColumn.setFormat(null);
  customColumn.setComment("Custom column");
  customColumn.setDefaultValue(null);
  customColumn.setUnique(false);
  return customColumn;
}
use of org.apache.gobblin.source.extractor.schema.Schema in project incubator-gobblin by apache.
the class TeradataExtractor method getSchema.
/**
 * Builds the column schema array from the metadata of a Teradata result set.
 *
 * <p>The first result value of {@code response} is taken as a {@link ResultSet}. For
 * each column reported by its {@link ResultSetMetaData}, a {@link Schema} is populated
 * with the column name, converted data type, display size, precision, scale,
 * nullability and label, and is then serialized to a {@link JsonObject}.
 *
 * @param response command output whose first result value is the schema result set
 * @return one JSON object per result set column, in column order
 * @throws SchemaException if the response holds no result set, or if reading the
 *         metadata or converting a column fails (the underlying exception is the cause)
 * @throws IOException declared by the overridden method; not thrown directly here
 */
@Override
public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException, IOException {
  log.debug("Extract schema from resultset");

  Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator();
  if (!itr.hasNext()) {
    throw new SchemaException("Failed to get schema from Teradata - empty schema resultset");
  }
  ResultSet resultset = itr.next();

  JsonArray fieldJsonArray = new JsonArray();
  try {
    ResultSetMetaData rsmd = resultset.getMetaData();
    // The column count does not change while iterating; read it once.
    int columnCount = rsmd.getColumnCount();

    // JDBC column indexes are 1-based.
    for (int i = 1; i <= columnCount; i++) {
      String columnName = rsmd.getColumnName(i);
      String columnTypeName = rsmd.getColumnTypeName(i);
      List<String> mapSymbols = null;

      // Use a fresh Schema per column so no attribute can carry over from the previous one.
      Schema schema = new Schema();
      schema.setColumnName(columnName);
      JsonObject newDataType = this.convertDataType(columnName, columnTypeName, ELEMENT_DATA_TYPE, mapSymbols);
      schema.setDataType(newDataType);
      schema.setLength(rsmd.getColumnDisplaySize(i));
      schema.setPrecision(rsmd.getPrecision(i));
      schema.setScale(rsmd.getScale(i));
      schema.setNullable(rsmd.isNullable(i) == ResultSetMetaData.columnNullable);
      schema.setComment(rsmd.getColumnLabel(i));

      // Round-trip through JSON text to obtain the Schema as a JsonObject.
      String jsonStr = gson.toJson(schema);
      JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject();
      fieldJsonArray.add(obj);
    }
  } catch (Exception e) {
    throw new SchemaException("Failed to get schema from Teradata; error - " + e.getMessage(), e);
  }
  return fieldJsonArray;
}
Aggregations