use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class UnstructuredStorageReaderUtil method validateColumn.
public static void validateColumn(Configuration readerConfiguration) {
// column: 1. index type 2.value type 3.when type is Date, may have
// format
List<Configuration> columns = readerConfiguration.getListConfiguration(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN);
if (null == columns || columns.size() == 0) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.REQUIRED_VALUE, "您需要指定 columns");
}
// handle ["*"]
if (null != columns && 1 == columns.size()) {
String columnsInStr = columns.get(0).toString();
if ("\"*\"".equals(columnsInStr) || "'*'".equals(columnsInStr)) {
readerConfiguration.set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN, null);
columns = null;
}
}
if (null != columns && columns.size() != 0) {
for (Configuration eachColumnConf : columns) {
eachColumnConf.getNecessaryValue(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.TYPE, UnstructuredStorageReaderErrorCode.REQUIRED_VALUE);
Integer columnIndex = eachColumnConf.getInt(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.INDEX);
String columnValue = eachColumnConf.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.VALUE);
if (null == columnIndex && null == columnValue) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.NO_INDEX_VALUE, "由于您配置了type, 则至少需要配置 index 或 value");
}
if (null != columnIndex && null != columnValue) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.MIXED_INDEX_VALUE, "您混合配置了index, value, 每一列同时仅能选择其中一种");
}
if (null != columnIndex && columnIndex < 0) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("index需要大于等于0, 您配置的index为[%s]", columnIndex));
}
}
}
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class OriginalConfPretreatmentUtil method dealJdbcAndTable.
private static void dealJdbcAndTable(Configuration originalConfig) {
String username = originalConfig.getString(Key.USERNAME);
String password = originalConfig.getString(Key.PASSWORD);
boolean checkSlave = originalConfig.getBool(Key.CHECK_SLAVE, false);
boolean isTableMode = originalConfig.getBool(Constant.IS_TABLE_MODE);
boolean isPreCheck = originalConfig.getBool(Key.DRYRUN, false);
List<Object> conns = originalConfig.getList(Constant.CONN_MARK, Object.class);
List<String> preSql = originalConfig.getList(Key.PRE_SQL, String.class);
int tableNum = 0;
for (int i = 0, len = conns.size(); i < len; i++) {
Configuration connConf = Configuration.from(conns.get(i).toString());
connConf.getNecessaryValue(Key.JDBC_URL, DBUtilErrorCode.REQUIRED_VALUE);
List<String> jdbcUrls = connConf.getList(Key.JDBC_URL, String.class);
String jdbcUrl;
if (isPreCheck) {
jdbcUrl = DBUtil.chooseJdbcUrlWithoutRetry(DATABASE_TYPE, jdbcUrls, username, password, preSql, checkSlave);
} else {
jdbcUrl = DBUtil.chooseJdbcUrl(DATABASE_TYPE, jdbcUrls, username, password, preSql, checkSlave);
}
jdbcUrl = DATABASE_TYPE.appendJDBCSuffixForReader(jdbcUrl);
// 回写到connection[i].jdbcUrl
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.JDBC_URL), jdbcUrl);
LOG.info("Available jdbcUrl:{}.", jdbcUrl);
if (isTableMode) {
// table 方式
// 对每一个connection 上配置的table 项进行解析(已对表名称进行了 ` 处理的)
List<String> tables = connConf.getList(Key.TABLE, String.class);
List<String> expandedTables = TableExpandUtil.expandTableConf(DATABASE_TYPE, tables);
if (null == expandedTables || expandedTables.isEmpty()) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, String.format("您所配置的读取数据库表:%s 不正确. 因为DataX根据您的配置找不到这张表. 请检查您的配置并作出修改." + "请先了解 DataX 配置.", StringUtils.join(tables, ",")));
}
tableNum += expandedTables.size();
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), expandedTables);
} else {
// 说明是配置的 querySql 方式,不做处理.
}
}
originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum);
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class HdfsHelper method getColumnTypeInspectors.
/**
* 根据writer配置的字段类型,构建inspector
* @param columns
* @return
*/
public List<ObjectInspector> getColumnTypeInspectors(List<Configuration> columns) {
List<ObjectInspector> columnTypeInspectors = Lists.newArrayList();
for (Configuration eachColumnConf : columns) {
SupportHiveDataType columnType = SupportHiveDataType.valueOf(eachColumnConf.getString(Key.TYPE).toUpperCase());
ObjectInspector objectInspector = null;
switch(columnType) {
case TINYINT:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Byte.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case SMALLINT:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Short.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case INT:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case BIGINT:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case FLOAT:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Float.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case DOUBLE:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Double.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case TIMESTAMP:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Timestamp.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case DATE:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Date.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case STRING:
case VARCHAR:
case CHAR:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(String.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
case BOOLEAN:
objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Boolean.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
break;
default:
throw DataXException.asDataXException(HdfsWriterErrorCode.ILLEGAL_VALUE, String.format("您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%d]. 请修改表中该字段的类型或者不同步该字段.", eachColumnConf.getString(Key.NAME), eachColumnConf.getString(Key.TYPE)));
}
columnTypeInspectors.add(objectInspector);
}
return columnTypeInspectors;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class ReaderSplitUtil method doSplit.
public static List<Configuration> doSplit(Configuration originalSliceConfig, int adviceNumber) {
boolean isTableMode = originalSliceConfig.getBool(Constant.IS_TABLE_MODE).booleanValue();
int eachTableShouldSplittedNumber = -1;
if (isTableMode) {
// adviceNumber这里是channel数量大小, 即datax并发task数量
// eachTableShouldSplittedNumber是单表应该切分的份数, 向上取整可能和adviceNumber没有比例关系了已经
eachTableShouldSplittedNumber = calculateEachTableShouldSplittedNumber(adviceNumber, originalSliceConfig.getInt(Constant.TABLE_NUMBER_MARK));
}
String column = originalSliceConfig.getString(Key.COLUMN);
String where = originalSliceConfig.getString(Key.WHERE, null);
List<Object> conns = originalSliceConfig.getList(Constant.CONN_MARK, Object.class);
List<Configuration> splittedConfigs = new ArrayList<Configuration>();
for (int i = 0, len = conns.size(); i < len; i++) {
Configuration sliceConfig = originalSliceConfig.clone();
Configuration connConf = Configuration.from(conns.get(i).toString());
String jdbcUrl = connConf.getString(Key.JDBC_URL);
sliceConfig.set(Key.JDBC_URL, jdbcUrl);
// 抽取 jdbcUrl 中的 ip/port 进行资源使用的打标,以提供给 core 做有意义的 shuffle 操作
sliceConfig.set(CommonConstant.LOAD_BALANCE_RESOURCE_MARK, DataBaseType.parseIpFromJdbcUrl(jdbcUrl));
sliceConfig.remove(Constant.CONN_MARK);
Configuration tempSlice;
// 说明是配置的 table 方式
if (isTableMode) {
// 已在之前进行了扩展和`处理,可以直接使用
List<String> tables = connConf.getList(Key.TABLE, String.class);
Validate.isTrue(null != tables && !tables.isEmpty(), "您读取数据库表配置错误.");
String splitPk = originalSliceConfig.getString(Key.SPLIT_PK, null);
//最终切分份数不一定等于 eachTableShouldSplittedNumber
boolean needSplitTable = eachTableShouldSplittedNumber > 1 && StringUtils.isNotBlank(splitPk);
if (needSplitTable) {
if (tables.size() == 1) {
//原来:如果是单表的,主键切分num=num*2+1
// splitPk is null这类的情况的数据量本身就比真实数据量少很多, 和channel大小比率关系时,不建议考虑
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾
//考虑其他比率数字?(splitPk is null, 忽略此长尾)
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
}
// 尝试对每个表,切分为eachTableShouldSplittedNumber 份
for (String table : tables) {
tempSlice = sliceConfig.clone();
tempSlice.set(Key.TABLE, table);
List<Configuration> splittedSlices = SingleTableSplitUtil.splitSingleTable(tempSlice, eachTableShouldSplittedNumber);
splittedConfigs.addAll(splittedSlices);
}
} else {
for (String table : tables) {
tempSlice = sliceConfig.clone();
tempSlice.set(Key.TABLE, table);
String queryColumn = HintUtil.buildQueryColumn(jdbcUrl, table, column);
tempSlice.set(Key.QUERY_SQL, SingleTableSplitUtil.buildQuerySql(queryColumn, table, where));
splittedConfigs.add(tempSlice);
}
}
} else {
// 说明是配置的 querySql 方式
List<String> sqls = connConf.getList(Key.QUERY_SQL, String.class);
// TODO 是否check 配置为多条语句??
for (String querySql : sqls) {
tempSlice = sliceConfig.clone();
tempSlice.set(Key.QUERY_SQL, querySql);
splittedConfigs.add(tempSlice);
}
}
}
return splittedConfigs;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class SingleTableSplitUtil method splitSingleTable.
public static List<Configuration> splitSingleTable(Configuration configuration, int adviceNum) {
List<Configuration> pluginParams = new ArrayList<Configuration>();
List<String> rangeList;
String splitPkName = configuration.getString(Key.SPLIT_PK);
String column = configuration.getString(Key.COLUMN);
String table = configuration.getString(Key.TABLE);
String where = configuration.getString(Key.WHERE, null);
boolean hasWhere = StringUtils.isNotBlank(where);
//if (Constant.SPLIT_MODE_RANDOMSAMPLE.equals(splitMode) && DATABASE_TYPE == DataBaseType.Oracle) {
if (DATABASE_TYPE == DataBaseType.Oracle) {
rangeList = genSplitSqlForOracle(splitPkName, table, where, configuration, adviceNum);
// warn: mysql etc to be added...
} else {
Pair<Object, Object> minMaxPK = getPkRange(configuration);
if (null == minMaxPK) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "根据切分主键切分表失败. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
configuration.set(Key.QUERY_SQL, buildQuerySql(column, table, where));
if (null == minMaxPK.getLeft() || null == minMaxPK.getRight()) {
// 切分后获取到的start/end 有 Null 的情况
pluginParams.add(configuration);
return pluginParams;
}
boolean isStringType = Constant.PK_TYPE_STRING.equals(configuration.getString(Constant.PK_TYPE));
boolean isLongType = Constant.PK_TYPE_LONG.equals(configuration.getString(Constant.PK_TYPE));
if (isStringType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(String.valueOf(minMaxPK.getLeft()), String.valueOf(minMaxPK.getRight()), adviceNum, splitPkName, "'", DATABASE_TYPE);
} else if (isLongType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(new BigInteger(minMaxPK.getLeft().toString()), new BigInteger(minMaxPK.getRight().toString()), adviceNum, splitPkName);
} else {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
}
String tempQuerySql;
List<String> allQuerySql = new ArrayList<String>();
if (null != rangeList && !rangeList.isEmpty()) {
for (String range : rangeList) {
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where) + (hasWhere ? " and " : " where ") + range;
allQuerySql.add(tempQuerySql);
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
}
} else {
//pluginParams.add(configuration); // this is wrong for new & old split
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where) + (hasWhere ? " and " : " where ") + String.format(" %s IS NOT NULL", splitPkName);
allQuerySql.add(tempQuerySql);
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
}
// deal pk is null
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where) + (hasWhere ? " and " : " where ") + String.format(" %s IS NULL", splitPkName);
allQuerySql.add(tempQuerySql);
LOG.info("After split(), allQuerySql=[\n{}\n].", StringUtils.join(allQuerySql, "\n"));
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
return pluginParams;
}
Aggregations