use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class Hbase11xHelper method validateRowkeyColumn.
/**
 * Validates the {@code rowkeyColumn} section of the writer configuration.
 *
 * <p>Every entry must provide a {@code type} and an {@code index}. An index of {@code -1}
 * marks a constant entry (a literal joined into the rowkey) and must also provide a
 * {@code value}; any other index must be non-negative. At least one entry must be
 * non-constant, otherwise the rowkey could not be derived from the record at all.
 *
 * @param originalConfig job configuration from which {@code rowkeyColumn} is read
 * @throws DataXException with {@code REQUIRED_VALUE} when the list, a {@code type}, an
 *         {@code index} or a constant's {@code value} is missing; with {@code ILLEGAL_VALUE}
 *         when an index is below -1 or every entry is a constant
 */
private static void validateRowkeyColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
    List<Configuration> rowkeyColumn = originalConfig.getListConfiguration(Key.ROWKEY_COLUMN);
    if (rowkeyColumn == null || rowkeyColumn.isEmpty()) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "rowkeyColumn为必填项,其形式为:rowkeyColumn:[{\"index\": 0,\"type\": \"string\"},{\"index\": -1,\"type\": \"string\",\"value\": \"_\"}]");
    }
    // Becomes true as soon as one entry refers to a real record column (index >= 0).
    boolean hasRecordColumn = false;
    // Entries look like {"index":0,"type":"string"} or {"index":-1,"type":"string","value":"_"}
    for (Configuration aRowkeyColumn : rowkeyColumn) {
        Integer index = aRowkeyColumn.getInt(Key.INDEX);
        String type = aRowkeyColumn.getNecessaryValue(Key.TYPE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        // Result is discarded; presumably called so that an unknown type name is rejected here.
        ColumnType.getByTypeName(type);
        if (index == null) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "rowkeyColumn配置项中index为必填项");
        }
        // -1 is the only negative index with a meaning (constant entry).
        if (index < -1) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, String.format("rowkeyColumn配置项中index值[%s]不合法,index应为-1(常量列)或非负数,请检查并修改.", index));
        }
        if (index == -1) {
            // A constant entry is useless without the literal to insert.
            aRowkeyColumn.getNecessaryValue(Key.VALUE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        } else {
            hasRecordColumn = true;
        }
    }
    // The rowkey must not consist solely of constants, whatever the number of entries.
    if (!hasRecordColumn) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "rowkeyColumn配置项不能全为常量列,至少指定一个rowkey列");
    }
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class Hbase11xHelper method validateColumn.
/**
 * Checks the {@code column} section of the writer configuration.
 *
 * <p>The list must be present and non-empty. For each entry, {@code type} and {@code name}
 * are required, {@code name} must split on {@code ':'} into exactly two parts
 * (family and qualifier), and {@code index} must be present and non-negative.
 *
 * @param originalConfig job configuration from which {@code column} is read
 * @throws DataXException with {@code REQUIRED_VALUE} when the list, a {@code type} or a
 *         {@code name} is missing; with {@code ILLEGAL_VALUE} when a name is malformed or an
 *         index is missing or negative
 */
private static void validateColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
    final List<Configuration> columnConfigs = originalConfig.getListConfiguration(Key.COLUMN);
    final boolean nothingConfigured = columnConfigs == null || columnConfigs.isEmpty();
    if (nothingConfigured) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "column为必填项,其形式为:column:[{\"index\": 0,\"name\": \"cf0:column0\",\"type\": \"string\"},{\"index\": 1,\"name\": \"cf1:column1\",\"type\": \"long\"}]");
    }

    for (Configuration columnConfig : columnConfigs) {
        final Integer position = columnConfig.getInt(Key.INDEX);
        final String typeName = columnConfig.getNecessaryValue(Key.TYPE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        final String qualifiedName = columnConfig.getNecessaryValue(Key.NAME, Hbase11xWriterErrorCode.REQUIRED_VALUE);

        // Result is discarded; presumably called so that an unknown type name is rejected here.
        ColumnType.getByTypeName(typeName);

        // The name has to be of the form family:qualifier.
        final String[] nameParts = qualifiedName.split(":");
        if (nameParts.length != 2) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, String.format("您column配置项中name配置的列格式[%s]不正确,name应该配置为 列族:列名 的形式, 如 {\"index\": 1,\"name\": \"cf1:q1\",\"type\": \"long\"}", qualifiedName));
        }

        final boolean indexUsable = position != null && position >= 0;
        if (!indexUsable) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "您的column配置项不正确,配置项中中index为必填项,且为非负数,请检查并修改.");
        }
    }
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class OtsReaderMasterProxy method split.
/**
 * Splits the configured range and wraps each resulting sub-range in its own
 * {@link Configuration}.
 *
 * <p>When the reader configuration carries an explicit range split, that split is used;
 * otherwise the default algorithm is asked for {@code num} pieces. Each produced
 * configuration holds the JSON form of the reader configuration, of one sub-range and of
 * the direction.
 *
 * @param num the expected number of splits, passed to the default split algorithm
 * @return one configuration per computed sub-range
 * @throws Exception if computing the ranges or serializing them fails
 */
public List<Configuration> split(int num) throws Exception {
    LOG.info("Expect split num : " + num);

    final List<OTSRange> subRanges;
    if (this.conf.getRangeSplit() == null) {
        // No explicit split points: fall back to the default split algorithm.
        LOG.info("Begin defaultRangeSplit");
        subRanges = defaultRangeSplit(ots, meta, range, num);
        LOG.info("End defaultRangeSplit");
    } else {
        // The user explicitly specified the split points.
        LOG.info("Begin userDefinedRangeSplit");
        subRanges = userDefinedRangeSplit(meta, range, this.conf.getRangeSplit());
        LOG.info("End userDefinedRangeSplit");
    }

    // Serializing a large number of split points is memory hungry, and the slaves do not
    // use this setting, so it is cleared before the configuration is converted to JSON.
    this.conf.setRangeSplit(null);

    final List<Configuration> sliceConfigs = new ArrayList<Configuration>();
    for (OTSRange subRange : subRanges) {
        final Configuration sliceConfig = Configuration.newDefault();
        sliceConfig.set(OTSConst.OTS_CONF, GsonParser.confToJson(this.conf));
        sliceConfig.set(OTSConst.OTS_RANGE, GsonParser.rangeToJson(subRange));
        sliceConfig.set(OTSConst.OTS_DIRECTION, GsonParser.directionToJson(direction));
        sliceConfigs.add(sliceConfig);
    }

    LOG.info("Configuration list count : " + sliceConfigs.size());
    return sliceConfigs;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class NormalTask method getRowkey.
/**
 * Assembles the rowkey for a record by concatenating, in configuration order, the bytes
 * of every {@code rowkeyColumn} entry.
 *
 * <p>An entry whose index is {@code -1} contributes its configured constant value; any
 * other entry contributes the record column at that index.
 *
 * @param record the record whose columns feed the rowkey
 * @return the concatenated rowkey bytes (empty when no entries are configured)
 * @throws DataXException with {@code CONSTRUCT_ROWKEY_ERROR} when a configured index is not
 *         smaller than the record's column count
 */
public byte[] getRowkey(Record record) {
    byte[] assembled = {};
    for (Configuration part : rowkeyColumn) {
        final Integer position = part.getInt(Key.INDEX);
        final String typeName = part.getString(Key.TYPE);
        final ColumnType columnType = ColumnType.getByTypeName(typeName);

        final byte[] fragment;
        if (position != -1) {
            // Regular entry: take the bytes from the record column at this position.
            if (position >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase11xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, String.format("您的rowkeyColumn配置项中中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), position));
            }
            fragment = getColumnByte(columnType, record.getColumn(position));
        } else {
            // Constant entry: use the literal from the configuration.
            final String literal = part.getString(Key.VALUE);
            fragment = getValueByte(columnType, literal);
        }
        assembled = Bytes.add(assembled, fragment);
    }
    return assembled;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class HbaseSplitUtil method doSplit.
/**
 * Produces one configuration per region that overlaps the requested rowkey range.
 *
 * <p>For every region in {@code regionRanges} that is not skipped, a clone of
 * {@code config} is created and its start/end rowkey are set to the values returned by
 * {@code getStartKey} and {@code getEndKey} for that region.
 *
 * @param config          template configuration that is cloned for each selected region
 * @param startRowkeyByte user supplied start rowkey
 * @param endRowkeyByte   user supplied end rowkey; an empty array means "no upper bound"
 * @param regionRanges    region start keys (first) and region end keys (second), paired by
 *                        position
 * @return the per-region configurations, in region order
 */
private static List<Configuration> doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair<byte[][], byte[][]> regionRanges) {
    final List<Configuration> slices = new ArrayList<Configuration>();
    final byte[][] regionStarts = regionRanges.getFirst();
    final byte[][] regionEnds = regionRanges.getSecond();

    for (int r = 0; r < regionStarts.length; r++) {
        final byte[] regionStartKey = regionStarts[r];
        final byte[] regionEndKey = regionEnds[r];

        final boolean endKeyIsEmpty = Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0;
        if (endKeyIsEmpty) {
            // Region with an empty end key: skip it when the user end key lies before the
            // region start. An empty user end key means "up to the largest region", in
            // which case this check must not apply.
            if (endRowkeyByte.length != 0 && Bytes.compareTo(regionStartKey, endRowkeyByte) > 0) {
                continue;
            }
        } else if (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0) {
            // The user start key is at or beyond this region's end key, so the region is
            // not part of the requested range.
            continue;
        }

        // Skip the region when the user end key is at or before the region start. Again,
        // an empty user end key means "up to the largest region" and disables the check.
        if (endRowkeyByte.length != 0 && Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0) {
            continue;
        }

        final String sliceStartKey = getStartKey(startRowkeyByte, regionStartKey);
        final String sliceEndKey = getEndKey(endRowkeyByte, regionEndKey);

        final Configuration slice = config.clone();
        slice.set(Key.START_ROWKEY, sliceStartKey);
        slice.set(Key.END_ROWKEY, sliceEndKey);
        LOG.debug("startRowkey:[{}], endRowkey:[{}] .", sliceStartKey, sliceEndKey);
        slices.add(slice);
    }
    return slices;
}
Aggregations