Use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
The class NormalTask, method getRowkey.
public byte[] getRowkey(Record record) {
    byte[] rowkeyBuffer = {};
    for (Configuration aRowkeyColumn : rowkeyColumn) {
        Integer index = aRowkeyColumn.getInt(Key.INDEX);
        String type = aRowkeyColumn.getString(Key.TYPE);
        ColumnType columnType = ColumnType.getByTypeName(type);
        if (index == -1) {
            // index == -1 marks a constant fragment: append the configured value directly
            String value = aRowkeyColumn.getString(Key.VALUE);
            rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value));
        } else {
            if (index >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase11xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, String.format("The index in your rowkeyColumn configuration is out of range: according to the reader configuration, index must be less than %s, but you configured %s. Please check and correct it.", record.getColumnNumber(), index));
            }
            byte[] value = getColumnByte(columnType, record.getColumn(index));
            rowkeyBuffer = Bytes.add(rowkeyBuffer, value);
        }
    }
    return rowkeyBuffer;
}
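For context, getRowkey builds the rowkey by concatenating one byte fragment per rowkeyColumn entry, in configuration order: index == -1 contributes the configured constant value, while any other index pulls that column from the record. A hypothetical configuration this method would consume (the key names index/type/value match the lookups above; the JSON content itself is an illustrative sketch, not taken from the project):

    // Hypothetical rowkeyColumn configuration for getRowkey(); values are illustrative.
    Configuration rowkeyConf = Configuration.from(
            "{\"rowkeyColumn\": ["
            + "{\"index\": 0, \"type\": \"string\"},"                    // first record column
            + "{\"index\": -1, \"type\": \"string\", \"value\": \"_\"}," // constant separator
            + "{\"index\": 1, \"type\": \"long\"}"                       // second record column
            + "]}");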
Use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
The class HbaseSplitUtil, method doSplit.
private static List<Configuration> doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair<byte[][], byte[][]> regionRanges) {
    List<Configuration> configurations = new ArrayList<Configuration>();
    for (int i = 0; i < regionRanges.getFirst().length; i++) {
        byte[] regionStartKey = regionRanges.getFirst()[i];
        byte[] regionEndKey = regionRanges.getSecond()[i];
        // Note: if the user sets userEndKey to "", this check must not fire;
        // an empty userEndKey means "read through the last region".
        if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 && (endRowkeyByte.length != 0 && (Bytes.compareTo(regionStartKey, endRowkeyByte) > 0))) {
            continue;
        }
        // If the user-configured userStartKey is greater than or equal to the region's
        // end key, the region should not be included.
        if ((Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) != 0) && (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0)) {
            continue;
        }
        // Likewise, if the user sets userEndKey to "", this check must not fire.
        if (endRowkeyByte.length != 0 && (Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0)) {
            continue;
        }
        Configuration p = config.clone();
        String thisStartKey = getStartKey(startRowkeyByte, regionStartKey);
        String thisEndKey = getEndKey(endRowkeyByte, regionEndKey);
        p.set(Key.START_ROWKEY, thisStartKey);
        p.set(Key.END_ROWKEY, thisEndKey);
        LOG.debug("startRowkey:[{}], endRowkey:[{}] .", thisStartKey, thisEndKey);
        configurations.add(p);
    }
    return configurations;
}
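Read together, the three skip branches implement a half-open overlap test between the user range [userStartKey, userEndKey) and each region [regionStartKey, regionEndKey), where an empty byte array means "unbounded on the right". A hypothetical standalone restatement of that predicate (not part of DataX; note that the first branch above is already subsumed by the third):

    // Hypothetical helper: returns true iff doSplit() should keep the region.
    private static boolean regionOverlapsUserRange(byte[] userStart, byte[] userEnd,
                                                   byte[] regionStart, byte[] regionEnd) {
        // The user range ends at or before the region starts: no overlap.
        if (userEnd.length != 0 && Bytes.compareTo(userEnd, regionStart) <= 0) {
            return false;
        }
        // The region ends at or before the user range starts: no overlap.
        // An empty regionEnd means the region is unbounded on the right.
        if (Bytes.compareTo(regionEnd, HConstants.EMPTY_BYTE_ARRAY) != 0
                && Bytes.compareTo(userStart, regionEnd) >= 0) {
            return false;
        }
        return true;
    }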
Use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
The class HdfsHelper, method textFileStartWrite.
/**
 * Writes a textfile-format file.
 * @param lineReceiver        receiver supplying records from the reader
 * @param config              writer task configuration
 * @param fileName            target HDFS file path
 * @param taskPluginCollector collector for dirty records
 */
public void textFileStartWrite(RecordReceiver lineReceiver, Configuration config, String fileName, TaskPluginCollector taskPluginCollector) {
    char fieldDelimiter = config.getChar(Key.FIELD_DELIMITER);
    List<Configuration> columns = config.getListConfiguration(Key.COLUMN);
    String compress = config.getString(Key.COMPRESS, null);
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
    String attempt = "attempt_" + dateFormat.format(new Date()) + "_0001_m_000000_0";
    Path outputPath = new Path(fileName);
    // TODO: TASK_ATTEMPT_ID needs further verification
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    FileOutputFormat outFormat = new TextOutputFormat();
    outFormat.setOutputPath(conf, outputPath);
    outFormat.setWorkOutputPath(conf, outputPath);
    if (null != compress) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    try {
        RecordWriter writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
        Record record = null;
        while ((record = lineReceiver.getFromReader()) != null) {
            MutablePair<Text, Boolean> transportResult = transportOneRecord(record, fieldDelimiter, columns, taskPluginCollector);
            if (!transportResult.getRight()) {
                writer.write(NullWritable.get(), transportResult.getLeft());
            }
        }
        writer.close(Reporter.NULL);
    } catch (Exception e) {
        String message = String.format("An IO exception occurred while writing file [%s]. Please check that your network is working.", fileName);
        LOG.error(message);
        Path path = new Path(fileName);
        deleteDir(path.getParent());
        throw DataXException.asDataXException(HdfsWriterErrorCode.Write_FILE_IO_ERROR, e);
    }
}
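A hypothetical writer configuration exercising this path (the key names fieldDelimiter/column/compress match the lookups at the top of the method; the concrete values are illustrative):

    // Hypothetical HdfsWriter task configuration; values are illustrative.
    Configuration writerConf = Configuration.from(
            "{\"fieldDelimiter\": \"\\t\","
            + " \"compress\": \"GZIP\","
            + " \"column\": ["
            + "{\"name\": \"id\", \"type\": \"bigint\"},"
            + "{\"name\": \"name\", \"type\": \"string\"}"
            + "]}");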
Use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
The class HdfsHelper, method orcFileStartWrite.
/**
 * Writes an orcfile-format file.
 * @param lineReceiver        receiver supplying records from the reader
 * @param config              writer task configuration
 * @param fileName            target HDFS file path
 * @param taskPluginCollector collector for dirty records
 */
public void orcFileStartWrite(RecordReceiver lineReceiver, Configuration config, String fileName, TaskPluginCollector taskPluginCollector) {
    List<Configuration> columns = config.getListConfiguration(Key.COLUMN);
    String compress = config.getString(Key.COMPRESS, null);
    List<String> columnNames = getColumnNames(columns);
    List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
    StructObjectInspector inspector = (StructObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnTypeInspectors);
    OrcSerde orcSerde = new OrcSerde();
    FileOutputFormat outFormat = new OrcOutputFormat();
    if (!"NONE".equalsIgnoreCase(compress) && null != compress) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    try {
        RecordWriter writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);
        Record record = null;
        while ((record = lineReceiver.getFromReader()) != null) {
            MutablePair<List<Object>, Boolean> transportResult = transportOneRecord(record, columns, taskPluginCollector);
            if (!transportResult.getRight()) {
                writer.write(NullWritable.get(), orcSerde.serialize(transportResult.getLeft(), inspector));
            }
        }
        writer.close(Reporter.NULL);
    } catch (Exception e) {
        String message = String.format("An IO exception occurred while writing file [%s]. Please check that your network is working.", fileName);
        LOG.error(message);
        Path path = new Path(fileName);
        deleteDir(path.getParent());
        throw DataXException.asDataXException(HdfsWriterErrorCode.Write_FILE_IO_ERROR, e);
    }
}
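For reference, the serialize call in the loop turns each row (a List&lt;Object&gt;) into the Writable handed to RecordWriter.write(). A minimal standalone sketch of that step, assuming Hive's standard primitive inspectors (column names and values are illustrative):

    // Minimal sketch of the OrcSerde serialization step; names and values are illustrative.
    List<String> names = Arrays.asList("id", "name");
    List<ObjectInspector> inspectors = Arrays.asList(
            PrimitiveObjectInspectorFactory.javaLongObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    StructObjectInspector rowInspector = (StructObjectInspector)
            ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
    Writable orcRow = new OrcSerde().serialize(Arrays.asList(1L, "foo"), rowInspector);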
Use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
The class AbstractCollector, method registerTGCommunication.
public void registerTGCommunication(List<Configuration> taskGroupConfigurationList) {
    for (Configuration config : taskGroupConfigurationList) {
        int taskGroupId = config.getInt(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID);
        LocalTGCommunicationManager.registerTaskGroupCommunication(taskGroupId, new Communication());
    }
}
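A hypothetical registration call (assuming a collector instance and DataX's Configuration.newDefault(); the task-group id is illustrative):

    // Hypothetical usage: register one task group with id 0.
    Configuration tgConf = Configuration.newDefault();
    tgConf.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, 0);
    collector.registerTGCommunication(Collections.singletonList(tgConf));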