use of com.aliyun.odps.data.Record in project DataX by alibaba.
the class ReaderProxy method doRead.
/**
 * Reads the records described by {@code start}/{@code count} from the ODPS download session,
 * converts each one into a DataX record and sends it to the writer.
 *
 * <p>Warning: ODPS partition columns and normal columns must not share a name; all column
 * names are case-insensitive.
 *
 * <p>{@code start} and {@code count} are advanced after every successful read so that, when a
 * read fails, the reader can be reopened at the position that has not been consumed yet. A
 * failed read is retried after a two second pause. {@code retryTimes} is never reset, so at
 * most 9 reopen attempts are made during one call before the failure is reported.
 *
 * @throws DataXException with {@code ODPS_READ_EXCEPTION} when reading keeps failing or the
 *         thread is interrupted while waiting to retry, and with {@code READ_DATA_FAIL} for
 *         any other non-DataX failure
 */
public void doRead() {
    try {
        LOG.info("start={}, count={}", start, count);
        RecordReader recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress);
        Record odpsRecord;
        Map<String, String> partitionMap = this.parseCurrentPartitionValue();
        int retryTimes = 1;
        while (true) {
            try {
                odpsRecord = recordReader.read();
            } catch (Exception e) {
                LOG.warn("warn : odps read exception: {}", e.getMessage());
                if (retryTimes >= 10) {
                    throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_READ_EXCEPTION, e);
                }
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException interrupted) {
                    // Keep the interrupt status visible to the caller and stop retrying:
                    // an interrupt means this thread has been asked to stop.
                    Thread.currentThread().interrupt();
                    throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_READ_EXCEPTION, e);
                }
                // Reopen at the current position through the same helper used for the
                // initial open, so both paths obtain the reader the same way.
                recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress);
                LOG.warn("odps-read-exception, 重试第{}次", retryTimes);
                retryTimes++;
                continue;
            }

            // Remember how far we have read; a later retry resumes from here.
            start++;
            count--;

            if (odpsRecord == null) {
                // A null record ends the loop.
                break;
            }

            com.alibaba.datax.common.element.Record dataXRecord = recordSender.createRecord();
            // Emit one DataX column per configured column, in configuration order.
            for (Pair<String, ColumnType> pair : this.parsedColumns) {
                String columnName = pair.getLeft();
                switch (pair.getRight()) {
                    case PARTITION:
                        String partitionColumnValue = this.getPartitionColumnValue(partitionMap, columnName);
                        this.odpsColumnToDataXField(odpsRecord, dataXRecord, this.columnTypeMap.get(columnName), partitionColumnValue, true);
                        break;
                    case NORMAL:
                        this.odpsColumnToDataXField(odpsRecord, dataXRecord, this.columnTypeMap.get(columnName), columnName, false);
                        break;
                    case CONSTANT:
                        // For a constant column the configured name itself is the value.
                        dataXRecord.addColumn(new StringColumn(columnName));
                        break;
                    default:
                        break;
                }
            }
            recordSender.sendToWriter(dataXRecord);
        }

        // Closing is best-effort: a failure to close must not fail the read. (The original
        // note says it was confirmed that leaving the RecordReader unclosed is acceptable.)
        try {
            recordReader.close();
        } catch (Exception e) {
            LOG.warn("recordReader close exception", e);
        }
    } catch (DataXException e) {
        throw e;
    } catch (Exception e) {
        // warn: if dirty
        throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, e);
    }
}
use of com.aliyun.odps.data.Record in project DataX by alibaba.
the class OdpsWriterProxy method writeOneRecord.
/**
 * Converts one DataX record to an ODPS record, appends it to the pending record pack and,
 * once the pack has reached {@code maxBufferSize} bytes, writes the pack out as one block.
 *
 * @param dataXRecord the DataX record to write
 * @param blocks receives the id of the block written by this call, if any
 * @return the nanoseconds spent writing the block, or {@code 0} when no block was written
 *         (the record converted to {@code null}, or the pack is still below the threshold)
 * @throws Exception if converting the record or writing the block fails
 */
public long writeOneRecord(com.alibaba.datax.common.element.Record dataXRecord, List<Long> blocks) throws Exception {
    Record odpsRecord = dataxRecordToOdpsRecord(dataXRecord);
    if (odpsRecord == null) {
        return 0;
    }

    protobufRecordPack.append(odpsRecord);

    boolean bufferFull = protobufRecordPack.getTotalBytes() >= maxBufferSize;
    if (!bufferFull) {
        return 0;
    }

    // The pack is full: write it as the current block, record the block id, then start a
    // fresh pack under the next block id.
    long flushStartNanos = System.nanoTime();
    OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, blockId.get(), this.isCompress);
    LOG.info("write block {} ok.", blockId.get());
    blocks.add(blockId.get());
    protobufRecordPack.reset();
    this.blockId.incrementAndGet();
    return System.nanoTime() - flushStartNanos;
}
use of com.aliyun.odps.data.Record in project DataX by alibaba.
the class OdpsWriterProxy method dataxRecordToOdpsRecord.
/**
 * Converts a DataX record into a new ODPS record of the upload session.
 *
 * <p>Source column {@code i} is written to the ODPS column at index
 * {@code columnPositions.get(i)}, converted according to that column's type in
 * {@code tableOriginalColumnTypeList}. A {@code null} column, or an empty string column when
 * {@code emptyAsNull} is set, is skipped and therefore left unset in the ODPS record. Columns
 * whose ODPS type is not handled by the switch below are skipped as well.
 *
 * <p>When the source has fewer columns than configured, a warning is logged the first time
 * only and the remaining target columns are left unset.
 *
 * @param dataXRecord the DataX record to convert
 * @return the populated ODPS record, or {@code null} when a column could not be converted; in
 *         that case the record has been handed to the task plugin collector as dirty data
 * @throws DataXException with {@code ILLEGAL_VALUE} when the source record has more columns
 *         than are configured for the target table
 * @throws Exception declared for callers; conversion failures are collected, not rethrown
 */
public Record dataxRecordToOdpsRecord(com.alibaba.datax.common.element.Record dataXRecord) throws Exception {
    int sourceColumnCount = dataXRecord.getColumnNumber();
    Record odpsRecord = slaveUpload.newRecord();
    int userConfiguredColumnNumber = this.columnPositions.size();
    //todo
    if (sourceColumnCount > userConfiguredColumnNumber) {
        throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, String.format("亲,配置中的源表的列个数和目的端表不一致,源表中您配置的列数是:%s 大于目的端的列数是:%s , 这样会导致源头数据无法正确导入目的端, 请检查您的配置并修改.", sourceColumnCount, userConfiguredColumnNumber));
    } else if (sourceColumnCount < userConfiguredColumnNumber) {
        if (printColumnLess) {
            LOG.warn("源表的列个数小于目的表的列个数,源表列数是:{} 目的表列数是:{} , 数目不匹配. DataX 会把目的端多出的列的值设置为空值. 如果这个默认配置不符合您的期望,请保持源表和目的表配置的列数目保持一致.", sourceColumnCount, userConfiguredColumnNumber);
        }
        // Warn once only; later records with the same shortfall stay silent.
        printColumnLess = false;
    }

    // Declared outside the try so the catch block can report which column failed.
    int sourceIndex = 0;
    try {
        for (; sourceIndex < sourceColumnCount; sourceIndex++) {
            int currentIndex = columnPositions.get(sourceIndex);
            OdpsType type = this.tableOriginalColumnTypeList.get(currentIndex);
            com.alibaba.datax.common.element.Column columnValue = dataXRecord.getColumn(sourceIndex);
            if (columnValue == null) {
                continue;
            }
            // For compatibility with the dt lib, treat "" as null.
            if (this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())) {
                continue;
            }
            switch (type) {
                case STRING:
                    odpsRecord.setString(currentIndex, columnValue.asString());
                    break;
                case BIGINT:
                    odpsRecord.setBigint(currentIndex, columnValue.asLong());
                    break;
                case BOOLEAN:
                    odpsRecord.setBoolean(currentIndex, columnValue.asBoolean());
                    break;
                case DATETIME:
                    odpsRecord.setDatetime(currentIndex, columnValue.asDate());
                    break;
                case DOUBLE:
                    odpsRecord.setDouble(currentIndex, columnValue.asDouble());
                    break;
                case DECIMAL:
                    odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal());
                    String columnStr = columnValue.asString();
                    // Reject values with 36 or more characters before the decimal point.
                    // NOTE(review): the check is skipped when the text has no '.', and a
                    // leading sign is counted as a character - confirm this is intended.
                    if (columnStr != null && columnStr.indexOf(".") >= 36) {
                        throw new Exception("Odps decimal 类型的整数位个数不能超过35");
                    }
                    // Explicit break: do not rely on falling through into default.
                    break;
                default:
                    break;
            }
        }
        return odpsRecord;
    } catch (Exception e) {
        // Any conversion failure marks the whole record as dirty instead of failing the task.
        String message = String.format("写入 ODPS 目的表时遇到了脏数据: 第[%s]个字段的数据出现错误,请检查该数据并作出修改 或者您可以增大阀值,忽略这条记录.", sourceIndex);
        this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, message);
        return null;
    }
}
Aggregations