Search in sources :

Example 1 with Record

use of com.aliyun.odps.data.Record in project DataX by alibaba.

the class ReaderProxy method doRead.

// warn: ODPS partition columns and normal columns must not share a name; all column names are case-insensitive
/**
 * Reads the slice {@code [start, start + count)} from the ODPS download session, converts every
 * ODPS record into a DataX record and hands it to {@code recordSender}.
 *
 * <p>A failed {@code read()} is retried by sleeping two seconds and re-opening the reader at the
 * current {@code start}/{@code count}. {@code retryTimes} starts at 1 and a retry is allowed only
 * while it is below 10, so at most 9 re-opens happen over the whole call (the counter is never
 * reset after a successful read).
 *
 * <p>The record reader is always closed on exit, whether the read finished or failed; a failure
 * while closing is only logged.
 *
 * @throws DataXException with {@code ODPS_READ_EXCEPTION} when reading keeps failing or the thread
 *         is interrupted while waiting to retry, and with {@code READ_DATA_FAIL} for any other
 *         failure
 */
public void doRead() {
    RecordReader recordReader = null;
    try {
        LOG.info("start={}, count={}", start, count);
        recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress);
        Record odpsRecord;
        Map<String, String> partitionMap = this.parseCurrentPartitionValue();
        int retryTimes = 1;
        while (true) {
            try {
                odpsRecord = recordReader.read();
            } catch (Exception e) {
                // Pass the exception itself as the last argument so the stack trace is not lost.
                LOG.warn("warn : odps read exception: {}", e.getMessage(), e);
                if (retryTimes >= 10) {
                    throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_READ_EXCEPTION, e);
                }
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException interrupted) {
                    // Someone asked this thread to stop: keep the interrupt status visible to the
                    // caller and give up retrying instead of silently carrying on.
                    Thread.currentThread().interrupt();
                    throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_READ_EXCEPTION, e);
                }
                // Release the failed reader before replacing it; null it out so the finally block
                // does not close it a second time if re-opening fails.
                closeRecordReaderQuietly(recordReader);
                recordReader = null;
                // NOTE(review): the initial open goes through OdpsUtil.getRecordReader while this
                // re-open calls the session directly - confirm that the difference is intended.
                recordReader = downloadSession.openRecordReader(start, count, isCompress);
                LOG.warn("odps-read-exception, 重试第{}次", retryTimes);
                retryTimes++;
                continue;
            }
            // Remember how far we have read so that a later retry resumes from this position.
            start++;
            count--;
            if (odpsRecord == null) {
                // read() returning null ends the loop.
                break;
            }
            com.alibaba.datax.common.element.Record dataXRecord = recordSender.createRecord();
            // sets(columnName), always contain
            for (Pair<String, ColumnType> pair : this.parsedColumns) {
                String columnName = pair.getLeft();
                switch (pair.getRight()) {
                    case PARTITION:
                        String partitionColumnValue = this.getPartitionColumnValue(partitionMap, columnName);
                        this.odpsColumnToDataXField(odpsRecord, dataXRecord, this.columnTypeMap.get(columnName), partitionColumnValue, true);
                        break;
                    case NORMAL:
                        this.odpsColumnToDataXField(odpsRecord, dataXRecord, this.columnTypeMap.get(columnName), columnName, false);
                        break;
                    case CONSTANT:
                        // For a constant column the configured name string is emitted as the value.
                        dataXRecord.addColumn(new StringColumn(columnName));
                        break;
                    default:
                        break;
                }
            }
            recordSender.sendToWriter(dataXRecord);
        }
    } catch (DataXException e) {
        throw e;
    } catch (Exception e) {
        // warn: if dirty
        throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, e);
    } finally {
        // The original note here says it was confirmed that the RecordReader does not have to be
        // closed, so a failing close() is tolerated; closing in finally also covers error paths.
        closeRecordReaderQuietly(recordReader);
    }
}

/** Closes the given reader if it is non-null, logging (not propagating) any failure. */
private void closeRecordReaderQuietly(RecordReader reader) {
    if (reader == null) {
        return;
    }
    try {
        reader.close();
    } catch (Exception e) {
        LOG.warn("recordReader close exception", e);
    }
}
Also used : RecordReader(com.aliyun.odps.data.RecordReader) DataXException(com.alibaba.datax.common.exception.DataXException) ParseException(java.text.ParseException) DataXException(com.alibaba.datax.common.exception.DataXException) Record(com.aliyun.odps.data.Record) com.alibaba.datax.common.element(com.alibaba.datax.common.element)

Example 2 with Record

use of com.aliyun.odps.data.Record in project DataX by alibaba.

the class OdpsWriterProxy method writeOneRecord.

/**
 * Buffers one DataX record and uploads the buffer as a block once it has grown large enough.
 *
 * <p>The record is first converted with {@code dataxRecordToOdpsRecord}; when that yields
 * {@code null} nothing is buffered. Otherwise the record is appended to the pack, and if the
 * pack's total size has reached {@code maxBufferSize} the pack is written as the current block,
 * that block id is added to {@code blocks}, the pack is reset and the block id is advanced.
 *
 * @param dataXRecord the record to write
 * @param blocks      receives the id of every block uploaded by this call
 * @return nanoseconds spent uploading a block, or 0 when no block was uploaded
 * @throws Exception if converting, buffering or uploading fails
 */
public long writeOneRecord(com.alibaba.datax.common.element.Record dataXRecord, List<Long> blocks) throws Exception {
    final Record converted = dataxRecordToOdpsRecord(dataXRecord);
    if (converted == null) {
        // Nothing usable came out of the conversion, so there is nothing to buffer.
        return 0L;
    }

    protobufRecordPack.append(converted);

    final boolean bufferFull = protobufRecordPack.getTotalBytes() >= maxBufferSize;
    if (!bufferFull) {
        // Keep accumulating; no upload happened, so no time is reported.
        return 0L;
    }

    // Only the flush itself is timed.
    final long flushStartedAt = System.nanoTime();
    OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, blockId.get(), this.isCompress);
    LOG.info("write block {} ok.", blockId.get());
    blocks.add(blockId.get());
    protobufRecordPack.reset();
    this.blockId.incrementAndGet();
    final long flushFinishedAt = System.nanoTime();
    return flushFinishedAt - flushStartedAt;
}
Also used : Record(com.aliyun.odps.data.Record)

Example 3 with Record

use of com.aliyun.odps.data.Record in project DataX by alibaba.

the class OdpsWriterProxy method dataxRecordToOdpsRecord.

/**
 * Converts a DataX record into a new ODPS record created from {@code slaveUpload}.
 *
 * <p>Source column {@code i} is written to the ODPS position {@code columnPositions.get(i)},
 * using the setter that matches the type stored for that position in
 * {@code tableOriginalColumnTypeList}. A source column that is {@code null}, or (when
 * {@code emptyAsNull} is set) an empty {@code StringColumn}, is skipped and therefore left
 * unset in the ODPS record. Types without a case in the switch are skipped the same way.
 *
 * <p>If the source has fewer columns than configured, a warning is logged once (guarded by
 * {@code printColumnLess}) and the remaining target columns stay unset.
 *
 * @param dataXRecord the source record
 * @return the populated ODPS record, or {@code null} when converting a field failed; in that case
 *         the source record has been reported through {@code taskPluginCollector} as dirty
 * @throws DataXException if the source record has more columns than are configured
 * @throws Exception declared for callers; conversion failures inside the loop are not propagated
 */
public Record dataxRecordToOdpsRecord(com.alibaba.datax.common.element.Record dataXRecord) throws Exception {
    int sourceColumnCount = dataXRecord.getColumnNumber();
    Record odpsRecord = slaveUpload.newRecord();
    int userConfiguredColumnNumber = this.columnPositions.size();
    // todo
    if (sourceColumnCount > userConfiguredColumnNumber) {
        throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, String.format("亲,配置中的源表的列个数和目的端表不一致,源表中您配置的列数是:%s 大于目的端的列数是:%s , 这样会导致源头数据无法正确导入目的端, 请检查您的配置并修改.", sourceColumnCount, userConfiguredColumnNumber));
    } else if (sourceColumnCount < userConfiguredColumnNumber) {
        if (printColumnLess) {
            LOG.warn("源表的列个数小于目的表的列个数,源表列数是:{} 目的表列数是:{} , 数目不匹配. DataX 会把目的端多出的列的值设置为空值. 如果这个默认配置不符合您的期望,请保持源表和目的表配置的列数目保持一致.", sourceColumnCount, userConfiguredColumnNumber);
        }
        // Warn only for the first short record.
        printColumnLess = false;
    }
    // Declared outside the try so the catch block can report which field failed.
    int sourceIndex = 0;
    try {
        for (; sourceIndex < sourceColumnCount; sourceIndex++) {
            int currentIndex = columnPositions.get(sourceIndex);
            OdpsType type = this.tableOriginalColumnTypeList.get(currentIndex);
            com.alibaba.datax.common.element.Column columnValue = dataXRecord.getColumn(sourceIndex);
            if (columnValue == null) {
                continue;
            }
            // for compatible dt lib, "" as null
            if (this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())) {
                continue;
            }
            switch (type) {
                case STRING:
                    odpsRecord.setString(currentIndex, columnValue.asString());
                    break;
                case BIGINT:
                    odpsRecord.setBigint(currentIndex, columnValue.asLong());
                    break;
                case BOOLEAN:
                    odpsRecord.setBoolean(currentIndex, columnValue.asBoolean());
                    break;
                case DATETIME:
                    odpsRecord.setDatetime(currentIndex, columnValue.asDate());
                    break;
                case DOUBLE:
                    odpsRecord.setDouble(currentIndex, columnValue.asDouble());
                    break;
                case DECIMAL: {
                    java.math.BigDecimal decimalValue = columnValue.asBigDecimal();
                    // Enforce the limit stated in the error message (at most 35 integer digits)
                    // before touching the record. precision() - scale() is the number of digits
                    // left of the decimal point, so values written without a '.', with a sign,
                    // with leading zeros or in exponent notation are all measured correctly.
                    // Zero is exempt because its scale alone would otherwise skew the count.
                    if (decimalValue != null
                            && decimalValue.signum() != 0
                            && decimalValue.precision() - decimalValue.scale() > 35) {
                        throw new IllegalArgumentException("Odps decimal 类型的整数位个数不能超过35");
                    }
                    odpsRecord.setDecimal(currentIndex, decimalValue);
                    break;
                }
                default:
                    // No setter for this type here: the column is left unset.
                    break;
            }
        }
        return odpsRecord;
    } catch (Exception e) {
        // Any failure while converting a field marks the whole record as dirty instead of
        // aborting the task; sourceIndex identifies the offending source field.
        String message = String.format("写入 ODPS 目的表时遇到了脏数据: 第[%s]个字段的数据出现错误,请检查该数据并作出修改 或者您可以增大阀值,忽略这条记录.", sourceIndex);
        this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, message);
        return null;
    }
}
Also used : StringColumn(com.alibaba.datax.common.element.StringColumn) OdpsType(com.aliyun.odps.OdpsType) Record(com.aliyun.odps.data.Record) TunnelException(com.aliyun.odps.tunnel.TunnelException) IOException(java.io.IOException) DataXException(com.alibaba.datax.common.exception.DataXException)

Aggregations

Record (com.aliyun.odps.data.Record)3 DataXException (com.alibaba.datax.common.exception.DataXException)2 com.alibaba.datax.common.element (com.alibaba.datax.common.element)1 StringColumn (com.alibaba.datax.common.element.StringColumn)1 OdpsType (com.aliyun.odps.OdpsType)1 RecordReader (com.aliyun.odps.data.RecordReader)1 TunnelException (com.aliyun.odps.tunnel.TunnelException)1 IOException (java.io.IOException)1 ParseException (java.text.ParseException)1