Search in sources:

Example 6 with Column

use of com.alibaba.datax.common.element.Column in project DataX by alibaba.

the class HdfsHelper method transportOneRecord.

/**
 * Converts every column of {@code record} to the Java type named by the writer-side
 * column configuration at the same index.
 *
 * <p>Columns whose raw data is {@code null} are emitted as {@code null}. If converting a
 * column fails, the record is reported to {@code taskPluginCollector} as dirty, the
 * right-hand flag of the result is set to {@code true}, and conversion stops, so the
 * left-hand list then only holds the values converted before the failing column.
 *
 * @param record               the record whose columns are converted
 * @param columnsConfiguration per-column configuration; {@code Key.TYPE} is read for the
 *                             target type and {@code Key.NAME} for error reporting
 * @param taskPluginCollector  receives the record when a column cannot be converted
 * @return a pair of (converted values, {@code true} if the record was reported as dirty)
 */
public static MutablePair<List<Object>, Boolean> transportOneRecord(Record record, List<Configuration> columnsConfiguration, TaskPluginCollector taskPluginCollector) {
    MutablePair<List<Object>, Boolean> transportResult = new MutablePair<List<Object>, Boolean>();
    transportResult.setRight(false);
    List<Object> recordList = Lists.newArrayList();
    int recordLength = record.getColumnNumber();
    if (0 != recordLength) {
        Column column;
        for (int i = 0; i < recordLength; i++) {
            column = record.getColumn(i);
            // TODO: extract the per-column conversion into its own method
            if (null != column.getRawData()) {
                String rowData = column.getRawData().toString();
                // Resolved outside the try block: a type name that is not a SupportHiveDataType
                // constant makes valueOf throw instead of being counted as dirty data.
                SupportHiveDataType columnType = SupportHiveDataType.valueOf(columnsConfiguration.get(i).getString(Key.TYPE).toUpperCase());
                // Convert the value according to the type configured on the writer side
                try {
                    switch(columnType) {
                        case TINYINT:
                            recordList.add(Byte.valueOf(rowData));
                            break;
                        case SMALLINT:
                            recordList.add(Short.valueOf(rowData));
                            break;
                        case INT:
                            recordList.add(Integer.valueOf(rowData));
                            break;
                        case BIGINT:
                            recordList.add(column.asLong());
                            break;
                        case FLOAT:
                            recordList.add(Float.valueOf(rowData));
                            break;
                        case DOUBLE:
                            recordList.add(column.asDouble());
                            break;
                        case STRING:
                        case VARCHAR:
                        case CHAR:
                            recordList.add(column.asString());
                            break;
                        case BOOLEAN:
                            recordList.add(column.asBoolean());
                            break;
                        case DATE:
                            recordList.add(new java.sql.Date(column.asDate().getTime()));
                            break;
                        case TIMESTAMP:
                            recordList.add(new java.sql.Timestamp(column.asDate().getTime()));
                            break;
                        default:
                            // Both arguments are strings, so both placeholders must be %s;
                            // %d would make String.format itself throw.
                            // NOTE(review): this exception is caught by the handler below and
                            // therefore reported as dirty data - confirm that is intended.
                            throw DataXException.asDataXException(HdfsWriterErrorCode.ILLEGAL_VALUE, String.format("您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%s]. 请修改表中该字段的类型或者不同步该字段.", columnsConfiguration.get(i).getString(Key.NAME), columnsConfiguration.get(i).getString(Key.TYPE)));
                    }
                } catch (Exception e) {
                    // warn: a conversion failure is treated as dirty data
                    String message = String.format("字段类型转换错误:你目标字段为[%s]类型,实际字段值为[%s].", columnsConfiguration.get(i).getString(Key.TYPE), column.getRawData().toString());
                    taskPluginCollector.collectDirtyRecord(record, message);
                    transportResult.setRight(true);
                    // Leaves the column loop: the remaining columns are not converted.
                    break;
                }
            } else {
                // warn: it's all ok if nullFormat is null
                recordList.add(null);
            }
        }
    }
    transportResult.setLeft(recordList);
    return transportResult;
}
Also used : IOException(java.io.IOException) DataXException(com.alibaba.datax.common.exception.DataXException) MutablePair(org.apache.commons.lang3.tuple.MutablePair) Column(com.alibaba.datax.common.element.Column)

Example 7 with Column

use of com.alibaba.datax.common.element.Column in project DataX by alibaba.

the class UnstructuredStorageWriterUtil method transportOneRecord.

/**
 * Renders each column of {@code record} as a string and hands the resulting row to
 * {@code unstructuredWriter}.
 *
 * <p>Columns without raw data are written as {@code nullFormat} (the literal
 * {@code "null"} when {@code nullFormat} itself is {@code null}). {@code DateColumn}
 * values are formatted with {@code dateParse} when one is supplied; every other value
 * uses {@code Column.asString()}. Any exception raised while building or writing the row
 * marks the record as dirty data.
 */
public static void transportOneRecord(Record record, String nullFormat, DateFormat dateParse, TaskPluginCollector taskPluginCollector, UnstructuredWriter unstructuredWriter) {
    // warn: default is null
    final String nullPlaceholder = (nullFormat == null) ? "null" : nullFormat;
    try {
        List<String> fields = new ArrayList<String>();
        final int columnCount = record.getColumnNumber();
        for (int idx = 0; idx < columnCount; idx++) {
            Column current = record.getColumn(idx);
            if (current.getRawData() == null) {
                // warn: it's all ok if nullFormat is null
                fields.add(nullPlaceholder);
                continue;
            }
            // Only date columns with an explicit formatter bypass asString().
            if (current instanceof DateColumn && dateParse != null) {
                fields.add(dateParse.format(current.asDate()));
            } else {
                fields.add(current.asString());
            }
        }
        unstructuredWriter.writeOneRecord(fields);
    } catch (Exception e) {
        // warn: dirty data
        taskPluginCollector.collectDirtyRecord(record, e);
    }
}
Also used : DateColumn(com.alibaba.datax.common.element.DateColumn) Column(com.alibaba.datax.common.element.Column) DateColumn(com.alibaba.datax.common.element.DateColumn) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DataXException(com.alibaba.datax.common.exception.DataXException) UnsupportedEncodingException(java.io.UnsupportedEncodingException)

Example 8 with Column

use of com.alibaba.datax.common.element.Column in project DataX by alibaba.

the class Common method getPKFromRecord.

/**
 * Builds a {@code RowPrimaryKey} from the leading columns of {@code r}: the column at
 * position {@code i} is paired with the {@code i}-th entry of {@code pkColumns}.
 *
 * @param pkColumns the expected primary-key columns, in record order
 * @param r         the record supplying the primary-key values
 * @return the primary key holding one converted value per entry of {@code pkColumns}
 * @throws IllegalArgumentException if a primary-key column carries no raw data
 */
public static RowPrimaryKey getPKFromRecord(List<OTSPKColumn> pkColumns, Record r) {
    RowPrimaryKey rowKey = new RowPrimaryKey();
    int position = 0;
    for (OTSPKColumn expected : pkColumns) {
        Column value = r.getColumn(position);
        position++;
        if (null == value.getRawData()) {
            String reason = String.format(OTSErrorMessage.PK_COLUMN_VALUE_IS_NULL_ERROR, expected.getName());
            throw new IllegalArgumentException(reason);
        }
        rowKey.addPrimaryKeyColumn(expected.getName(), ColumnConversion.columnToPrimaryKeyValue(value, expected));
    }
    return rowKey;
}
Also used : Column(com.alibaba.datax.common.element.Column) RowPrimaryKey(com.aliyun.openservices.ots.model.RowPrimaryKey) PrimaryKeyValue(com.aliyun.openservices.ots.model.PrimaryKeyValue)

Example 9 with Column

use of com.alibaba.datax.common.element.Column in project DataX by alibaba.

the class ReplaceTransformer method evaluate.

/**
 * Replaces {@code length} characters of a column's string value, starting at
 * {@code startIndex}, with a replacement string, and stores the result back into the
 * record as a {@code StringColumn}.
 *
 * <p>Expects exactly four parameters: the column index (an {@code Integer}), the start
 * index and the length (both as decimal {@code String}s), and the replacement
 * {@code String}. When the replaced range reaches or passes the end of the value,
 * everything from {@code startIndex} onwards is replaced. A column whose string value is
 * {@code null} is left untouched.
 *
 * @param record the record to transform; modified in place
 * @param paras  the four transformer parameters described above
 * @return the same {@code record} instance
 * @throws DataXException with {@code TRANSFORMER_ILLEGAL_PARAMETER} when the parameters
 *                        cannot be parsed, or {@code TRANSFORMER_RUN_EXCEPTION} when the
 *                        replacement itself fails
 */
@Override
public Record evaluate(Record record, Object... paras) {
    int columnIndex;
    int startIndex;
    int length;
    String replaceString;
    try {
        if (paras.length != 4) {
            throw new RuntimeException("dx_replace paras must be 4");
        }
        columnIndex = (Integer) paras[0];
        startIndex = Integer.valueOf((String) paras[1]);
        length = Integer.valueOf((String) paras[2]);
        replaceString = (String) paras[3];
    } catch (Exception e) {
        // Arrays.toString renders the same "[a, b]" text as asList(...).toString() but
        // tolerates a null array; the cause is passed on so the root failure is kept.
        throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.toString(paras) + " => " + e.getMessage(), e);
    }
    Column column = record.getColumn(columnIndex);
    try {
        String oriValue = column.asString();
        // Skip the replacement when the column has no value
        if (oriValue == null) {
            return record;
        }
        String newValue;
        if (startIndex > oriValue.length()) {
            throw new RuntimeException(String.format("dx_replace startIndex(%s) out of range(%s)", startIndex, oriValue.length()));
        }
        // Written as a subtraction so a very large length cannot overflow int, as
        // "startIndex + length" would; startIndex <= oriValue.length() holds here.
        if (length >= oriValue.length() - startIndex) {
            newValue = oriValue.substring(0, startIndex) + replaceString;
        } else {
            newValue = oriValue.substring(0, startIndex) + replaceString + oriValue.substring(startIndex + length, oriValue.length());
        }
        record.setColumn(columnIndex, new StringColumn(newValue));
    } catch (Exception e) {
        throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
    }
    return record;
}
Also used : StringColumn(com.alibaba.datax.common.element.StringColumn) Column(com.alibaba.datax.common.element.Column) StringColumn(com.alibaba.datax.common.element.StringColumn) DataXException(com.alibaba.datax.common.exception.DataXException)

Example 10 with Column

use of com.alibaba.datax.common.element.Column in project DataX by alibaba.

the class Common method getAttrFromRecord.

/**
 * Collects the attribute (non-primary-key) values of {@code r} as name/value pairs.
 *
 * <p>The {@code i}-th entry of {@code attrColumns} is matched with the record column at
 * index {@code i + pkCount}. A column without raw data yields a pair whose value is
 * {@code null}; otherwise the value comes from
 * {@code ColumnConversion.columnToColumnValue}.
 *
 * @param pkCount     number of leading record columns to skip
 * @param attrColumns the expected attribute columns, in record order
 * @param r           the record supplying the attribute values
 * @return one pair per entry of {@code attrColumns}, in the same order
 */
public static List<Pair<String, ColumnValue>> getAttrFromRecord(int pkCount, List<OTSAttrColumn> attrColumns, Record r) {
    List<Pair<String, ColumnValue>> pairs = new ArrayList<Pair<String, ColumnValue>>(r.getColumnNumber());
    int offset = 0;
    while (offset < attrColumns.size()) {
        Column source = r.getColumn(pkCount + offset);
        OTSAttrColumn target = attrColumns.get(offset);
        // A missing raw value is kept as a null attribute value rather than converted.
        ColumnValue converted = (source.getRawData() == null)
                ? null
                : ColumnConversion.columnToColumnValue(source, target);
        pairs.add(new Pair<String, ColumnValue>(target.getName(), converted));
        offset++;
    }
    return pairs;
}
Also used : Column(com.alibaba.datax.common.element.Column) ArrayList(java.util.ArrayList) ColumnValue(com.aliyun.openservices.ots.model.ColumnValue) Pair(org.apache.commons.math3.util.Pair)

Aggregations

Column (com.alibaba.datax.common.element.Column)10 StringColumn (com.alibaba.datax.common.element.StringColumn)5 DataXException (com.alibaba.datax.common.exception.DataXException)5 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Result (org.apache.hadoop.hbase.client.Result)2 DateColumn (com.alibaba.datax.common.element.DateColumn)1 LongColumn (com.alibaba.datax.common.element.LongColumn)1 Record (com.alibaba.datax.common.element.Record)1 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)1 ColumnValue (com.aliyun.openservices.ots.model.ColumnValue)1 PrimaryKeyValue (com.aliyun.openservices.ots.model.PrimaryKeyValue)1 RowPrimaryKey (com.aliyun.openservices.ots.model.RowPrimaryKey)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 MutablePair (org.apache.commons.lang3.tuple.MutablePair)1 Pair (org.apache.commons.math3.util.Pair)1 Test (org.junit.Test)1