Search in sources :

Example 6 with DataXException

use of com.alibaba.datax.common.exception.DataXException in project DataX by alibaba.

the class SingleTableSplitUtil method genSplitSqlForOracle.

/**
 * Generates range predicates for splitting an Oracle table by sampling its split key.
 * Supports numeric (integer) and string split keys.
 *
 * <p>The method runs a sampling query ({@code SAMPLE (percentage)} ordered by
 * {@code DBMS_RANDOM.VALUE}, limited to {@code adviceNum} rows and finally sorted by
 * {@code splitPK}) and turns the sampled key values into range conditions via
 * {@code RdbmsRangeSplitWrap}. As a side effect it writes
 * {@code Constant.PK_TYPE_MONTECARLO} into {@code configuration} under {@code Constant.PK_TYPE}.
 *
 * @param splitPK       name of the split key column
 * @param table         table to sample
 * @param where         optional extra filter; blank means no extra filter
 * @param configuration job configuration; supplies the sample percentage, fetch size and
 *                      JDBC credentials, and must not be {@code null}
 * @param adviceNum     requested number of splits, must be at least 1
 * @return {@code null} when {@code adviceNum == 1}; otherwise the list of range conditions,
 *         which is empty when the sample produced fewer than two points
 * @throws IllegalArgumentException when {@code adviceNum < 1}
 * @throws DataXException           when the sampling query fails or the split key type is
 *                                  neither an integer nor a string type
 */
public static List<String> genSplitSqlForOracle(String splitPK, String table, String where, Configuration configuration, int adviceNum) {
    if (adviceNum < 1) {
        throw new IllegalArgumentException(String.format("切分份数不能小于1. 此处:adviceNum=[%s].", adviceNum));
    } else if (adviceNum == 1) {
        return null;
    }
    // Rows with a NULL split key are always excluded from the sample.
    String whereSql = String.format("%s IS NOT NULL", splitPK);
    if (StringUtils.isNotBlank(where)) {
        whereSql = String.format(" WHERE (%s) AND (%s) ", whereSql, where);
    } else {
        whereSql = String.format(" WHERE (%s) ", whereSql);
    }
    Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1);
    String sampleSqlTemplate = "SELECT * FROM ( SELECT %s FROM %s SAMPLE (%s) %s ORDER BY DBMS_RANDOM.VALUE) WHERE ROWNUM <= %s ORDER by %s ASC";
    String splitSql = String.format(sampleSqlTemplate, splitPK, table, percentage, whereSql, adviceNum, splitPK);
    int fetchSize = configuration.getInt(Constant.FETCH_SIZE, 32);
    String jdbcURL = configuration.getString(Key.JDBC_URL);
    String username = configuration.getString(Key.USERNAME);
    String password = configuration.getString(Key.PASSWORD);
    Connection conn = DBUtil.getConnection(DATABASE_TYPE, jdbcURL, username, password);
    LOG.info("split pk [sql={}] is running... ", splitSql);
    ResultSet rs = null;
    List<Pair<Object, Integer>> splitedRange = new ArrayList<Pair<Object, Integer>>();
    try {
        try {
            rs = DBUtil.query(conn, splitSql, fetchSize);
        } catch (Exception e) {
            throw RdbmsException.asQueryException(DATABASE_TYPE, e, splitSql, table, username);
        }
        // configuration was already dereferenced above, so it cannot be null at this point.
        configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_MONTECARLO);
        ResultSetMetaData rsMetaData = rs.getMetaData();
        // Collect each sampled key value together with its JDBC column type.
        while (DBUtil.asyncResultSetNext(rs)) {
            ImmutablePair<Object, Integer> eachPoint = new ImmutablePair<Object, Integer>(rs.getObject(1), rsMetaData.getColumnType(1));
            splitedRange.add(eachPoint);
        }
    } catch (DataXException e) {
        throw e;
    } catch (Exception e) {
        throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "DataX尝试切分表发生错误. 请检查您的配置并作出修改.", e);
    } finally {
        // The connection is opened by this method and never handed out, so it must be
        // released here together with the result set; otherwise every call leaks a connection.
        DBUtil.closeDBResources(rs, null, conn);
    }
    LOG.debug(JSON.toJSONString(splitedRange));
    List<String> rangeSql = new ArrayList<String>();
    int splitedRangeSize = splitedRange.size();
    // Fewer than two sample points cannot form a range, so the result stays empty.
    // NOTE(review): the original comment quotes a "length can not <2" error, presumably
    // raised by the range wrapper when given a single point - confirm.
    if (splitedRangeSize >= 2) {
        // warn: oracle Number is long type here
        if (isLongType(splitedRange.get(0).getRight())) {
            BigInteger[] integerPoints = new BigInteger[splitedRangeSize];
            for (int i = 0; i < splitedRangeSize; i++) {
                integerPoints[i] = new BigInteger(splitedRange.get(i).getLeft().toString());
            }
            rangeSql.addAll(RdbmsRangeSplitWrap.wrapRange(integerPoints, splitPK));
            // Add the condition built from the first and last sampled points.
            rangeSql.add(RdbmsRangeSplitWrap.wrapFirstLastPoint(integerPoints[0], integerPoints[splitedRangeSize - 1], splitPK));
        } else if (isStringType(splitedRange.get(0).getRight())) {
            // warn: treated as string type
            String[] stringPoints = new String[splitedRangeSize];
            for (int i = 0; i < splitedRangeSize; i++) {
                stringPoints[i] = splitedRange.get(i).getLeft().toString();
            }
            rangeSql.addAll(RdbmsRangeSplitWrap.wrapRange(stringPoints, splitPK, "'", DATABASE_TYPE));
            // Add the condition built from the first and last sampled points.
            rangeSql.add(RdbmsRangeSplitWrap.wrapFirstLastPoint(stringPoints[0], stringPoints[splitedRangeSize - 1], splitPK, "'", DATABASE_TYPE));
        } else {
            throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
        }
    }
    return rangeSql;
}
Also used : Connection(java.sql.Connection) ArrayList(java.util.ArrayList) DataXException(com.alibaba.datax.common.exception.DataXException) BigInteger(java.math.BigInteger) ResultSetMetaData(java.sql.ResultSetMetaData) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DataXException(com.alibaba.datax.common.exception.DataXException) ResultSet(java.sql.ResultSet) BigInteger(java.math.BigInteger) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Pair(org.apache.commons.lang3.tuple.Pair)

Example 7 with DataXException

use of com.alibaba.datax.common.exception.DataXException in project DataX by alibaba.

the class PreCheckTask method call.

/**
 * Validates the configured query SQLs and split-key SQLs for one connection entry.
 *
 * <p>Every SQL is syntax-checked through {@code DBUtil.sqlValid}; only the first query and
 * the first split-key SQL (index 0) are actually executed against the database. The
 * connection opened here is always closed before returning.
 *
 * @return always {@code true} when no check failed
 * @throws DataXException when a SQL cannot be parsed or executed, or the split key check fails
 */
@Override
public Boolean call() throws DataXException {
    final String url = this.connection.getString(Key.JDBC_URL);
    final List<Object> queries = this.connection.getList(Key.QUERY_SQL, Object.class);
    final List<Object> splitPkQueries = this.connection.getList(Key.SPLIT_PK_SQL, Object.class);
    final List<Object> tableNames = this.connection.getList(Key.TABLE, Object.class);
    final Connection dbConn = DBUtil.getConnectionWithoutRetry(this.dataBaseType, url, this.userName, password);
    // MySQL and DRDS use Integer.MIN_VALUE as fetch size (presumably the driver's
    // row-streaming hint - confirm); every other database fetches one row.
    final boolean mysqlFamily = DataBaseType.MySql.equals(dataBaseType) || DataBaseType.DRDS.equals(dataBaseType);
    final int fetchSize = mysqlFamily ? Integer.MIN_VALUE : 1;
    try {
        for (int idx = 0; idx < queries.size(); idx++) {
            final boolean firstEntry = (idx == 0);
            String splitPkQuery = null;
            final String query = queries.get(idx).toString();
            final String tableName = (tableNames == null || tableNames.isEmpty())
                    ? null
                    : tableNames.get(idx).toString();

            // Step 1: verify the query SQL (parse all, execute only the first one).
            ResultSet queryResult = null;
            try {
                DBUtil.sqlValid(query, dataBaseType);
                if (firstEntry) {
                    queryResult = DBUtil.query(dbConn, query, fetchSize);
                }
            } catch (ParserException parseFailure) {
                throw RdbmsException.asSqlParserException(this.dataBaseType, parseFailure, query);
            } catch (Exception queryFailure) {
                throw RdbmsException.asQueryException(this.dataBaseType, queryFailure, query, tableName, userName);
            } finally {
                DBUtil.closeDBResources(queryResult, null, null);
            }

            // Step 2: verify the split-key SQL, if any were configured.
            try {
                if (splitPkQueries == null || splitPkQueries.isEmpty()) {
                    continue;
                }
                splitPkQuery = splitPkQueries.get(idx).toString();
                DBUtil.sqlValid(splitPkQuery, dataBaseType);
                if (firstEntry) {
                    SingleTableSplitUtil.precheckSplitPk(dbConn, splitPkQuery, fetchSize, tableName, userName);
                }
            } catch (ParserException parseFailure) {
                throw RdbmsException.asSqlParserException(this.dataBaseType, parseFailure, splitPkQuery);
            } catch (DataXException alreadyTranslated) {
                // Already a framework exception; pass it through untouched.
                throw alreadyTranslated;
            } catch (Exception splitFailure) {
                throw RdbmsException.asSplitPKException(this.dataBaseType, splitFailure, splitPkQuery, this.splitPkId.trim());
            }
        }
    } finally {
        DBUtil.closeDBResources(null, dbConn);
    }
    return Boolean.TRUE;
}
Also used : ParserException(com.alibaba.druid.sql.parser.ParserException) DataXException(com.alibaba.datax.common.exception.DataXException) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) ParserException(com.alibaba.druid.sql.parser.ParserException) RdbmsException(com.alibaba.datax.plugin.rdbms.util.RdbmsException) DataXException(com.alibaba.datax.common.exception.DataXException)

Example 8 with DataXException

use of com.alibaba.datax.common.exception.DataXException in project DataX by alibaba.

the class DFSUtil method transportOneRecord.

/**
 * Builds one {@code Record} from the raw field values of a row and sends it to the writer.
 *
 * <p>When {@code isReadAllColumns} is set, every field becomes a {@code StringColumn}.
 * Otherwise each entry of {@code columnConfigs} selects a field by index (or supplies a
 * constant value) and converts it to the configured column type. Conversion failures and
 * out-of-range indexes are reported through {@code taskPluginCollector} as dirty records
 * instead of being thrown; a {@code DataXException} is rethrown.
 *
 * @param columnConfigs       column definitions used when not reading all columns
 * @param recordFields        raw field values of the current row
 * @param recordSender        creates the record and receives it once it is complete
 * @param taskPluginCollector receives the record when it is considered dirty
 * @param isReadAllColumns    {@code true} to copy every field as a string column
 * @param nullFormat          textual representation of null; a value equal to it becomes null
 * @return the record that was built (also when it was reported as dirty)
 */
private Record transportOneRecord(List<ColumnEntry> columnConfigs, List<Object> recordFields, RecordSender recordSender, TaskPluginCollector taskPluginCollector, boolean isReadAllColumns, String nullFormat) {
    Record record = recordSender.createRecord();
    Column columnGenerated;
    try {
        if (isReadAllColumns) {
            // Read every column and create each one as a String-typed column.
            for (Object recordField : recordFields) {
                String columnValue = null;
                if (recordField != null) {
                    columnValue = recordField.toString();
                }
                columnGenerated = new StringColumn(columnValue);
                record.addColumn(columnGenerated);
            }
        } else {
            for (ColumnEntry columnConfig : columnConfigs) {
                String columnType = columnConfig.getType();
                Integer columnIndex = columnConfig.getIndex();
                String columnConst = columnConfig.getValue();
                String columnValue = null;
                if (null != columnIndex) {
                    if (null != recordFields.get(columnIndex)) {
                        columnValue = recordFields.get(columnIndex).toString();
                    }
                } else {
                    // No index configured: the column carries the configured constant.
                    columnValue = columnConst;
                }
                // Upper-case with a fixed locale so that enum lookup does not depend on the
                // JVM default locale (e.g. Turkish dotted/dotless i).
                Type type = Type.valueOf(columnType.toUpperCase(java.util.Locale.ROOT));
                // it's all ok if nullFormat is null
                if (StringUtils.equals(columnValue, nullFormat)) {
                    columnValue = null;
                }
                switch(type) {
                    case STRING:
                        columnGenerated = new StringColumn(columnValue);
                        break;
                    case LONG:
                        try {
                            columnGenerated = new LongColumn(columnValue);
                        } catch (Exception e) {
                            // Keep the original failure as the cause for diagnostics.
                            throw new IllegalArgumentException(String.format("类型转换错误, 无法将[%s] 转换为[%s]", columnValue, "LONG"), e);
                        }
                        break;
                    case DOUBLE:
                        try {
                            columnGenerated = new DoubleColumn(columnValue);
                        } catch (Exception e) {
                            throw new IllegalArgumentException(String.format("类型转换错误, 无法将[%s] 转换为[%s]", columnValue, "DOUBLE"), e);
                        }
                        break;
                    case BOOLEAN:
                        try {
                            columnGenerated = new BoolColumn(columnValue);
                        } catch (Exception e) {
                            throw new IllegalArgumentException(String.format("类型转换错误, 无法将[%s] 转换为[%s]", columnValue, "BOOLEAN"), e);
                        }
                        break;
                    case DATE:
                        try {
                            if (columnValue == null) {
                                columnGenerated = new DateColumn((Date) null);
                            } else {
                                String formatString = columnConfig.getFormat();
                                if (StringUtils.isNotBlank(formatString)) {
                                    // Parse with the format configured by the user.
                                    SimpleDateFormat format = new SimpleDateFormat(formatString);
                                    columnGenerated = new DateColumn(format.parse(columnValue));
                                } else {
                                    // No format configured: let the framework attempt the conversion.
                                    columnGenerated = new DateColumn(new StringColumn(columnValue).asDate());
                                }
                            }
                        } catch (Exception e) {
                            throw new IllegalArgumentException(String.format("类型转换错误, 无法将[%s] 转换为[%s]", columnValue, "DATE"), e);
                        }
                        break;
                    default:
                        String errorMessage = String.format("您配置的列类型暂不支持 : [%s]", columnType);
                        LOG.error(errorMessage);
                        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.NOT_SUPPORT_TYPE, errorMessage);
                }
                record.addColumn(columnGenerated);
            }
        }
        recordSender.sendToWriter(record);
    } catch (IllegalArgumentException iae) {
        // Includes the conversion errors raised above and an unknown type name
        // rejected by Type.valueOf.
        taskPluginCollector.collectDirtyRecord(record, iae.getMessage());
    } catch (IndexOutOfBoundsException ioe) {
        // A configured index beyond the number of fields in this row.
        taskPluginCollector.collectDirtyRecord(record, ioe.getMessage());
    } catch (Exception e) {
        if (e instanceof DataXException) {
            throw (DataXException) e;
        }
        // Every kind of conversion failure is handled as dirty data, including number and date formats.
        taskPluginCollector.collectDirtyRecord(record, e.getMessage());
    }
    return record;
}
Also used : ColumnEntry(com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry) IOException(java.io.IOException) DataXException(com.alibaba.datax.common.exception.DataXException) DataXException(com.alibaba.datax.common.exception.DataXException) JSONObject(com.alibaba.fastjson.JSONObject) SimpleDateFormat(java.text.SimpleDateFormat)

Example 9 with DataXException

use of com.alibaba.datax.common.exception.DataXException in project DataX by alibaba.

the class SingleTableSplitUtil method checkSplitPk.

/**
 * Checks whether the configured splitPk is usable and reads its value range.
 *
 * <p>Runs {@code pkRangeSQL}, verifies the key type through the result set metadata and
 * returns the first two columns of the result as a pair. A {@code null} configuration
 * means the precheck path, in which case PK_TYPE is not written back into the configuration.
 *
 * @return the pair read from the last row of the result, or {@code null} when the query
 *         returned no rows
 * @throws DataXException when the query fails or the split key type is not supported
 */
private static Pair<Object, Object> checkSplitPk(Connection conn, String pkRangeSQL, int fetchSize, String table, String username, Configuration configuration) {
    // Shared message for every "unsupported split key" failure below.
    final String unsupportedPkMessage = "您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.";
    final boolean writeBackPkType = (configuration != null);

    LOG.info("split pk [sql={}] is running... ", pkRangeSQL);
    ResultSet resultSet = null;
    Pair<Object, Object> pkRange = null;
    try {
        try {
            resultSet = DBUtil.query(conn, pkRangeSQL, fetchSize);
        } catch (Exception queryFailure) {
            throw RdbmsException.asQueryException(DATABASE_TYPE, queryFailure, pkRangeSQL, table, username);
        }
        final ResultSetMetaData meta = resultSet.getMetaData();
        if (!isPKTypeValid(meta)) {
            throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, unsupportedPkMessage);
        }

        if (isStringType(meta.getColumnType(1))) {
            if (writeBackPkType) {
                configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_STRING);
            }
            while (DBUtil.asyncResultSetNext(resultSet)) {
                pkRange = new ImmutablePair<Object, Object>(resultSet.getString(1), resultSet.getString(2));
            }
        } else if (isLongType(meta.getColumnType(1))) {
            if (writeBackPkType) {
                configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_LONG);
            }
            while (DBUtil.asyncResultSetNext(resultSet)) {
                pkRange = new ImmutablePair<Object, Object>(resultSet.getString(1), resultSet.getString(2));
                // check: string shouldn't contain '.', for oracle
                final String bothBounds = resultSet.getString(1) + resultSet.getString(2);
                if (StringUtils.contains(bothBounds, '.')) {
                    throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, unsupportedPkMessage);
                }
            }
        } else {
            throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, unsupportedPkMessage);
        }
    } catch (DataXException alreadyTranslated) {
        // Framework exceptions keep their original error code and message.
        throw alreadyTranslated;
    } catch (Exception unexpected) {
        throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "DataX尝试切分表发生错误. 请检查您的配置并作出修改.", unexpected);
    } finally {
        DBUtil.closeDBResources(resultSet, null, null);
    }
    return pkRange;
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DataXException(com.alibaba.datax.common.exception.DataXException) ResultSet(java.sql.ResultSet) DataXException(com.alibaba.datax.common.exception.DataXException)

Aggregations

DataXException (com.alibaba.datax.common.exception.DataXException)9 ResultSet (java.sql.ResultSet)3 URISyntaxException (java.net.URISyntaxException)2 Connection (java.sql.Connection)2 ResultSetMetaData (java.sql.ResultSetMetaData)2 ArrayList (java.util.ArrayList)2 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)2 HttpGet (org.apache.http.client.methods.HttpGet)2 HttpRequestBase (org.apache.http.client.methods.HttpRequestBase)2 Test (org.junit.Test)2 com.alibaba.datax.common.element (com.alibaba.datax.common.element)1 RdbmsException (com.alibaba.datax.plugin.rdbms.util.RdbmsException)1 ColumnEntry (com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry)1 AdsException (com.alibaba.datax.plugin.writer.adswriter.AdsException)1 ColumnInfo (com.alibaba.datax.plugin.writer.adswriter.ads.ColumnInfo)1 TableInfo (com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo)1 ParserException (com.alibaba.druid.sql.parser.ParserException)1 JSONObject (com.alibaba.fastjson.JSONObject)1 Record (com.aliyun.odps.data.Record)1 RecordReader (com.aliyun.odps.data.RecordReader)1