Search in sources :

Example 46 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class OdpsSplitUtil method splitOnePartition.

/**
 * Builds the slice configurations for a single partition.
 *
 * <p>A download session is created for the partition through
 * {@code OdpsUtil.createMasterSessionForPartitionedTable}; its record count is divided by
 * {@code splitRecordCount} and one clone of {@code sliceConfig} is produced per resulting pair,
 * carrying the partition, the session id, and the pair's left/right values as start index and
 * step count.
 *
 * @param odps         the Odps handle passed to the session factory
 * @param onePartition the partition to split
 * @param adviceNum    the split number handed to {@code splitRecordCount}
 * @param sliceConfig  the template configuration; it is cloned, never modified
 * @return one configuration per pair returned by {@code splitRecordCount}
 */
private static List<Configuration> splitOnePartition(Odps odps, String onePartition, int adviceNum, Configuration sliceConfig) {
    String tunnelServer = sliceConfig.getString(Key.TUNNEL_SERVER);
    String tableName = sliceConfig.getString(Key.TABLE);
    String projectName = sliceConfig.getString(Key.PROJECT);

    // A single session is shared by every slice of this partition.
    DownloadSession masterSession = OdpsUtil.createMasterSessionForPartitionedTable(odps, tunnelServer, projectName, tableName, onePartition);
    String sessionId = masterSession.getId();
    long recordCount = masterSession.getRecordCount();

    List<Configuration> slices = new ArrayList<Configuration>();
    for (Pair<Long, Long> range : splitRecordCount(recordCount, adviceNum)) {
        long startIndex = range.getLeft();
        long stepCount = range.getRight();

        Configuration slice = sliceConfig.clone();
        slice.set(Key.PARTITION, onePartition);
        slice.set(Constant.SESSION_ID, sessionId);
        slice.set(Constant.START_INDEX, startIndex);
        slice.set(Constant.STEP_COUNT, stepCount);
        slices.add(slice);
    }
    return slices;
}
Also used : DownloadSession(com.aliyun.odps.tunnel.TableTunnel.DownloadSession) Configuration(com.alibaba.datax.common.util.Configuration) ArrayList(java.util.ArrayList) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 47 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class NormalTask method convertRecordToPut.

/**
 * Builds the HBase {@link Put} for one record from the configured column mappings.
 *
 * <p>The row key comes from {@code getRowkey(record)}. When no version column is configured the
 * Put is created without an explicit timestamp and the WAL flag is applied to it; otherwise the
 * Put is created with the timestamp returned by {@code getVersion(record)}. Each configured
 * column is converted with {@code getColumnByte} and added under its {@code family:qualifier}
 * name; a column whose converted bytes are {@code null} is skipped.
 *
 * @param record the source record
 * @return the populated Put
 * @throws DataXException if a column's index is missing, negative, or not smaller than the
 *                        record's column count
 */
@Override
public Put convertRecordToPut(Record record) {
    byte[] rowkey = getRowkey(record);
    Put put;
    if (this.versionColumn == null) {
        put = new Put(rowkey);
        // NOTE(review): the WAL flag is only applied on this branch; Puts created with an
        // explicit timestamp below never receive it - confirm this is intended.
        put.setWriteToWAL(super.walFlag);
    } else {
        long timestamp = getVersion(record);
        put = new Put(rowkey, timestamp);
    }
    for (Configuration aColumn : columns) {
        Integer index = aColumn.getInt(Key.INDEX);
        String type = aColumn.getString(Key.TYPE);
        ColumnType columnType = ColumnType.getByTypeName(type);
        String name = aColumn.getString(Key.NAME);
        String promptInfo = "Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name;
        // Guard the split so a missing name is reported by the format check below
        // instead of surfacing as a NullPointerException.
        String[] cfAndQualifier = name == null ? null : name.split(":");
        Validate.isTrue(cfAndQualifier != null && cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);
        // A missing index would otherwise fail on auto-unboxing, and a negative one would
        // reach record.getColumn(index) unchecked.
        if (index == null || index < 0) {
            throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, "您的column配置项不正确,配置项中中index为必填项,且为非负数,请检查并修改.");
        }
        if (index >= record.getColumnNumber()) {
            throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, String.format("您的column配置项中中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
        }
        byte[] columnBytes = getColumnByte(columnType, record.getColumn(index));
        // A null conversion result means this column is left out of the Put.
        if (null != columnBytes) {
            put.add(Bytes.toBytes(cfAndQualifier[0]), Bytes.toBytes(cfAndQualifier[1]), columnBytes);
        }
    }
    return put;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) Put(org.apache.hadoop.hbase.client.Put)

Example 48 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class Hbase11xHelper method validateRowkeyColumn.

/**
 * Validates the {@code rowkeyColumn} section of the writer configuration.
 *
 * <p>The list must be present and non-empty. Every entry needs a {@code type} accepted by
 * {@code ColumnType.getByTypeName} and an {@code index}. An index of {@code -1} marks a constant
 * entry, which additionally needs a {@code value}; any other index must be non-negative. At
 * least one entry must be a non-constant column, regardless of how many entries are configured.
 *
 * @param originalConfig the configuration holding the {@code rowkeyColumn} list
 * @throws DataXException if any of the rules above is violated
 */
private static void validateRowkeyColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
    List<Configuration> rowkeyColumn = originalConfig.getListConfiguration(Key.ROWKEY_COLUMN);
    if (rowkeyColumn == null || rowkeyColumn.isEmpty()) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "rowkeyColumn为必填项,其形式为:rowkeyColumn:[{\"index\": 0,\"type\": \"string\"},{\"index\": -1,\"type\": \"string\",\"value\": \"_\"}]");
    }
    int rowkeyColumnSize = rowkeyColumn.size();
    boolean hasRecordColumn = false;
    // Entries look like {"index":0,"type":"string"} or {"index":-1,"type":"string","value":"_"}
    for (Configuration aRowkeyColumn : rowkeyColumn) {
        Integer index = aRowkeyColumn.getInt(Key.INDEX);
        String type = aRowkeyColumn.getNecessaryValue(Key.TYPE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        ColumnType.getByTypeName(type);
        if (index == null) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "rowkeyColumn配置项中index为必填项");
        }
        // A lone constant entry (the rowkey join string) is rejected before its value is checked.
        if (rowkeyColumnSize == 1 && index == -1) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "rowkeyColumn配置项不能全为常量列,至少指定一个rowkey列");
        }
        if (index == -1) {
            aRowkeyColumn.getNecessaryValue(Key.VALUE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        } else if (index < -1) {
            // Only -1 is a recognised marker; other negative values cannot address a record column.
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "rowkeyColumn配置项中index取值不合法,只能为-1(常量列)或非负数,请检查并修改.");
        } else {
            hasRecordColumn = true;
        }
    }
    // Several constant entries with no record column are just as invalid as a single one.
    if (!hasRecordColumn) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "rowkeyColumn配置项不能全为常量列,至少指定一个rowkey列");
    }
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration)

Example 49 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class Hbase11xHelper method validateColumn.

/**
 * Validates the {@code column} section of the writer configuration.
 *
 * <p>The list must be present and non-empty. Every entry needs a {@code type} accepted by
 * {@code ColumnType.getByTypeName}, a {@code name} of the form {@code family:qualifier} with
 * both parts non-blank, and a non-negative {@code index}.
 *
 * @param originalConfig the configuration holding the {@code column} list
 * @throws DataXException if any of the rules above is violated
 */
private static void validateColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
    List<Configuration> columns = originalConfig.getListConfiguration(Key.COLUMN);
    if (columns == null || columns.isEmpty()) {
        throw DataXException.asDataXException(Hbase11xWriterErrorCode.REQUIRED_VALUE, "column为必填项,其形式为:column:[{\"index\": 0,\"name\": \"cf0:column0\",\"type\": \"string\"},{\"index\": 1,\"name\": \"cf1:column1\",\"type\": \"long\"}]");
    }
    for (Configuration aColumn : columns) {
        Integer index = aColumn.getInt(Key.INDEX);
        String type = aColumn.getNecessaryValue(Key.TYPE, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        String name = aColumn.getNecessaryValue(Key.NAME, Hbase11xWriterErrorCode.REQUIRED_VALUE);
        ColumnType.getByTypeName(type);
        String[] familyAndQualifier = name.split(":");
        // A length check alone lets names such as ":q" or " :q" through; require both parts
        // to carry non-blank text.
        boolean wellFormed = familyAndQualifier.length == 2
                && !familyAndQualifier[0].trim().isEmpty()
                && !familyAndQualifier[1].trim().isEmpty();
        if (!wellFormed) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, String.format("您column配置项中name配置的列格式[%s]不正确,name应该配置为 列族:列名  的形式, 如 {\"index\": 1,\"name\": \"cf1:q1\",\"type\": \"long\"}", name));
        }
        if (index == null || index < 0) {
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, "您的column配置项不正确,配置项中中index为必填项,且为非负数,请检查并修改.");
        }
    }
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration)

Example 50 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class OtsReaderMasterProxy method split.

/**
 * Splits the configured range and wraps every resulting sub-range in its own Configuration.
 *
 * <p>When the conf carries a range split, {@code userDefinedRangeSplit} is used; otherwise
 * {@code defaultRangeSplit} is called with {@code num}. The conf's range split is then set to
 * {@code null} before the conf is serialized. Each returned Configuration holds the serialized
 * conf, one serialized range, and the serialized direction.
 *
 * @param num the expected number of splits, passed to the default split algorithm
 * @return one Configuration per computed range
 * @throws Exception propagated from the split helpers or the serialization calls
 */
public List<Configuration> split(int num) throws Exception {
    LOG.info("Expect split num : " + num);
    List<OTSRange> ranges = null;
    if (this.conf.getRangeSplit() != null) {
        // The user explicitly specified the split ranges.
        LOG.info("Begin userDefinedRangeSplit");
        ranges = userDefinedRangeSplit(meta, range, this.conf.getRangeSplit());
        LOG.info("End userDefinedRangeSplit");
    } else {
        // Fall back to the default split algorithm.
        LOG.info("Begin defaultRangeSplit");
        ranges = defaultRangeSplit(ots, meta, range, num);
        LOG.info("End defaultRangeSplit");
    }
    // Avoids the memory cost of serializing a large number of split points:
    // the slave side does not use this setting, so it is cleared.
    this.conf.setRangeSplit(null);
    // The conf and direction are identical for every range, so serialize them once
    // instead of once per range.
    String confJson = GsonParser.confToJson(this.conf);
    String directionJson = GsonParser.directionToJson(direction);
    List<Configuration> configurations = new ArrayList<Configuration>(ranges.size());
    for (OTSRange item : ranges) {
        Configuration configuration = Configuration.newDefault();
        configuration.set(OTSConst.OTS_CONF, confJson);
        configuration.set(OTSConst.OTS_RANGE, GsonParser.rangeToJson(item));
        configuration.set(OTSConst.OTS_DIRECTION, directionJson);
        configurations.add(configuration);
    }
    LOG.info("Configuration list count : " + configurations.size());
    return configurations;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) OTSRange(com.alibaba.datax.plugin.reader.otsreader.model.OTSRange) ArrayList(java.util.ArrayList)

Aggregations

Configuration (com.alibaba.datax.common.util.Configuration)82 ArrayList (java.util.ArrayList)27 Test (org.junit.Test)19 Communication (com.alibaba.datax.core.statistics.communication.Communication)13 DataXException (com.alibaba.datax.common.exception.DataXException)9 Method (java.lang.reflect.Method)8 Record (com.alibaba.datax.common.element.Record)7 JobContainer (com.alibaba.datax.core.job.JobContainer)6 IOException (java.io.IOException)5 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 LongColumn (com.alibaba.datax.common.element.LongColumn)4 TaskPluginCollector (com.alibaba.datax.common.plugin.TaskPluginCollector)4 TaskGroupContainer (com.alibaba.datax.core.taskgroup.TaskGroupContainer)4 Channel (com.alibaba.datax.core.transport.channel.Channel)4 MemoryChannel (com.alibaba.datax.core.transport.channel.memory.MemoryChannel)4 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)4 File (java.io.File)4 HashSet (java.util.HashSet)3 List (java.util.List)3 VMInfo (com.alibaba.datax.common.statistics.VMInfo)2