Search in sources:

Example 16 with Configuration

Use of com.alibaba.datax.common.util.Configuration in the DataX project by Alibaba.

The getRowkey method of the NormalTask class:

/**
 * Builds the HBase rowkey for the given record by concatenating the bytes of
 * every configured rowkey column, in configuration order.
 *
 * <p>Each rowkey column is either a constant (index == -1, value taken from
 * the {@code value} config entry) or a reference to a record column by index.
 *
 * @param record the DataX record whose columns supply the rowkey parts
 * @return the concatenated rowkey bytes (empty array if no rowkey columns)
 * @throws DataXException if a configured index is out of range for the record
 */
public byte[] getRowkey(Record record) {
    byte[] rowkeyBuffer = {};
    for (Configuration aRowkeyColumn : rowkeyColumn) {
        Integer index = aRowkeyColumn.getInt(Key.INDEX);
        String type = aRowkeyColumn.getString(Key.TYPE);
        ColumnType columnType = ColumnType.getByTypeName(type);
        if (index == -1) {
            // index == -1 marks a constant rowkey part taken from the config value.
            String value = aRowkeyColumn.getString(Key.VALUE);
            rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value));
        } else {
            if (index >= record.getColumnNumber()) {
                // Fixed typo in the original message: "配置项中中" -> "配置项中".
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, String.format("您的rowkeyColumn配置项中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
            }
            byte[] value = getColumnByte(columnType, record.getColumn(index));
            rowkeyBuffer = Bytes.add(rowkeyBuffer, value);
        }
    }
    return rowkeyBuffer;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration)

Example 17 with Configuration

Use of com.alibaba.datax.common.util.Configuration in the DataX project by Alibaba.

The doSplit method of the Hbase11xHelper class:

/**
 * Produces one task Configuration per HBase region that overlaps the
 * user-requested rowkey range [startRowkeyByte, endRowkeyByte).
 *
 * <p>An empty {@code endRowkeyByte} means "read up to and including the last
 * region"; an empty region end key marks the table's last region.
 *
 * @param config          the job-level configuration to clone per region
 * @param startRowkeyByte user start rowkey (empty array = from table start)
 * @param endRowkeyByte   user end rowkey (empty array = to table end)
 * @param regionRanges    parallel arrays of region start keys and end keys
 * @return one cloned Configuration per overlapping region, with
 *         START_ROWKEY / END_ROWKEY set to the clipped slice bounds
 */
private static List<Configuration> doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair<byte[][], byte[][]> regionRanges) {
    List<Configuration> splitResults = new ArrayList<Configuration>();
    byte[][] regionStarts = regionRanges.getFirst();
    byte[][] regionEnds = regionRanges.getSecond();
    boolean userEndIsSet = endRowkeyByte.length != 0;
    for (int idx = 0; idx < regionStarts.length; idx++) {
        byte[] curRegionStart = regionStarts[idx];
        byte[] curRegionEnd = regionEnds[idx];
        boolean isLastRegion = Bytes.compareTo(curRegionEnd, HConstants.EMPTY_BYTE_ARRAY) == 0;
        // The last region (empty end key) is skipped only when the user set an
        // endKey that lies before this region's start; an unset endKey means
        // "take the table to its end", so the last region is always included.
        if (isLastRegion && userEndIsSet && Bytes.compareTo(curRegionStart, endRowkeyByte) > 0) {
            continue;
        }
        // Skip regions that end at or before the user's startKey.
        if (!isLastRegion && Bytes.compareTo(startRowkeyByte, curRegionEnd) >= 0) {
            continue;
        }
        // Skip regions that start at or after the user's endKey (when set).
        if (userEndIsSet && Bytes.compareTo(endRowkeyByte, curRegionStart) <= 0) {
            continue;
        }
        Configuration sliceConf = config.clone();
        String sliceStart = getStartKey(startRowkeyByte, curRegionStart);
        String sliceEnd = getEndKey(endRowkeyByte, curRegionEnd);
        sliceConf.set(Key.START_ROWKEY, sliceStart);
        sliceConf.set(Key.END_ROWKEY, sliceEnd);
        LOG.debug("startRowkey:[{}], endRowkey:[{}] .", sliceStart, sliceEnd);
        splitResults.add(sliceConf);
    }
    return splitResults;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ArrayList(java.util.ArrayList)

Example 18 with Configuration

Use of com.alibaba.datax.common.util.Configuration in the DataX project by Alibaba.

The convertRecordToPut method of the NormalTask class:

/**
 * Converts a DataX record into an HBase {@link Put}.
 *
 * <p>The rowkey comes from {@code getRowkey(record)}. When a version column
 * is configured, its value supplies the Put's timestamp. WAL writes are
 * skipped when {@code walFlag} is false. Each configured column must be named
 * "family:qualifier"; record columns whose serialized bytes are null are
 * silently skipped.
 *
 * @param record the DataX record to convert
 * @return the populated Put, never null
 * @throws DataXException if a configured column index is out of range
 */
@Override
public Put convertRecordToPut(Record record) {
    byte[] rowkey = getRowkey(record);
    Put put;
    if (this.versionColumn == null) {
        put = new Put(rowkey);
        if (!super.walFlag) {
            // Equivalent to 0.94's put.setWriteToWAL(super.walFlag).
            put.setDurability(Durability.SKIP_WAL);
        }
    } else {
        long timestamp = getVersion(record);
        put = new Put(rowkey, timestamp);
    }
    for (Configuration aColumn : columns) {
        Integer index = aColumn.getInt(Key.INDEX);
        String type = aColumn.getString(Key.TYPE);
        ColumnType columnType = ColumnType.getByTypeName(type);
        String name = aColumn.getString(Key.NAME);
        String promptInfo = "Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name;
        String[] cfAndQualifier = name.split(":");
        Validate.isTrue(cfAndQualifier != null && cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);
        if (index >= record.getColumnNumber()) {
            // Fixed typo in the original message: "配置项中中" -> "配置项中".
            throw DataXException.asDataXException(Hbase11xWriterErrorCode.ILLEGAL_VALUE, String.format("您的column配置项中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
        }
        byte[] columnBytes = getColumnByte(columnType, record.getColumn(index));
        // Null columnBytes means the column value is absent; skip this column.
        // (The original's trailing "else { continue; }" was a no-op and is removed.)
        if (null != columnBytes) {
            put.addColumn(Bytes.toBytes(cfAndQualifier[0]), Bytes.toBytes(cfAndQualifier[1]), columnBytes);
        }
    }
    return put;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) Put(org.apache.hadoop.hbase.client.Put)

Example 19 with Configuration

Use of com.alibaba.datax.common.util.Configuration in the DataX project by Alibaba.

The test_BufferExchanger method of the RecordExchangerTest class:

/**
 * Verifies that records pushed through a BufferedRecordExchanger backed by a
 * MemoryChannel come back in order, with none lost, once the channel closes.
 */
@Test
public void test_BufferExchanger() {
    Configuration conf = ConfigurationProducer.produce();
    conf.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, 1);
    Channel channel = new MemoryChannel(conf);
    channel.setCommunication(new Communication());
    TaskPluginCollector collector = mock(TaskPluginCollector.class);
    BufferedRecordExchanger exchanger = new BufferedRecordExchanger(channel, collector);
    final int capacity = 10;
    // Write `capacity` records, tagging column 0 with the sequence number.
    for (int seq = 0; seq < capacity; seq++) {
        Record outRecord = RecordProducer.produceRecord();
        outRecord.setColumn(0, new LongColumn(seq));
        exchanger.sendToWriter(outRecord);
    }
    exchanger.flush();
    channel.close();
    // Drain the reader side, asserting ordering, then check the total count.
    int counter = 0;
    Record inRecord;
    while ((inRecord = exchanger.getFromReader()) != null) {
        System.out.println(inRecord.getColumn(0).toString());
        Assert.assertTrue(inRecord.getColumn(0).asLong() == counter);
        counter++;
    }
    System.out.println(String.format("Capacity: %d Counter: %d .", capacity, counter));
    Assert.assertTrue(capacity == counter);
}
Also used : MemoryChannel(com.alibaba.datax.core.transport.channel.memory.MemoryChannel) TaskPluginCollector(com.alibaba.datax.common.plugin.TaskPluginCollector) LongColumn(com.alibaba.datax.common.element.LongColumn) Configuration(com.alibaba.datax.common.util.Configuration) Channel(com.alibaba.datax.core.transport.channel.Channel) MemoryChannel(com.alibaba.datax.core.transport.channel.memory.MemoryChannel) DefaultRecord(com.alibaba.datax.core.transport.record.DefaultRecord) Record(com.alibaba.datax.common.element.Record) Communication(com.alibaba.datax.core.statistics.communication.Communication) Test(org.junit.Test)

Example 20 with Configuration

Use of com.alibaba.datax.common.util.Configuration in the DataX project by Alibaba.

The split method of the HbaseSplitUtil class:

/**
 * Splits a single HBase reader job configuration into per-region task
 * configurations, clipping each region to the user's rowkey range.
 *
 * @param configuration the job configuration carrying the user rowkey range
 * @return one Configuration per region overlapping the requested range
 * @throws DataXException if startRowkey > endRowkey, the region ranges
 *         cannot be fetched, or splitting fails for any other reason
 */
public static List<Configuration> split(Configuration configuration) {
    byte[] userStartKey = HbaseUtil.convertUserStartRowkey(configuration);
    byte[] userEndKey = HbaseUtil.convertUserEndRowkey(configuration);
    // When both startRowkey and endRowkey are configured, require startRowkey <= endRowkey.
    if (userStartKey.length != 0 && userEndKey.length != 0 && Bytes.compareTo(userStartKey, userEndKey) > 0) {
        throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Hbasereader 中 startRowkey 不得大于 endRowkey.");
    }
    HTable htable = HbaseUtil.initHtable(configuration);
    try {
        Pair<byte[][], byte[][]> regionRanges = htable.getStartEndKeys();
        if (null == regionRanges) {
            throw DataXException.asDataXException(HbaseReaderErrorCode.SPLIT_ERROR, "获取源头 Hbase 表的 rowkey 范围失败.");
        }
        List<Configuration> taskConfigs = HbaseSplitUtil.doSplit(configuration, userStartKey, userEndKey, regionRanges);
        LOG.info("HBaseReader split job into {} tasks.", taskConfigs.size());
        return taskConfigs;
    } catch (Exception e) {
        throw DataXException.asDataXException(HbaseReaderErrorCode.SPLIT_ERROR, "切分源头 Hbase 表失败.", e);
    } finally {
        try {
            HTableManager.closeHTable(htable);
        } catch (Exception ignored) {
            // Best-effort close; a failure here must not mask the split result.
        }
    }
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) HTable(org.apache.hadoop.hbase.client.HTable) DataXException(com.alibaba.datax.common.exception.DataXException)

Aggregations

Configuration (com.alibaba.datax.common.util.Configuration)82 ArrayList (java.util.ArrayList)27 Test (org.junit.Test)19 Communication (com.alibaba.datax.core.statistics.communication.Communication)13 DataXException (com.alibaba.datax.common.exception.DataXException)9 Method (java.lang.reflect.Method)8 Record (com.alibaba.datax.common.element.Record)7 JobContainer (com.alibaba.datax.core.job.JobContainer)6 IOException (java.io.IOException)5 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 LongColumn (com.alibaba.datax.common.element.LongColumn)4 TaskPluginCollector (com.alibaba.datax.common.plugin.TaskPluginCollector)4 TaskGroupContainer (com.alibaba.datax.core.taskgroup.TaskGroupContainer)4 Channel (com.alibaba.datax.core.transport.channel.Channel)4 MemoryChannel (com.alibaba.datax.core.transport.channel.memory.MemoryChannel)4 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)4 File (java.io.File)4 HashSet (java.util.HashSet)3 List (java.util.List)3 VMInfo (com.alibaba.datax.common.statistics.VMInfo)2