Search in sources :

Example 61 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class JobAssignUtil method doAssign.

/**
 * Distributes tasks over task groups round-robin, taking one task from each
 * resource mark in turn so that tasks sharing a resource are spread across
 * different task groups.
 *
 * <p>Example of the intended result:
 * <pre>
 * database a holds tables: 0, 1, 2
 * database b holds tables: 3, 4
 * database c holds tables: 5, 6, 7
 *
 * with 4 taskGroups the assignment is:
 * taskGroup-0: 0,  4,
 * taskGroup-1: 3,  6,
 * taskGroup-2: 5,  2,
 * taskGroup-3: 1,  7
 * </pre>
 *
 * <p>The map is only read; the task id lists it holds are not modified.
 *
 * @param resourceMarkAndTaskIdMap resource mark to the ids of the tasks using that
 *                                 resource, visited in the map's iteration order;
 *                                 each id is used as an index into the job content list
 * @param jobConfiguration         job configuration; its content list supplies the task
 *                                 configurations and a clone of it (minus the content)
 *                                 is the template for every task group
 * @param taskGroupNumber          number of task groups to produce
 * @return one configuration per task group, in task group id order
 */
private static List<Configuration> doAssign(LinkedHashMap<String, List<Integer>> resourceMarkAndTaskIdMap, Configuration jobConfiguration, int taskGroupNumber) {
    List<Configuration> contentConfig = jobConfiguration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
    Configuration taskGroupTemplate = jobConfiguration.clone();
    taskGroupTemplate.remove(CoreConstant.DATAX_JOB_CONTENT);

    List<List<Configuration>> taskGroupConfigList = new ArrayList<List<Configuration>>(taskGroupNumber);
    for (int i = 0; i < taskGroupNumber; i++) {
        taskGroupConfigList.add(new LinkedList<Configuration>());
    }

    // One read-only cursor per resource mark, kept in the map's iteration order.
    // Walking cursors instead of calling remove(0) leaves the caller's lists intact.
    List<java.util.Iterator<Integer>> taskIdCursors = new ArrayList<java.util.Iterator<Integer>>(resourceMarkAndTaskIdMap.size());
    for (List<Integer> taskIds : resourceMarkAndTaskIdMap.values()) {
        taskIdCursors.add(taskIds.iterator());
    }

    // Each pass takes at most one task from every resource mark; stop after the
    // first pass in which no mark had a task left.
    int taskGroupIndex = 0;
    boolean assignedInThisPass = true;
    while (assignedInThisPass) {
        assignedInThisPass = false;
        for (java.util.Iterator<Integer> cursor : taskIdCursors) {
            if (cursor.hasNext()) {
                int taskId = cursor.next();
                taskGroupConfigList.get(taskGroupIndex % taskGroupNumber).add(contentConfig.get(taskId));
                taskGroupIndex++;
                assignedInThisPass = true;
            }
        }
    }

    List<Configuration> result = new LinkedList<Configuration>();
    for (int i = 0; i < taskGroupNumber; i++) {
        Configuration taskGroupConfig = taskGroupTemplate.clone();
        taskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT, taskGroupConfigList.get(i));
        taskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, i);
        result.add(taskGroupConfig);
    }
    return result;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration)

Example 62 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class AdsUtil method generateConf.

/**
 * Builds the configuration required by the ODPSWriter plugin.
 *
 * <p>Works on a clone of {@code originalConfig}: the ODPS table name, the
 * connection settings taken from {@code transConf}, the truncate flag, a null
 * partition and the column name list from {@code tableMeta} are written onto it.
 *
 * @param originalConfig configuration to clone; the original object is not written to here
 * @param odpsTableName  value stored under {@code Key.ODPSTABLENAME}
 * @param tableMeta      table metadata whose columns supply the column name list
 * @param transConf      source of the ODPS server, tunnel, access id/key and project values
 * @return the populated clone
 */
public static Configuration generateConf(Configuration originalConfig, String odpsTableName, TableMeta tableMeta, TransferProjectConf transConf) {
    Configuration odpsWriterConf = originalConfig.clone();

    odpsWriterConf.set(Key.ODPSTABLENAME, odpsTableName);
    odpsWriterConf.set(Key.ODPS_SERVER, transConf.getOdpsServer());
    odpsWriterConf.set(Key.TUNNEL_SERVER, transConf.getOdpsTunnel());
    odpsWriterConf.set(Key.ACCESS_ID, transConf.getAccessId());
    odpsWriterConf.set(Key.ACCESS_KEY, transConf.getAccessKey());
    odpsWriterConf.set(Key.PROJECT, transConf.getProject());
    odpsWriterConf.set(Key.TRUNCATE, true);
    // The partition key is explicitly set to null rather than removed.
    odpsWriterConf.set(Key.PARTITION, null);

    // Missing or empty column metadata yields an empty column list.
    List<FieldSchema> fieldSchemas = tableMeta.getCols();
    List<String> columnNames = new ArrayList<String>();
    if (fieldSchemas != null) {
        for (FieldSchema fieldSchema : fieldSchemas) {
            columnNames.add(fieldSchema.getName());
        }
    }
    odpsWriterConf.set(Key.COLUMN, columnNames);

    return odpsWriterConf;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) FieldSchema(com.alibaba.datax.plugin.writer.adswriter.odps.FieldSchema) ArrayList(java.util.ArrayList)

Example 63 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class ColumnCastTest method test_string.

/**
 * Checks {@code StringCast.asDate} against several date/time string formats,
 * then again after changing the configured datetime format and time zone.
 *
 * <p>The expected epoch values correspond to UTC+8 wall-clock times
 * (e.g. "08:00:00" maps to epoch 0); presumably the configuration returned by
 * {@code produce()} selects that zone - TODO confirm.
 */
@Test
public void test_string() throws IOException, ParseException {
    Configuration configuration = this.produce();
    StringCast.init(configuration);
    System.out.println(StringCast.asDate(new StringColumn("2014-09-18")));

    // assertEquals reports expected vs. actual on failure, unlike assertTrue(a == b).
    Assert.assertEquals(1410969600000L, StringCast.asDate(new StringColumn("2014-09-18")).getTime());
    Assert.assertEquals(1410969600000L, StringCast.asDate(new StringColumn("20140918")).getTime());
    Assert.assertEquals(0L, StringCast.asDate(new StringColumn("08:00:00")).getTime());
    Assert.assertEquals(1411027200000L, StringCast.asDate(new StringColumn("2014-09-18 16:00:00")).getTime());

    // A custom datetime format takes effect once StringCast is re-initialised.
    configuration.set("common.column.datetimeFormat", "yyyy/MM/dd HH:mm:ss");
    StringCast.init(configuration);
    Assert.assertEquals(1411027200000L, StringCast.asDate(new StringColumn("2014/09/18 16:00:00")).getTime());

    // Switch parsing to GMT: the same wall-clock string now denotes a later instant.
    configuration.set("common.column.timeZone", "GMT");
    StringCast.init(configuration);
    java.util.Date date = StringCast.asDate(new StringColumn("2014/09/18 16:00:00"));
    String formatted = DateFormatUtils.format(date, "yyyy/MM/dd HH:mm:ss");
    System.out.println(formatted);
    // NOTE(review): the expected text looks like it relies on the JVM default time zone
    // being UTC+8, since no zone is passed to DateFormatUtils.format - confirm.
    Assert.assertEquals("2014/09/19 00:00:00", formatted);
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) Test(org.junit.Test)

Example 64 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class JobAssignUtil method assignFairly.

/**
 * Assigns the job's tasks fairly to task groups.
 *
 * <p>"Fair" means the load-balance resource mark carried by each task is taken
 * into account so that work is spread more evenly across task groups.
 * TODO: document this with a concrete example.
 *
 * @param configuration        job configuration whose content list holds the split tasks; must not be null
 * @param channelNumber        total number of channels; must be positive
 * @param channelsPerTaskGroup number of channels per task group; must be positive
 * @return the task group configurations produced by the assignment
 */
public static List<Configuration> assignFairly(Configuration configuration, int channelNumber, int channelsPerTaskGroup) {
    Validate.isTrue(configuration != null, "框架获得的 Job 不能为 null.");

    List<Configuration> taskConfigs = configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
    Validate.isTrue(taskConfigs.size() > 0, "框架获得的切分后的 Job 无内容.");
    Validate.isTrue(channelNumber > 0 && channelsPerTaskGroup > 0, "每个channel的平均task数[averTaskPerChannel],channel数目[channelNumber],每个taskGroup的平均channel数[channelsPerTaskGroup]都应该为正数");

    final String readerMarkKey = CoreConstant.JOB_READER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK;
    final String writerMarkKey = CoreConstant.JOB_WRITER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK;

    // Only the first task is inspected to decide whether resource marks are present.
    Configuration firstTask = taskConfigs.get(0);
    boolean readerMarked = StringUtils.isNotBlank(firstTask.getString(readerMarkKey));
    boolean writerMarked = StringUtils.isNotBlank(firstTask.getString(writerMarkKey));

    if (!readerMarked && !writerMarked) {
        // Fake one fixed key as the resource mark (reader or writer side would both do;
        // the reader side is chosen here).
        for (Configuration taskConfig : taskConfigs) {
            taskConfig.set(readerMarkKey, "aFakeResourceMarkForLoadBalance");
        }
        // Shuffle once, because some plugins do not set a resource mark.
        Collections.shuffle(taskConfigs, new Random(System.currentTimeMillis()));
    }

    int taskGroupNumber = (int) Math.ceil((double) channelNumber / channelsPerTaskGroup);
    List<Configuration> taskGroupConfigs = doAssign(parseAndGetResourceMarkAndTaskIdMap(taskConfigs), configuration, taskGroupNumber);

    // Adjust the channel count of each task group (an optimisation step).
    adjustChannelNumPerTaskGroup(taskGroupConfigs, channelNumber);
    return taskGroupConfigs;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration)

Example 65 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

the class JobContainer method mergeReaderAndWriterTaskConfigs.

/**
 * Combines the reader and writer task configurations pairwise into one task
 * configuration per index, using the index as the task id.
 *
 * @param readerTasksConfigs per-task reader parameters
 * @param writerTasksConfigs per-task writer parameters; must have the same size as the reader list
 * @param transformerConfigs transformer configurations attached to every task when non-null and non-empty
 * @return the merged task configurations, in index order
 * @throws DataXException with {@code PLUGIN_SPLIT_ERROR} when the reader and writer task counts differ
 */
private List<Configuration> mergeReaderAndWriterTaskConfigs(List<Configuration> readerTasksConfigs, List<Configuration> writerTasksConfigs, List<Configuration> transformerConfigs) {
    final int readerTaskCount = readerTasksConfigs.size();
    final int writerTaskCount = writerTasksConfigs.size();
    if (readerTaskCount != writerTaskCount) {
        throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_SPLIT_ERROR, String.format("reader切分的task数目[%d]不等于writer切分的task数目[%d].", readerTaskCount, writerTaskCount));
    }

    final boolean hasTransformers = transformerConfigs != null && !transformerConfigs.isEmpty();

    List<Configuration> mergedTasks = new ArrayList<Configuration>();
    for (int taskId = 0; taskId < readerTaskCount; taskId++) {
        Configuration mergedTask = Configuration.newDefault();
        mergedTask.set(CoreConstant.JOB_READER_NAME, this.readerPluginName);
        mergedTask.set(CoreConstant.JOB_READER_PARAMETER, readerTasksConfigs.get(taskId));
        mergedTask.set(CoreConstant.JOB_WRITER_NAME, this.writerPluginName);
        mergedTask.set(CoreConstant.JOB_WRITER_PARAMETER, writerTasksConfigs.get(taskId));
        if (hasTransformers) {
            mergedTask.set(CoreConstant.JOB_TRANSFORMER, transformerConfigs);
        }
        mergedTask.set(CoreConstant.TASK_ID, taskId);
        mergedTasks.add(mergedTask);
    }
    return mergedTasks;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) ArrayList(java.util.ArrayList)

Aggregations

Configuration (com.alibaba.datax.common.util.Configuration)82 ArrayList (java.util.ArrayList)27 Test (org.junit.Test)19 Communication (com.alibaba.datax.core.statistics.communication.Communication)13 DataXException (com.alibaba.datax.common.exception.DataXException)9 Method (java.lang.reflect.Method)8 Record (com.alibaba.datax.common.element.Record)7 JobContainer (com.alibaba.datax.core.job.JobContainer)6 IOException (java.io.IOException)5 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 LongColumn (com.alibaba.datax.common.element.LongColumn)4 TaskPluginCollector (com.alibaba.datax.common.plugin.TaskPluginCollector)4 TaskGroupContainer (com.alibaba.datax.core.taskgroup.TaskGroupContainer)4 Channel (com.alibaba.datax.core.transport.channel.Channel)4 MemoryChannel (com.alibaba.datax.core.transport.channel.memory.MemoryChannel)4 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)4 File (java.io.File)4 HashSet (java.util.HashSet)3 List (java.util.List)3 VMInfo (com.alibaba.datax.common.statistics.VMInfo)2