use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class JobAssignUtil method doAssign.
/**
 * Distributes the job's tasks over {@code taskGroupNumber} task groups in round-robin order.
 * <p>
 * In every round one task is taken from each resource mark (following the map's iteration
 * order), and each task taken goes to the next task group, wrapping around after the last one.
 * For example:
 * <pre>
 * database a holds tables: 0, 1, 2
 * database b holds tables: 3, 4
 * database c holds tables: 5, 6, 7
 *
 * with 4 taskGroups the assignment is:
 * taskGroup-0: 0, 4
 * taskGroup-1: 3, 6
 * taskGroup-2: 5, 2
 * taskGroup-3: 1, 7
 * </pre>
 * The task-id lists held by {@code resourceMarkAndTaskIdMap} are only read, never modified.
 *
 * @param resourceMarkAndTaskIdMap resource mark mapped to the ids of its tasks; every id is used
 *                                 as an index into the job's content list
 * @param jobConfiguration         job configuration providing the content list; a clone of it
 *                                 without the content entry serves as the task group template
 * @param taskGroupNumber          number of task groups to build; expected to be positive
 * @return one configuration per task group, ordered by task group id
 */
private static List<Configuration> doAssign(LinkedHashMap<String, List<Integer>> resourceMarkAndTaskIdMap, Configuration jobConfiguration, int taskGroupNumber) {
    List<Configuration> contentConfig = jobConfiguration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
    Configuration taskGroupTemplate = jobConfiguration.clone();
    taskGroupTemplate.remove(CoreConstant.DATAX_JOB_CONTENT);

    List<List<Configuration>> taskGroupConfigList = new ArrayList<List<Configuration>>(taskGroupNumber);
    for (int i = 0; i < taskGroupNumber; i++) {
        taskGroupConfigList.add(new LinkedList<Configuration>());
    }

    // Work on random-access snapshots of the id lists: the caller's lists stay intact and
    // reading position `round` is cheap whatever list type the caller supplied.
    List<List<Integer>> taskIdsPerResource = new ArrayList<List<Integer>>(resourceMarkAndTaskIdMap.size());
    int mapValueMaxLength = 0;
    for (List<Integer> taskIds : resourceMarkAndTaskIdMap.values()) {
        taskIdsPerResource.add(new ArrayList<Integer>(taskIds));
        mapValueMaxLength = Math.max(mapValueMaxLength, taskIds.size());
    }

    // Round `round` hands out the round-th task of every resource mark that still has one.
    int taskGroupIndex = 0;
    for (int round = 0; round < mapValueMaxLength; round++) {
        for (List<Integer> taskIds : taskIdsPerResource) {
            if (round < taskIds.size()) {
                int taskId = taskIds.get(round);
                taskGroupConfigList.get(taskGroupIndex % taskGroupNumber).add(contentConfig.get(taskId));
                taskGroupIndex++;
            }
        }
    }

    List<Configuration> result = new LinkedList<Configuration>();
    for (int i = 0; i < taskGroupNumber; i++) {
        Configuration tempTaskGroupConfig = taskGroupTemplate.clone();
        tempTaskGroupConfig.set(CoreConstant.DATAX_JOB_CONTENT, taskGroupConfigList.get(i));
        tempTaskGroupConfig.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_ID, i);
        result.add(tempTaskGroupConfig);
    }
    return result;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class AdsUtil method generateConf.
/**
 * Builds the configuration required by the ODPSWriter plugin.
 * <p>
 * Starts from a clone of {@code originalConfig}, fills in the ODPS connection settings taken
 * from {@code transConf}, and sets the column list to the names of the table's columns.
 *
 * @param originalConfig configuration to clone; the original is left untouched by this method
 * @param odpsTableName  name of the ODPS table to store in the configuration
 * @param tableMeta      table metadata whose column names become the column list
 * @param transConf      source of the ODPS server, tunnel, credentials and project values
 * @return the newly built configuration
 */
public static Configuration generateConf(Configuration originalConfig, String odpsTableName, TableMeta tableMeta, TransferProjectConf transConf) {
    Configuration odpsWriterConf = originalConfig.clone();

    odpsWriterConf.set(Key.ODPSTABLENAME, odpsTableName);
    odpsWriterConf.set(Key.ODPS_SERVER, transConf.getOdpsServer());
    odpsWriterConf.set(Key.TUNNEL_SERVER, transConf.getOdpsTunnel());
    odpsWriterConf.set(Key.ACCESS_ID, transConf.getAccessId());
    odpsWriterConf.set(Key.ACCESS_KEY, transConf.getAccessKey());
    odpsWriterConf.set(Key.PROJECT, transConf.getProject());
    odpsWriterConf.set(Key.TRUNCATE, true);
    // The partition entry is set to null rather than removed from the configuration.
    odpsWriterConf.set(Key.PARTITION, null);

    // A missing or empty column list results in an empty column-name list.
    List<String> columnNames = new ArrayList<String>();
    List<FieldSchema> fieldSchemas = tableMeta.getCols();
    if (fieldSchemas != null) {
        for (FieldSchema fieldSchema : fieldSchemas) {
            columnNames.add(fieldSchema.getName());
        }
    }
    odpsWriterConf.set(Key.COLUMN, columnNames);

    return odpsWriterConf;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class ColumnCastTest method test_string.
/**
 * Exercises {@code StringCast.asDate} with the configuration returned by {@code produce()},
 * then with a custom datetime format, and finally with the time zone switched to GMT.
 * <p>
 * NOTE(review): the expected epoch values imply that {@code produce()} configures a UTC+8
 * time zone - confirm against that helper.
 */
@Test
public void test_string() throws IOException, ParseException {
    Configuration configuration = this.produce();
    StringCast.init(configuration);
    System.out.println(StringCast.asDate(new StringColumn("2014-09-18")));
    // assertEquals reports expected and actual values on failure, unlike assertTrue(a == b).
    Assert.assertEquals(1410969600000L, StringCast.asDate(new StringColumn("2014-09-18")).getTime());
    Assert.assertEquals(1410969600000L, StringCast.asDate(new StringColumn("20140918")).getTime());
    Assert.assertEquals(0L, StringCast.asDate(new StringColumn("08:00:00")).getTime());
    Assert.assertEquals(1411027200000L, StringCast.asDate(new StringColumn("2014-09-18 16:00:00")).getTime());

    configuration.set("common.column.datetimeFormat", "yyyy/MM/dd HH:mm:ss");
    StringCast.init(configuration);
    Assert.assertEquals(1411027200000L, StringCast.asDate(new StringColumn("2014/09/18 16:00:00")).getTime());

    configuration.set("common.column.timeZone", "GMT");
    StringCast.init(configuration);
    java.util.Date date = StringCast.asDate(new StringColumn("2014/09/18 16:00:00"));
    // The expected text is 8 hours ahead of the GMT input, so render it in an explicit UTC+8
    // zone; formatting with the JVM default zone only passed on machines set to UTC+8.
    String formatted = DateFormatUtils.format(date, "yyyy/MM/dd HH:mm:ss", java.util.TimeZone.getTimeZone("GMT+8"));
    System.out.println(formatted);
    Assert.assertEquals("2014/09/19 00:00:00", formatted);
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class JobAssignUtil method assignFairly.
/**
 * Fairly assigns the job's tasks to task groups.
 * <p>
 * "Fair" means that the load-balance resource mark carried by the tasks is taken into account
 * so that work is spread more evenly over the task groups. Only the first task is inspected to
 * decide whether resource marks are present; when neither its reader nor its writer has one,
 * every task receives the same placeholder mark on the reader side and the task list is shuffled.
 * TODO: document with a concrete example.
 *
 * @param configuration        job configuration; must not be null and must contain tasks
 * @param channelNumber        total number of channels; must be positive
 * @param channelsPerTaskGroup number of channels per task group; must be positive
 * @return the task group configurations
 */
public static List<Configuration> assignFairly(Configuration configuration, int channelNumber, int channelsPerTaskGroup) {
    Validate.isTrue(configuration != null, "框架获得的 Job 不能为 null.");

    List<Configuration> tasks = configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT);
    Validate.isTrue(tasks.size() > 0, "框架获得的切分后的 Job 无内容.");
    Validate.isTrue(channelNumber > 0 && channelsPerTaskGroup > 0, "每个channel的平均task数[averTaskPerChannel],channel数目[channelNumber],每个taskGroup的平均channel数[channelsPerTaskGroup]都应该为正数");

    // Number of task groups: channel count divided by channels per group, rounded up.
    int taskGroupNumber = (int) Math.ceil(1.0 * channelNumber / channelsPerTaskGroup);

    String readerMarkPath = CoreConstant.JOB_READER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK;
    String writerMarkPath = CoreConstant.JOB_WRITER_PARAMETER + "." + CommonConstant.LOAD_BALANCE_RESOURCE_MARK;

    Configuration firstTask = tasks.get(0);
    String readerResourceMark = firstTask.getString(readerMarkPath);
    String writerResourceMark = firstTask.getString(writerMarkPath);

    boolean lacksResourceMark = !StringUtils.isNotBlank(readerResourceMark) && !StringUtils.isNotBlank(writerResourceMark);
    if (lacksResourceMark) {
        // Fake one fixed key as the resource mark (reader or writer would both work; the
        // reader side is chosen here).
        for (Configuration task : tasks) {
            task.set(readerMarkPath, "aFakeResourceMarkForLoadBalance");
        }
        // Shuffle once to compensate for plugins that did not set a resource mark.
        Collections.shuffle(tasks, new Random(System.currentTimeMillis()));
    }

    List<Configuration> taskGroupConfigs = doAssign(parseAndGetResourceMarkAndTaskIdMap(tasks), configuration, taskGroupNumber);

    // Adjust the number of channels of each task group (an optimization).
    adjustChannelNumPerTaskGroup(taskGroupConfigs, channelNumber);
    return taskGroupConfigs;
}
use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.
the class JobContainer method mergeReaderAndWriterTaskConfigs.
/**
 * Pairs the i-th reader task configuration with the i-th writer task configuration and wraps
 * each pair in a new task configuration.
 * <p>
 * Every task configuration carries the reader and writer plugin names, the two parameter
 * configurations, the transformer list (only when one is given and non-empty) and the pair's
 * index as task id.
 *
 * @param readerTasksConfigs reader task configurations produced by the split
 * @param writerTasksConfigs writer task configurations produced by the split; must have the
 *                           same size as {@code readerTasksConfigs}
 * @param transformerConfigs transformer configurations shared by all tasks; may be null or empty
 * @return the merged task configurations, in reader/writer order
 * @throws DataXException if the reader and writer task counts differ
 */
private List<Configuration> mergeReaderAndWriterTaskConfigs(List<Configuration> readerTasksConfigs, List<Configuration> writerTasksConfigs, List<Configuration> transformerConfigs) {
    int readerTaskCount = readerTasksConfigs.size();
    int writerTaskCount = writerTasksConfigs.size();
    if (readerTaskCount != writerTaskCount) {
        throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_SPLIT_ERROR, String.format("reader切分的task数目[%d]不等于writer切分的task数目[%d].", readerTaskCount, writerTaskCount));
    }

    boolean hasTransformers = transformerConfigs != null && !transformerConfigs.isEmpty();

    List<Configuration> mergedTasks = new ArrayList<Configuration>();
    for (int taskId = 0; taskId < readerTaskCount; taskId++) {
        Configuration mergedTask = Configuration.newDefault();
        mergedTask.set(CoreConstant.JOB_READER_NAME, this.readerPluginName);
        mergedTask.set(CoreConstant.JOB_READER_PARAMETER, readerTasksConfigs.get(taskId));
        mergedTask.set(CoreConstant.JOB_WRITER_NAME, this.writerPluginName);
        mergedTask.set(CoreConstant.JOB_WRITER_PARAMETER, writerTasksConfigs.get(taskId));
        if (hasTransformers) {
            mergedTask.set(CoreConstant.JOB_TRANSFORMER, transformerConfigs);
        }
        mergedTask.set(CoreConstant.TASK_ID, taskId);
        mergedTasks.add(mergedTask);
    }
    return mergedTasks;
}
Aggregations