Use of com.webank.wedatasphere.qualitis.bean.RuleTaskDetail in project Qualitis by WeBankFinTech.
From class ExecutionManagerImpl, method getRule.
/**
 * Builds one {@link TaskRule} per rule detail of the given task.
 *
 * For each detail, the rule with the same id is looked up in {@code source}; the rule-level
 * fields (ids, names, template, data sources, alarm configs, child rule) are copied from that
 * looked-up rule, while the mid table name and the project fields are read from the detail itself.
 *
 * @param source rules that are searched by id for every rule detail of the task
 * @param dataQualityTask task whose rule details are converted
 * @return the converted task rules, in the iteration order of the task's rule details
 * @throws IndexOutOfBoundsException if a rule detail references a rule id that is not in {@code source}
 */
private List<TaskRule> getRule(List<Rule> source, DataQualityTask dataQualityTask) {
    List<TaskRule> result = new ArrayList<>();
    for (RuleTaskDetail ruleTaskDetail : dataQualityTask.getRuleTaskDetails()) {
        // Take the first matching rule without materialising every match into a list.
        // The exception type is the one the former get(0) on an empty list raised, so existing
        // handlers are unaffected; the message now names the missing rule id.
        Rule rule = source.stream()
            .filter(r -> r.getId().equals(ruleTaskDetail.getRule().getId()))
            .findFirst()
            .orElseThrow(() -> new IndexOutOfBoundsException(
                "No rule with id " + ruleTaskDetail.getRule().getId() + " found in source rules"));

        TaskRule taskRule = new TaskRule();
        taskRule.setRuleId(rule.getId());
        taskRule.setRuleGroupName(rule.getRuleGroup().getRuleGroupName());
        taskRule.setRuleType(rule.getRuleType());
        taskRule.setRuleName(rule.getName());
        taskRule.setCnName(rule.getCnName());
        taskRule.setRuleDetail(rule.getDetail());
        taskRule.setTemplateName(rule.getTemplate().getName());
        taskRule.setMidTableName(ruleTaskDetail.getMidTableName());

        // Project information is read from the rule attached to the task detail, not from the looked-up rule.
        taskRule.setProjectId(ruleTaskDetail.getRule().getProject().getId());
        taskRule.setProjectName(ruleTaskDetail.getRule().getProject().getName());
        taskRule.setProjectCnName(ruleTaskDetail.getRule().getProject().getCnName());
        taskRule.setProjectCreator(ruleTaskDetail.getRule().getProject().getCreateUser());

        // A child rule is optional; its data sources and alarm configs are converted the same way as the parent's.
        if (rule.getChildRule() != null) {
            taskRule.setChildRuleId(rule.getChildRule().getId());
            taskRule.setChildRuleType(rule.getChildRule().getRuleType());
            taskRule.setChildTaskRuleDataSourceList(getTaskRuleDataSourceBean(rule.getChildRule()));
            taskRule.setChildTaskRuleAlarmConfigsBeans(getTaskRuleAlarmConfigBean(rule.getChildRule()));
        }

        taskRule.setTaskRuleDataSourceList(getTaskRuleDataSourceBean(rule));
        taskRule.setTaskRuleAlarmConfigBeans(getTaskRuleAlarmConfigBean(rule));
        taskRule.setDeleteFailCheckResult(rule.getDeleteFailCheckResult());
        result.add(taskRule);
    }
    return result;
}
Use of com.webank.wedatasphere.qualitis.bean.RuleTaskDetail in project Qualitis by WeBankFinTech.
From class SqlTemplateConverter, method convert.
/**
 * Converts a task into a job whose code list is executable Scala/Spark code.
 *
 * The job code is assembled in order: the translator's init sentences, one
 * {@code spark.sql("set ...")} line per entry of {@code setFlag}, then the code generated
 * for every rule detail of the task. The task id and startup parameter are copied onto the job.
 *
 * @param dataQualityTask task to convert; must be non-null and contain at least one rule detail
 * @param date passed through to the per-rule code generation
 * @param setFlag Spark "set" configuration string, split on {@code SpecCharEnum.DIVIDER}; ignored when blank
 * @param execParams passed through to the per-rule code generation
 * @param runDate passed through to the per-rule code generation
 * @param clusterType not read by this method body
 * @param dataSourceMysqlConnect passed through to the per-rule code generation
 * @return the job holding the generated code, task id and startup parameter
 * @throws ConvertException declared; presumably raised by the per-rule code generation
 * @throws DataQualityTaskException if the task is null or has no rule details
 * @throws RuleVariableNotSupportException declared; presumably raised by the per-rule code generation
 * @throws RuleVariableNotFoundException declared; presumably raised by the per-rule code generation
 * @throws IOException declared; presumably raised by the per-rule code generation
 * @throws UnExpectedRequestException declared; presumably raised by the per-rule code generation
 */
@Override
public DataQualityJob convert(DataQualityTask dataQualityTask, Date date, String setFlag, Map<String, String> execParams, String runDate, String clusterType, Map<Long, Map> dataSourceMysqlConnect) throws ConvertException, DataQualityTaskException, RuleVariableNotSupportException, RuleVariableNotFoundException, IOException, UnExpectedRequestException {
    LOGGER.info("Start to convert template to actual code, task: {}", dataQualityTask);
    // A missing detail list is rejected the same way as an empty one instead of failing with a NullPointerException.
    if (null == dataQualityTask || dataQualityTask.getRuleTaskDetails() == null || dataQualityTask.getRuleTaskDetails().isEmpty()) {
        throw new DataQualityTaskException("Task can not be null or empty");
    }

    DataQualityJob job = new DataQualityJob();
    List<String> initSentence = abstractTranslator.getInitSentence();
    job.getJobCode().addAll(initSentence);
    LOGGER.info("Succeed to get init code. codes: {}", initSentence);

    if (StringUtils.isNotBlank(setFlag)) {
        LOGGER.info("Start to solve with set flag. Spark set conf string: {}", setFlag);
        String[] setStrs = setFlag.split(SpecCharEnum.DIVIDER.getValue());
        for (String str : setStrs) {
            job.getJobCode().add("spark.sql(\"set " + str + "\")");
        }
        LOGGER.info("Finish to solve with set flag.");
    }

    // count is the 1-based position of the rule within the task, handed to the code generator.
    int count = 0;
    for (RuleTaskDetail ruleTaskDetail : dataQualityTask.getRuleTaskDetails()) {
        count++;
        // A fresh StringBuffer per rule so the generator cannot leak partition edits between rules.
        List<String> codes = generateSparkSqlByTask(ruleTaskDetail.getRule(), date, dataQualityTask.getApplicationId(), ruleTaskDetail.getMidTableName(), dataQualityTask.getCreateTime(), new StringBuffer(dataQualityTask.getPartition()), execParams, count, runDate, dataSourceMysqlConnect);
        job.getJobCode().addAll(codes);
        LOGGER.info("Succeed to convert rule into code. rule_id: {}, rul_name: {}, codes: {}", ruleTaskDetail.getRule().getId(), ruleTaskDetail.getRule().getName(), codes);
    }
    LOGGER.info("Succeed to convert all rule into actual scala code.");

    job.setTaskId(dataQualityTask.getTaskId());
    job.setStartupParam(dataQualityTask.getStartupParam());
    return job;
}
Use of com.webank.wedatasphere.qualitis.bean.RuleTaskDetail in project Qualitis by WeBankFinTech.
From class SameDataSourceTaskDivider, method divide.
/**
 * Groups rules by the key returned from {@code getKey(rule, user)} and splits every group into
 * tasks holding at most {@code threshold} rules each.
 *
 * Rules that specify a static startup parameter are never merged with other rules: each one is
 * stored under its own random key and therefore ends up in its own task.
 *
 * @param rules rules to divide
 * @param applicationId application id set on every created task
 * @param createTime create time set on every created task
 * @param partition partition set on every created task
 * @param date not read by this method body
 * @param database database used as prefix of the mid table name; when it contains "_ind" and the
 *                 group has a proxy user, "&lt;proxyUser&gt;_ind" is used for that group instead
 * @param user passed to {@code getKey} when computing the grouping key
 * @param threshold maximum number of rules per task; values below 1 result in one rule per task
 * @return the created tasks
 * @throws ArgumentException declared; presumably raised by a helper called from here
 */
@Override
public List<DataQualityTask> divide(List<Rule> rules, String applicationId, String createTime, String partition, Date date, String database, String user, Integer threshold) throws ArgumentException {
    LOGGER.info("Start to classify rules by datasource");
    Map<String, List<Rule>> sameDataSourceRule = new HashMap<>(4);
    for (Rule rule : rules) {
        String key = getKey(rule, user);
        // Rules without specific execution parameters can be split into the same task,
        // and rules with execution parameters must be treated as a separate task.
        if (Boolean.TRUE.equals(rule.getSpecifyStaticStartupParam())) {
            List<Rule> single = new ArrayList<>();
            single.add(rule);
            // The random prefix keeps the entry unique while the trailing key segment stays parseable below.
            sameDataSourceRule.put(UUID.randomUUID().toString().replace("-", "") + "." + key, single);
        } else {
            sameDataSourceRule.computeIfAbsent(key, k -> new ArrayList<>()).add(rule);
        }
    }
    LOGGER.info("Succeed to classify rules by datasource. Result: {}", sameDataSourceRule);

    List<DataQualityTask> result = new ArrayList<>();
    for (Map.Entry<String, List<Rule>> entry : sameDataSourceRule.entrySet()) {
        String key = entry.getKey();
        List<Rule> ruleList = entry.getValue();
        String ruleStartup = ruleList.stream().map(Rule::getStaticStartupParam).filter(StringUtils::isNotBlank).collect(Collectors.joining());
        List<Long> ruleIdList = ruleList.stream().map(Rule::getId).collect(Collectors.toList());
        LOGGER.info("Start to divide rules: {} into a task.", ruleIdList);
        LOGGER.info("Start to divide rules. Key: {}", key);

        // The last dot-separated segment of the key is used as the proxy user.
        String[] keys = key.split("\\.");
        String proxyUser = keys[keys.length - 1];

        // Resolve the database per group. Reassigning the parameter would leak one group's
        // proxy-user database into later groups, depending on map iteration order.
        String taskDatabase = database;
        if (StringUtils.isNotBlank(proxyUser) && database.contains("_ind")) {
            taskDatabase = proxyUser.concat("_ind");
        }

        List<RuleTaskDetail> ruleTaskDetails = new ArrayList<>();
        for (Rule rule : ruleList) {
            // Flush a full batch first, then always add the current rule, so that the rule
            // that triggers the flush is not dropped.
            if (!ruleTaskDetails.isEmpty() && ruleTaskDetails.size() >= threshold) {
                if (StringUtils.isNotBlank(proxyUser)) {
                    LOGGER.info("Start to divide rules. Proxy user: {}", proxyUser);
                }
                result.add(buildDataQualityTask(applicationId, createTime, partition, ruleTaskDetails, ruleStartup, proxyUser));
                ruleTaskDetails = new ArrayList<>();
            }
            String tableName = generateTable(rule);
            String midTableName = taskDatabase + "." + tableName;
            ruleTaskDetails.add(new RuleTaskDetail(rule, midTableName));
        }

        if (!ruleTaskDetails.isEmpty()) {
            DataQualityTask tmp = buildDataQualityTask(applicationId, createTime, partition, ruleTaskDetails, ruleStartup, proxyUser);
            result.add(tmp);
            LOGGER.info("Succeed to divide rules: {} into a task {}", ruleIdList, tmp);
        }
    }
    LOGGER.info("Succeed to divide all rules into tasks. result: {}", result);
    return result;
}

/**
 * Creates a task for one batch of rule details, setting the startup parameter and the user
 * only when they are not blank.
 */
private DataQualityTask buildDataQualityTask(String applicationId, String createTime, String partition, List<RuleTaskDetail> ruleTaskDetails, String startupParam, String proxyUser) {
    DataQualityTask task = new DataQualityTask(applicationId, createTime, partition, ruleTaskDetails);
    if (StringUtils.isNotBlank(startupParam)) {
        task.setStartupParam(startupParam);
    }
    if (StringUtils.isNotBlank(proxyUser)) {
        task.setUser(proxyUser);
    }
    return task;
}
Aggregations