use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.
the class BcpFileImport method getFtpTask.
/**
 * Builds the task description used to import FTP content.
 *
 * <p>Every setting is derived from {@code BigDataConstants.CONTENT_TYPE_FTP}, the
 * {@code bcp.file.path} configuration property and the {@code NamingRuleUtils} helpers.
 *
 * @return a newly created {@code TaskBean} populated for the FTP content type
 */
private static TaskBean getFtpTask() {
    String task = BigDataConstants.CONTENT_TYPE_FTP;
    TaskBean ftp = new TaskBean();
    // BCP file directory: configured root directory plus the content type.
    ftp.setBcpPath(ConfigurationManager.getProperty("bcp.file.path") + "/" + task);
    // HBase table name. It is set exactly once, from the naming-rule helper, which is
    // the same source the other task builders in this class use.
    ftp.setHbaseTableName(NamingRuleUtils.getHBaseTableName(task));
    // HBase column family.
    ftp.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());
    // Temporary storage directory on HDFS for the generated HFiles.
    ftp.setHfileTmpStorePath(NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(task)));
    // Content (data) type.
    ftp.setContentType(task);
    // Full array of column names for this content type.
    ftp.setColumns(FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(task)));
    // Key columns used for filtering.
    ftp.setKeyColumns(new String[] { "file_name" });
    // Pass the bean itself so the logger formats it only when the level is enabled.
    logger.info("任务信息: {}", ftp);
    return ftp;
}
use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.
the class BcpFileImport method getImchatTask.
/**
 * Builds the task description used to import instant-messaging (chat) content.
 *
 * @return a newly created {@code TaskBean} populated for the IM chat content type
 */
private static TaskBean getImchatTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_IM_CHAT;
    final TaskBean chatTask = new TaskBean();

    // Directory that holds the BCP files for this content type.
    final String bcpDirectory = ConfigurationManager.getProperty("bcp.file.path") + "/" + contentType;
    chatTask.setBcpPath(bcpDirectory);

    // Target HBase table and column family.
    chatTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));
    chatTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());

    // Temporary HDFS directory for the generated HFiles.
    chatTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));

    // Content (data) type.
    chatTask.setContentType(contentType);

    // Full array of column names for this content type.
    chatTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));

    // No key columns are used for filtering chat data.
    chatTask.setKeyColumns(new String[0]);

    final String summary = chatTask.toString();
    logger.info("任务信息: {}", summary);
    return chatTask;
}
use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.
the class Main method getImchatTask.
/**
 * Builds the task description for instant-messaging (chat) content.
 *
 * @return a newly created {@code TaskBean} populated for the IM chat content type
 */
private static TaskBean getImchatTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_IM_CHAT;
    final TaskBean chatTask = new TaskBean();

    // Data location and record layout.
    chatTask.setBcpPath(tsvDataPathTemplate.replace("${task}", contentType));
    chatTask.setCaptureTimeIndex(20);
    chatTask.setContentType(contentType);
    chatTask.setDocType(BigDataConstants.SOLR_DOC_TYPE_IMCHAT_VALUE);
    chatTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));

    // HBase / HFile settings.
    chatTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));
    chatTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());
    chatTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));

    // NOTE: the replaceFileRN(path) call that used to follow is commented out,
    // so the BCP directory below is only logged, not processed.
    final String bcpDirectory =
            ConfigurationManager.getProperty("bcp.file.path") + File.separator + contentType;
    logger.info("替换 {} 的BCP数据的目录: {}", chatTask.getContentType(), bcpDirectory);
    // chatTask.replaceFileRN(bcpDirectory);
    return chatTask;
}
use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.
the class Main method getFtpTask.
/**
 * Builds the task description for FTP content.
 *
 * @return a newly created {@code TaskBean} populated for the FTP content type
 */
private static TaskBean getFtpTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_FTP;
    final TaskBean ftpTask = new TaskBean();

    // Data location and record layout.
    ftpTask.setBcpPath(tsvDataPathTemplate.replace("${task}", contentType));
    ftpTask.setCaptureTimeIndex(17);
    ftpTask.setContentType(contentType);
    ftpTask.setDocType(BigDataConstants.SOLR_DOC_TYPE_FTP_VALUE);
    ftpTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));

    // HBase / HFile settings.
    ftpTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());
    ftpTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));
    ftpTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));

    return ftpTask;
}
use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.
the class SparkOperateBcp method bcpWriteIntoSolr.
/**
 * Indexes BCP rows into Solr, submitting one batch per RDD partition.
 *
 * <p>Each row is read as {@code [rowkey, value1, value2, ...]}: the first element is the
 * rowkey and the remaining elements are matched positionally against
 * {@code task.getColumns()}. The rowkey is split on {@code "_"}; its first part is stored
 * as {@code capture_time} and its second part as {@code ID}. Null or empty values, and the
 * {@code FILE_URL} / {@code FILE_SIZE} columns, are not added to the document.
 *
 * @param javaRDD rows to index, one {@code String[]} per record
 * @param task    task description supplying the content type and the column names
 * @throws IllegalArgumentException (raised inside the partition function) if a rowkey does
 *         not contain at least two {@code "_"}-separated parts, or if a row carries more
 *         values than there are column names
 */
public static void bcpWriteIntoSolr(JavaRDD<String[]> javaRDD, TaskBean task) {
    logger.info("开始将 {} 的BCP数据索引到Solr", task.getContentType());
    /*
     * Write the data into Solr.
     */
    javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
        String[] columns = task.getColumns();
        List<SolrInputDocument> docs = new ArrayList<>();
        while (iterator.hasNext()) {
            String[] row = iterator.next();
            String rowkey = row[0];

            // Split once and validate, so a malformed rowkey is reported with its value
            // instead of surfacing as a bare ArrayIndexOutOfBoundsException.
            String[] rowkeyParts = rowkey.split("_");
            if (rowkeyParts.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed rowkey, expected at least two '_'-separated parts: " + rowkey);
            }

            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("ID", rowkeyParts[1]);
            // Locale.ROOT keeps field names stable regardless of the JVM default locale
            // (e.g. Turkish would otherwise map 'i' to a dotted capital 'İ').
            doc.addField(BigDataConstants.SOLR_CONTENT_ID.toUpperCase(java.util.Locale.ROOT), rowkey);
            doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task.getContentType()));
            // NOTE(review): this field name is lower-case while the others are upper-case;
            // kept as-is — confirm against the Solr schema.
            doc.addField("capture_time", rowkeyParts[0]);
            doc.addField("IMPORT_TIME", DateFormatUtils.DATE_TIME_FORMAT.format(new Date()));

            // row[i] (i >= 1) belongs to columns[i - 1]; iterate in place instead of copying.
            for (int i = 1; i < row.length; i++) {
                if (i > columns.length) {
                    throw new IllegalArgumentException(
                            "Row " + rowkey + " has " + (row.length - 1)
                                    + " values but only " + columns.length + " columns are defined");
                }
                String column = columns[i - 1];
                String key = column.toUpperCase(java.util.Locale.ROOT);
                String value = row[i];
                boolean hasValue = value != null && !value.isEmpty();
                boolean excluded = "FILE_URL".equalsIgnoreCase(column) || "FILE_SIZE".equalsIgnoreCase(column);
                if (hasValue && !excluded) {
                    doc.addField(key, value);
                }
            }
            docs.add(doc);
        }
        SolrUtil.submitToSolr(client, docs, 0, new Date());
    });
    logger.info("####### {}的BCP数据索引Solr完成 #######", task.getContentType());
}
Aggregations