Search in sources :

Example 6 with TaskBean

use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.

the class BcpFileImport method getFtpTask.

/**
 * Builds the task description used to import FTP BCP data.
 *
 * <p>The HBase table name comes from {@code NamingRuleUtils.getHBaseTableName(task)},
 * the same source the chat task in this class uses.
 *
 * @return a {@code TaskBean} populated with the FTP task settings
 */
private static TaskBean getFtpTask() {
    String task = BigDataConstants.CONTENT_TYPE_FTP;
    TaskBean ftp = new TaskBean();
    // BCP file path: <bcp.file.path>/<content type>
    ftp.setBcpPath(ConfigurationManager.getProperty("bcp.file.path") + "/" + task);
    // HBase table name (set once; a prefix-based value here would just be overwritten)
    ftp.setHbaseTableName(NamingRuleUtils.getHBaseTableName(task));
    // HBase column family
    ftp.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());
    // Temporary storage directory on HDFS for the generated HFiles
    ftp.setHfileTmpStorePath(NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(task)));
    // Content type
    ftp.setContentType(task);
    // Array of all field names
    ftp.setColumns(FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(task)));
    // Key fields used for filtering
    ftp.setKeyColumns(new String[] { "file_name" });
    logger.info("任务信息: {}", ftp.toString());
    return ftp;
}
Also used : TaskBean(com.rainsoft.domain.TaskBean)

Example 7 with TaskBean

use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.

the class BcpFileImport method getImchatTask.

/**
 * Assembles the task description for the instant-messaging chat content type.
 *
 * @return a {@code TaskBean} populated with the chat task settings
 */
private static TaskBean getImchatTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_IM_CHAT;
    final TaskBean chatTask = new TaskBean();

    // Content type handled by this task
    chatTask.setContentType(contentType);

    // BCP file directory: <bcp.file.path>/<content type>
    chatTask.setBcpPath(ConfigurationManager.getProperty("bcp.file.path") + "/" + contentType);

    // Target HBase table and its column family
    chatTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));
    chatTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());

    // Temporary storage directory on HDFS for the generated HFiles
    chatTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));

    // Array of all field names
    chatTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));

    // Key fields used for filtering: none for chat data
    chatTask.setKeyColumns(new String[0]);

    logger.info("任务信息: {}", chatTask.toString());
    return chatTask;
}
Also used : TaskBean(com.rainsoft.domain.TaskBean)

Example 8 with TaskBean

use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.

the class Main method getImchatTask.

/**
 * Assembles the task description for the instant-messaging chat content type.
 *
 * <p>The BCP directory is computed and logged, but nothing is done with it here:
 * the call that would consume it is disabled (see the end of the method).
 *
 * @return a {@code TaskBean} populated with the chat task settings
 */
private static TaskBean getImchatTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_IM_CHAT;
    final TaskBean chatTask = new TaskBean();

    // Content and Solr document types
    chatTask.setContentType(contentType);
    chatTask.setDocType(BigDataConstants.SOLR_DOC_TYPE_IMCHAT_VALUE);

    // Data path derived from the TSV path template
    chatTask.setBcpPath(tsvDataPathTemplate.replace("${task}", contentType));
    // NOTE(review): presumably the position of the capture-time field in a row - confirm
    chatTask.setCaptureTimeIndex(20);

    // HBase target table and column family
    chatTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));
    chatTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());

    // Field names and the temporary HFile directory share the same task key
    chatTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));
    chatTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));

    String bcpDir = ConfigurationManager.getProperty("bcp.file.path") + File.separator + contentType;
    logger.info("替换 {} 的BCP数据的目录: {}", chatTask.getContentType(), bcpDir);
    // Disabled: chatTask.replaceFileRN(bcpDir);
    return chatTask;
}
Also used : TaskBean(com.rainsoft.domain.TaskBean)

Example 9 with TaskBean

use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.

the class Main method getFtpTask.

/**
 * Assembles the task description for the FTP content type.
 *
 * @return a {@code TaskBean} populated with the FTP task settings
 */
private static TaskBean getFtpTask() {
    final String contentType = BigDataConstants.CONTENT_TYPE_FTP;
    final TaskBean ftpTask = new TaskBean();

    // Content and Solr document types
    ftpTask.setContentType(contentType);
    ftpTask.setDocType(BigDataConstants.SOLR_DOC_TYPE_FTP_VALUE);

    // Data path derived from the TSV path template
    ftpTask.setBcpPath(tsvDataPathTemplate.replace("${task}", contentType));
    // NOTE(review): presumably the position of the capture-time field in a row - confirm
    ftpTask.setCaptureTimeIndex(17);

    // HBase target table and column family
    ftpTask.setHbaseTableName(NamingRuleUtils.getHBaseTableName(contentType));
    ftpTask.setHbaseCF(NamingRuleUtils.getHBaseContentTableCF());

    // Field names and the temporary HFile directory share the same task key
    ftpTask.setColumns(
            FieldConstants.BCP_FILE_COLUMN_MAP.get(NamingRuleUtils.getBcpTaskKey(contentType)));
    ftpTask.setHfileTmpStorePath(
            NamingRuleUtils.getHFileTaskDir(NamingRuleUtils.getBcpTaskKey(contentType)));

    return ftpTask;
}
Also used : TaskBean(com.rainsoft.domain.TaskBean)

Example 10 with TaskBean

use of com.rainsoft.domain.TaskBean in project beijingThirdPeriod by weidongcao.

the class SparkOperateBcp method bcpWriteIntoSolr.

/**
 * Indexes BCP rows into Solr, submitting one batch of documents per RDD partition.
 *
 * <p>Each row is a string array: element 0 is the row key and the remaining elements
 * are field values, matched by position against {@code task.getColumns()}. The row key
 * is split on {@code "_"}; the first piece is stored as {@code capture_time} and the
 * second as {@code ID}. Null or empty values, and the {@code FILE_URL} and
 * {@code FILE_SIZE} columns, are not added to the document.
 *
 * <p>Field names are upper-cased with {@code Locale.ROOT} so they do not depend on the
 * JVM default locale.
 *
 * @param javaRDD rows to index
 * @param task    task description supplying the content type and the column names
 */
public static void bcpWriteIntoSolr(JavaRDD<String[]> javaRDD, TaskBean task) {
    logger.info("开始将 {} 的BCP数据索引到Solr", task.getContentType());
    /*
     * Write the data into Solr.
     */
    javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
        List<SolrInputDocument> docs = new ArrayList<>();
        // Column names are the same for every row, so fetch them once per partition.
        String[] columns = task.getColumns();
        while (iterator.hasNext()) {
            String[] row = iterator.next();
            String rowkey = row[0];
            // Split once: [0] is stored as capture_time, [1] as ID.
            String[] rowkeyParts = rowkey.split("_");

            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("ID", rowkeyParts[1]);
            doc.addField(BigDataConstants.SOLR_CONTENT_ID.toUpperCase(java.util.Locale.ROOT), rowkey);
            doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task.getContentType()));
            doc.addField("capture_time", rowkeyParts[0]);
            // NOTE(review): if DATE_TIME_FORMAT is a shared SimpleDateFormat it is not
            // thread-safe across concurrently running partitions - confirm.
            doc.addField("IMPORT_TIME", DateFormatUtils.DATE_TIME_FORMAT.format(new Date()));

            // row[i] (i >= 1) holds the value of columns[i - 1]; no sub-array copy needed.
            for (int i = 1; i < row.length; i++) {
                String value = row[i];
                String key = columns[i - 1].toUpperCase(java.util.Locale.ROOT);
                boolean hasValue = (null != value) && !value.isEmpty();
                boolean excluded = "FILE_URL".equalsIgnoreCase(key) || "FILE_SIZE".equalsIgnoreCase(key);
                if (hasValue && !excluded) {
                    doc.addField(key, value);
                }
            }
            docs.add(doc);
        }
        SolrUtil.submitToSolr(client, docs, 0, new Date());
    });
    logger.info("####### {}的BCP数据索引Solr完成 #######", task.getContentType());
}
Also used : PairFlatMapFunction(org.apache.spark.api.java.function.PairFlatMapFunction) Date(java.util.Date) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LoggerFactory(org.slf4j.LoggerFactory) ArrayUtils(org.apache.commons.lang3.ArrayUtils) VoidFunction(org.apache.spark.api.java.function.VoidFunction) DateFormatUtils(com.rainsoft.utils.DateFormatUtils) ArrayList(java.util.ArrayList) TaskBean(com.rainsoft.domain.TaskBean) ClassPathXmlApplicationContext(org.springframework.context.support.ClassPathXmlApplicationContext) BigDataConstants(com.rainsoft.BigDataConstants) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) SolrUtil(com.rainsoft.utils.SolrUtil) SparkConf(org.apache.spark.SparkConf) RowkeyColumnSecondarySort(com.rainsoft.hbase.RowkeyColumnSecondarySort) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SolrClient(org.apache.solr.client.solrj.SolrClient) Serializable(java.io.Serializable) HBaseUtils(com.rainsoft.utils.HBaseUtils) List(java.util.List) AbstractApplicationContext(org.springframework.context.support.AbstractApplicationContext) FieldConstants(com.rainsoft.FieldConstants) Function(org.apache.spark.api.java.function.Function) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrInputDocument(org.apache.solr.common.SolrInputDocument) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Date(java.util.Date)

Aggregations

TaskBean (com.rainsoft.domain.TaskBean)10 BigDataConstants (com.rainsoft.BigDataConstants)3 FieldConstants (com.rainsoft.FieldConstants)3 RowkeyColumnSecondarySort (com.rainsoft.hbase.RowkeyColumnSecondarySort)3 DateFormatUtils (com.rainsoft.utils.DateFormatUtils)3 HBaseUtils (com.rainsoft.utils.HBaseUtils)3 SolrUtil (com.rainsoft.utils.SolrUtil)3 ArrayUtils (org.apache.commons.lang3.ArrayUtils)3 SolrClient (org.apache.solr.client.solrj.SolrClient)3 SolrInputDocument (org.apache.solr.common.SolrInputDocument)3 SparkConf (org.apache.spark.SparkConf)3 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)3 JavaRDD (org.apache.spark.api.java.JavaRDD)3 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)3 FlatMapFunction (org.apache.spark.api.java.function.FlatMapFunction)3 Function (org.apache.spark.api.java.function.Function)3 PairFlatMapFunction (org.apache.spark.api.java.function.PairFlatMapFunction)3 VoidFunction (org.apache.spark.api.java.function.VoidFunction)3 Logger (org.slf4j.Logger)3 LoggerFactory (org.slf4j.LoggerFactory)3