use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.
the class CrawlerDetection method checkByRateInParallel.
/**
 * Runs {@code checkByRate} for every user of {@code httpType} in parallel and logs the
 * summed result.
 *
 * <p>Each Spark partition creates its own {@link ESDriver} with a bulk processor. Both are
 * released in {@code finally} blocks so that a failure while processing a user does not
 * leave the driver open on the executor.
 *
 * @throws InterruptedException declared by the original signature; kept for callers
 * @throws IOException declared by the original signature; kept for callers
 */
void checkByRateInParallel() throws InterruptedException, IOException {
  JavaRDD<String> userRDD = getUserRDD(this.httpType);
  LOG.info("Original User count: {}", userRDD.count());

  int userCount = userRDD.mapPartitions((FlatMapFunction<Iterator<String>, Integer>) iterator -> {
    ESDriver tmpES = new ESDriver(props);
    try {
      tmpES.createBulkProcessor();
      try {
        List<Integer> realUserNums = new ArrayList<>();
        while (iterator.hasNext()) {
          realUserNums.add(checkByRate(tmpES, iterator.next()));
        }
        return realUserNums.iterator();
      } finally {
        // Tear the bulk processor down before the driver itself, as the original did.
        tmpES.destroyBulkProcessor();
      }
    } finally {
      tmpES.close();
    }
    // fold with a zero value instead of reduce: an RDD without any user yields 0
    // rather than failing with "empty collection".
  }).fold(0, (Function2<Integer, Integer, Integer>) (a, b) -> a + b);

  LOG.info("User count: {}", userCount);
}
use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.
the class SessionGenerator method combineShortSessionsInParallel.
/**
 * Runs {@code combineShortSessions} for every user of {@code cleanupType} in parallel.
 *
 * <p>Each Spark partition creates its own {@link ESDriver} with a bulk processor. Both are
 * released in {@code finally} blocks so that a failure while processing a user does not
 * leave the driver open on the executor.
 *
 * @param timeThres threshold passed unchanged to {@code combineShortSessions}
 * @throws InterruptedException declared by the original signature; kept for callers
 * @throws IOException declared by the original signature; kept for callers
 */
public void combineShortSessionsInParallel(int timeThres) throws InterruptedException, IOException {
  JavaRDD<String> userRDD = getUserRDD(this.cleanupType);

  userRDD.foreachPartition(new VoidFunction<Iterator<String>>() {
    private static final long serialVersionUID = 1L;

    @Override
    public void call(Iterator<String> arg0) throws Exception {
      ESDriver tmpES = new ESDriver(props);
      try {
        tmpES.createBulkProcessor();
        try {
          while (arg0.hasNext()) {
            combineShortSessions(tmpES, arg0.next(), timeThres);
          }
        } finally {
          // Tear the bulk processor down before the driver itself, as the original did.
          tmpES.destroyBulkProcessor();
        }
      } finally {
        tmpES.close();
      }
    }
  });
}
use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.
the class SessionGenerator method genSessionByRefererInParallel.
/**
 * Runs {@code genSessionByReferer} for every user of {@code cleanupType} in parallel and
 * logs the summed result.
 *
 * <p>Each Spark partition creates its own {@link ESDriver} with a bulk processor. Both are
 * released in {@code finally} blocks so that a failure while processing a user does not
 * leave the driver open on the executor.
 *
 * @param timeThres threshold passed unchanged to {@code genSessionByReferer}
 * @throws InterruptedException declared by the original signature; kept for callers
 * @throws IOException declared by the original signature; kept for callers
 */
public void genSessionByRefererInParallel(int timeThres) throws InterruptedException, IOException {
  JavaRDD<String> userRDD = getUserRDD(this.cleanupType);

  JavaRDD<Integer> sessionNumRDD = userRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
    private static final long serialVersionUID = 1L;

    @Override
    public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
      ESDriver tmpES = new ESDriver(props);
      try {
        tmpES.createBulkProcessor();
        try {
          List<Integer> sessionNums = new ArrayList<>();
          while (arg0.hasNext()) {
            sessionNums.add(genSessionByReferer(tmpES, arg0.next(), timeThres));
          }
          return sessionNums.iterator();
        } finally {
          // Tear the bulk processor down before the driver itself, as the original did.
          tmpES.destroyBulkProcessor();
        }
      } finally {
        tmpES.close();
      }
    }
  });

  // fold with a zero value instead of reduce: an RDD without any user yields 0
  // rather than failing with "empty collection".
  int sessionCount = sessionNumRDD.fold(0, new Function2<Integer, Integer, Integer>() {
    private static final long serialVersionUID = 1L;

    @Override
    public Integer call(Integer a, Integer b) {
      return a + b;
    }
  });

  LOG.info("Initial Session count: {}", sessionCount);
}
use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.
the class SessionExtractor method extractRankingTrainDataInParallel.
/**
 * Collects the sessions of every log index and extracts their ranking training data in
 * parallel.
 *
 * <p>The session strings are distributed over 16 partitions. Each partition creates its own
 * {@link ESDriver} with a bulk processor; both are released in {@code finally} blocks so
 * that a failure while processing a session does not leave the driver open on the executor.
 *
 * @param props configuration; supplies the log index prefix and is used to build the
 *              per-partition drivers
 * @param spark Spark driver whose context parallelizes the session list
 * @param es    Elasticsearch driver used on the calling side to list indices and sessions
 * @return RDD of the training data extracted from all sessions
 */
protected JavaRDD<RankingTrainData> extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver es) {
  List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
  LOG.info(logIndexList.toString());

  List<String> sessionIdList = new ArrayList<>();
  for (String logIndex : logIndexList) {
    sessionIdList.addAll(this.getSessions(props, es, logIndex));
  }

  JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);

  JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, RankingTrainData>() {
    private static final long serialVersionUID = 1L;

    @Override
    public Iterator<RankingTrainData> call(Iterator<String> arg0) throws Exception {
      ESDriver tmpES = new ESDriver(props);
      try {
        tmpES.createBulkProcessor();
        try {
          Session session = new Session(props, tmpES);
          List<RankingTrainData> clickstreams = new ArrayList<>();
          while (arg0.hasNext()) {
            // Each entry is a comma-separated string; fields 1, 2 and 0 are handed to
            // getRankingTrainData in that order.
            String[] sArr = arg0.next().split(",");
            clickstreams.addAll(session.getRankingTrainData(sArr[1], sArr[2], sArr[0]));
          }
          return clickstreams.iterator();
        } finally {
          // Tear the bulk processor down before the driver itself, as the original did.
          tmpES.destroyBulkProcessor();
        }
      } finally {
        tmpES.close();
      }
    }
  });

  // NOTE(review): count() evaluates the whole RDD just for this log line; the RDD is not
  // persisted here, so the caller's first action will recompute it. Confirm whether that
  // double evaluation is acceptable.
  LOG.info("Clickstream number: {}", clickStreamRDD.count());
  return clickStreamRDD;
}
use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.
the class MudrodContextListener method contextInitialized.
/**
 * Creates the Mudrod engine with its Elasticsearch and Spark drivers, builds a searcher and
 * a ranker on top of it, and publishes all three as servlet-context attributes.
 *
 * @see ServletContextListener#contextInitialized(ServletContextEvent)
 */
@Override
public void contextInitialized(ServletContextEvent arg0) {
  // Engine first: the drivers below are built from the configuration it loads.
  me = new MudrodEngine();
  final Properties config = me.loadConfig();
  me.setESDriver(new ESDriver(config));
  me.setSparkDriver(new SparkDriver(config));

  final ServletContext servletContext = arg0.getServletContext();

  // The searcher gets no Spark driver (null); the ranker gets both drivers.
  final Searcher mudrodSearcher = new Searcher(config, me.getESDriver(), null);
  final Ranker mudrodRanker = new Ranker(config, me.getESDriver(), me.getSparkDriver());

  servletContext.setAttribute("MudrodInstance", me);
  servletContext.setAttribute("MudrodSearcher", mudrodSearcher);
  servletContext.setAttribute("MudrodRanker", mudrodRanker);
}
Aggregations