Use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by Apache.
Class HybridRecommendation, method main.
/**
 * Manual entry point: loads the Mudrod configuration, creates an
 * {@code ESDriver} from it, asks {@code HybridRecommendation} for the
 * recommendation data of one hard-coded input string and prints the
 * resulting JSON to standard output.
 *
 * @param args command-line arguments (not read by this method)
 * @throws IOException declared for the calls made below
 */
public static void main(String[] args) throws IOException {
  MudrodEngine me = new MudrodEngine();
  Properties props = me.loadConfig();
  ESDriver es = new ESDriver(me.getConfig());
  try {
    HybridRecommendation test = new HybridRecommendation(props, es, null);
    // Alternative sample input: "NSCAT_LEVEL_1.7_V2"
    String input = "AQUARIUS_L3_SSS_SMIA_MONTHLY-CLIMATOLOGY_V4";
    // NOTE(review): 10 is presumably the number of recommendations requested - confirm.
    JsonObject json = test.getRecomDataInJson(input, 10);
    System.out.println(json.toString());
  } finally {
    // Release the driver even when the lookup above fails.
    es.close();
  }
}
Use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by Apache.
Class SessionStatistic, method processSessionInParallel.
/**
 * Distributes the strings returned by {@code getSessions()} over a Spark RDD,
 * runs {@code processSession} on each of them partition by partition, sums
 * the returned numbers and logs the total.
 *
 * <p>Each partition creates its own {@code ESDriver} with a bulk processor
 * and closes the driver again when the partition is finished, including when
 * processing fails part-way through.
 *
 * @throws InterruptedException declared by the original signature
 * @throws IOException declared by the original signature
 */
public void processSessionInParallel() throws InterruptedException, IOException {
  List<String> sessions = this.getSessions();
  JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);

  int sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
    @Override
    public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
      ESDriver tmpES = new ESDriver(props);
      try {
        tmpES.createBulkProcessor();
        List<Integer> sessionNums = new ArrayList<>();
        // Seed with 0 so every partition emits at least one element; the
        // reduce below then always has something to combine.
        sessionNums.add(0);
        while (arg0.hasNext()) {
          String s = arg0.next();
          Integer sessionNum = processSession(tmpES, s);
          sessionNums.add(sessionNum);
        }
        tmpES.destroyBulkProcessor();
        return sessionNums.iterator();
      } finally {
        // Always release the per-partition driver, even if a session failed.
        tmpES.close();
      }
    }
  }).reduce(new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer a, Integer b) {
      return a + b;
    }
  });

  LOG.info("Final Session count: {}", sessionCount);
}
Use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by Apache.
Class SessionExtractor, method getClickStreamListInParallel.
/**
 * Builds an RDD of {@code ClickStream} objects for the sessions found in all
 * indices whose names start with the configured log-index prefix.
 *
 * <p>Session strings are collected on the driver via {@code getSessions},
 * spread over 16 partitions, and each partition resolves its sessions through
 * its own {@code ESDriver}, which is closed when the partition finishes,
 * including when a lookup fails.
 *
 * @param props configuration; supplies the log-index prefix and is used to
 *              create the per-partition {@code ESDriver}
 * @param spark Spark driver whose context parallelizes the session list
 * @param es    Elasticsearch driver used on the calling side to list indices
 *              and sessions
 * @return the RDD of click streams; it has already been evaluated once by the
 *         {@code count()} call used for logging
 */
protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {
  List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
  LOG.info("Retrieved {}", logIndexList);

  List<String> sessionIdList = new ArrayList<>();
  for (String logIndex : logIndexList) {
    List<String> tmpsessionList = this.getSessions(props, es, logIndex);
    sessionIdList.addAll(tmpsessionList);
  }

  JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
  JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {
    private static final long serialVersionUID = 1L;

    @Override
    public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
      ESDriver tmpES = new ESDriver(props);
      try {
        tmpES.createBulkProcessor();
        Session session = new Session(props, tmpES);
        List<ClickStream> clickstreams = new ArrayList<>();
        while (arg0.hasNext()) {
          String s = arg0.next();
          // Each entry is comma-separated with at least three fields; they
          // are passed to getClickStreamList in the order [1], [2], [0].
          String[] sArr = s.split(",");
          List<ClickStream> clicks = session.getClickStreamList(sArr[1], sArr[2], sArr[0]);
          clickstreams.addAll(clicks);
        }
        tmpES.destroyBulkProcessor();
        return clickstreams.iterator();
      } finally {
        // Always release the per-partition driver, even if a lookup failed.
        tmpES.close();
      }
    }
  });

  // count() is a Spark action, so the partition function above runs here.
  // NOTE(review): the RDD is not cached in this method, so later actions by
  // the caller may recompute it - confirm whether that is intended.
  LOG.info("Clickstream number: {}", clickStreamRDD.count());
  return clickStreamRDD;
}
Use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by Apache.
Class MudrodEngine, method main.
/**
 * Main program invocation. Parses the command line, overrides the loaded
 * configuration with any Elasticsearch host/port options supplied, creates
 * the Elasticsearch and Spark drivers and runs the selected workflow (log
 * ingest, metadata ingest, full ingest, or processing only) against the
 * given data directory. Usage help is printed when help is requested or the
 * arguments cannot be parsed.
 *
 * @param args
 *          {@link java.lang.String} array containing the command-line parameters.
 */
public static void main(String[] args) {
  // boolean options
  Option helpOpt = new Option("h", "help", false, "show this help message");
  // log ingest (preprocessing + processing)
  Option logIngestOpt = new Option("l", LOG_INGEST, false, "begin log ingest");
  // metadata ingest (preprocessing + processing)
  Option metaIngestOpt = new Option("m", META_INGEST, false, "begin metadata ingest");
  // ingest both log and metadata
  Option fullIngestOpt = new Option("f", FULL_INGEST, false, "begin full ingest Mudrod workflow");
  // processing only, assuming that preprocessing results is in dataDir
  Option processingOpt = new Option("p", PROCESSING, false, "begin processing with preprocessing results");

  // argument options
  Option dataDirOpt = OptionBuilder.hasArg(true).withArgName("/path/to/data/directory").hasArgs(1).withDescription("the data directory to be processed by Mudrod").withLongOpt("dataDirectory").isRequired().create(DATA_DIR);
  Option esHostOpt = OptionBuilder.hasArg(true).withArgName("host_name").hasArgs(1).withDescription("elasticsearch cluster unicast host").withLongOpt("elasticSearchHost").isRequired(false).create(ES_HOST);
  Option esTCPPortOpt = OptionBuilder.hasArg(true).withArgName("port_num").hasArgs(1).withDescription("elasticsearch transport TCP port").withLongOpt("elasticSearchTransportTCPPort").isRequired(false).create(ES_TCP_PORT);
  Option esPortOpt = OptionBuilder.hasArg(true).withArgName("port_num").hasArgs(1).withDescription("elasticsearch HTTP/REST port").withLongOpt("elasticSearchHTTPPort").isRequired(false).create(ES_HTTP_PORT);

  // create the options
  Options options = new Options();
  options.addOption(helpOpt);
  options.addOption(logIngestOpt);
  options.addOption(metaIngestOpt);
  options.addOption(fullIngestOpt);
  options.addOption(processingOpt);
  options.addOption(dataDirOpt);
  options.addOption(esHostOpt);
  options.addOption(esTCPPortOpt);
  options.addOption(esPortOpt);

  // Parse first and on its own, so that only genuine command-line problems
  // are reported as such and answered with the usage text.
  CommandLineParser parser = new GnuParser();
  CommandLine line;
  try {
    line = parser.parse(options, args);
  } catch (Exception e) {
    printUsage(options);
    LOG.error("Error whilst parsing command line.", e);
    return;
  }

  // Honour an explicit help request instead of silently running a workflow.
  if (line.hasOption("h")) {
    printUsage(options);
    return;
  }

  // The first matching option wins, in this fixed order.
  String processingType = null;
  if (line.hasOption(LOG_INGEST)) {
    processingType = LOG_INGEST;
  } else if (line.hasOption(PROCESSING)) {
    processingType = PROCESSING;
  } else if (line.hasOption(META_INGEST)) {
    processingType = META_INGEST;
  } else if (line.hasOption(FULL_INGEST)) {
    processingType = FULL_INGEST;
  }

  try {
    // Normalise to forward slashes with a trailing separator.
    String dataDir = line.getOptionValue(DATA_DIR).replace("\\", "/");
    if (!dataDir.endsWith("/")) {
      dataDir += "/";
    }

    MudrodEngine me = new MudrodEngine();
    me.loadConfig();
    me.props.put(DATA_DIR, dataDir);

    // Command-line Elasticsearch settings override the loaded configuration.
    if (line.hasOption(ES_HOST)) {
      String esHost = line.getOptionValue(ES_HOST);
      me.props.put(MudrodConstants.ES_UNICAST_HOSTS, esHost);
    }
    if (line.hasOption(ES_TCP_PORT)) {
      String esTcpPort = line.getOptionValue(ES_TCP_PORT);
      me.props.put(MudrodConstants.ES_TRANSPORT_TCP_PORT, esTcpPort);
    }
    if (line.hasOption(ES_HTTP_PORT)) {
      String esHttpPort = line.getOptionValue(ES_HTTP_PORT);
      me.props.put(MudrodConstants.ES_HTTP_PORT, esHttpPort);
    }

    me.es = new ESDriver(me.getConfig());
    me.spark = new SparkDriver(me.getConfig());
    try {
      loadPathConfig(me, dataDir);
      if (processingType != null) {
        switch (processingType) {
        case PROCESSING:
          me.startProcessing();
          break;
        case LOG_INGEST:
          me.startLogIngest();
          break;
        case META_INGEST:
          me.startMetaIngest();
          break;
        case FULL_INGEST:
          me.startFullIngest();
          break;
        default:
          break;
        }
      }
    } finally {
      // Shut the engine down even when the selected workflow fails.
      me.end();
    }
  } catch (Exception e) {
    // A runtime failure is not a usage problem, so no help text is printed.
    LOG.error("Error whilst running Mudrod.", e);
  }
}

/** Prints the command-line usage summary for the given options. */
private static void printUsage(Options options) {
  HelpFormatter formatter = new HelpFormatter();
  formatter.printHelp("MudrodEngine: 'dataDir' argument is mandatory. " + "User must also provide an ingest method.", options, true);
}
Aggregations