Search in sources :

Example 6 with ESDriver

use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.

From the class HybridRecommendation, method main.

/**
 * Ad-hoc driver: loads the Mudrod configuration, builds an {@link ESDriver}
 * from it, asks {@link HybridRecommendation} for recommendation data for a
 * hard-coded input string and prints the resulting JSON to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static void main(String[] args) throws IOException {
    MudrodEngine engine = new MudrodEngine();
    Properties config = engine.loadConfig();
    // NOTE(review): this driver is never closed here — confirm whether that is intended.
    ESDriver esDriver = new ESDriver(engine.getConfig());
    HybridRecommendation recommender = new HybridRecommendation(config, esDriver, null);

    // Alternative sample input: "NSCAT_LEVEL_1.7_V2"
    String sampleInput = "AQUARIUS_L3_SSS_SMIA_MONTHLY-CLIMATOLOGY_V4";
    // Second argument is presumably the number of recommendations requested — TODO confirm.
    JsonObject recommendations = recommender.getRecomDataInJson(sampleInput, 10);

    String rendered = recommendations.toString();
    System.out.println(rendered);
}
Also used : MudrodEngine(org.apache.sdap.mudrod.main.MudrodEngine) ESDriver(org.apache.sdap.mudrod.driver.ESDriver) JsonObject(com.google.gson.JsonObject)

Example 7 with ESDriver

use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.

From the class SessionStatistic, method processSessionInParallel.

/**
 * Processes every session returned by {@code getSessions()} as a Spark job and
 * logs the sum of the values returned by {@code processSession}.
 *
 * <p>The session list is parallelized into {@code partition} slices. Each
 * partition creates its own {@link ESDriver} and bulk processor, calls
 * {@code processSession} for each of its sessions, and emits the returned
 * numbers; the numbers from all partitions are then added together.
 *
 * <p>The bulk processor and the driver are released in {@code finally} blocks so
 * that a failure while processing a session does not leave them open.
 *
 * @throws InterruptedException declared by the signature; propagated from the calls made here
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void processSessionInParallel() throws InterruptedException, IOException {
    List<String> sessions = this.getSessions();
    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);
    int sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {

        @Override
        public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
            List<Integer> sessionNums = new ArrayList<>();
            // Every partition contributes at least a zero; presumably this keeps the
            // following reduce from ever running over an empty RDD — TODO confirm.
            sessionNums.add(0);
            ESDriver tmpES = new ESDriver(props);
            try {
                tmpES.createBulkProcessor();
                try {
                    while (arg0.hasNext()) {
                        String s = arg0.next();
                        Integer sessionNum = processSession(tmpES, s);
                        sessionNums.add(sessionNum);
                    }
                } finally {
                    // Only reached once the bulk processor was created successfully.
                    tmpES.destroyBulkProcessor();
                }
            } finally {
                // Always release the per-partition driver, even when processing failed.
                tmpES.close();
            }
            return sessionNums.iterator();
        }
    }).reduce(new Function2<Integer, Integer, Integer>() {

        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });
    LOG.info("Final Session count: {}", sessionCount);
}
Also used : ESDriver(org.apache.sdap.mudrod.driver.ESDriver) Function2(org.apache.spark.api.java.function.Function2) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 8 with ESDriver

use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.

From the class SessionExtractor, method getClickStreamListInParallel.

/**
 * Builds an RDD of {@link ClickStream} objects for the sessions found in every
 * index whose name starts with the configured log-index prefix.
 *
 * <p>Session identifiers are first collected on the caller's side via
 * {@code getSessions} for each matching index, then parallelized into 16
 * slices. Each partition creates its own {@link ESDriver} and bulk processor
 * and asks a {@link Session} for the click streams of each of its entries.
 *
 * <p>The bulk processor and the driver are released in {@code finally} blocks so
 * that a failure while reading a session does not leave them open.
 *
 * <p>Note that {@code count()} is invoked for the log message, which runs the
 * Spark job once before the RDD is returned.
 *
 * @param props configuration; read for the log-index prefix and passed to the
 *          per-partition {@link ESDriver} and {@link Session}
 * @param spark Spark driver whose context parallelizes the session list
 * @param es driver used on the caller's side to list indices and fetch sessions
 * @return the click-stream RDD
 */
protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {
    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
    LOG.info("Retrieved {}", logIndexList);
    List<String> sessionIdList = new ArrayList<>();
    for (String logIndex : logIndexList) {
        List<String> tmpsessionList = this.getSessions(props, es, logIndex);
        sessionIdList.addAll(tmpsessionList);
    }
    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {

        /** Serialization version of this anonymous function class. */
        private static final long serialVersionUID = 1L;

        @Override
        public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
            List<ClickStream> clickstreams = new ArrayList<>();
            ESDriver tmpES = new ESDriver(props);
            try {
                tmpES.createBulkProcessor();
                try {
                    Session session = new Session(props, tmpES);
                    while (arg0.hasNext()) {
                        String s = arg0.next();
                        // Each entry must hold at least three comma-separated fields;
                        // their meaning is defined by getSessions — verify against it.
                        String[] sArr = s.split(",");
                        List<ClickStream> clicks = session.getClickStreamList(sArr[1], sArr[2], sArr[0]);
                        clickstreams.addAll(clicks);
                    }
                } finally {
                    // Only reached once the bulk processor was created successfully.
                    tmpES.destroyBulkProcessor();
                }
            } finally {
                // Always release the per-partition driver, even when a session failed.
                tmpES.close();
            }
            return clickstreams.iterator();
        }
    });
    LOG.info("Clickstream number: {}", clickStreamRDD.count());
    return clickStreamRDD;
}
Also used : ESDriver(org.apache.sdap.mudrod.driver.ESDriver) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List)

Example 9 with ESDriver

use of org.apache.sdap.mudrod.driver.ESDriver in project incubator-sdap-mudrod by apache.

From the class MudrodEngine, method main.

/**
 * Main program invocation. Parses the command line, applies any Elasticsearch
 * overrides to the loaded configuration, creates the Elasticsearch and Spark
 * drivers and runs the selected workflow.
 *
 * <p>The data directory option is mandatory. One of the log-ingest, processing,
 * metadata-ingest or full-ingest flags selects the workflow; when several are
 * given, the first one in that order wins, and when none is given no workflow
 * is started.
 *
 * <p>Once both drivers exist, {@code end()} is always invoked, even when path
 * configuration or the workflow fails. Any exception raised along the way is
 * logged and the usage help is printed.
 *
 * @param args
 *          {@link java.lang.String} array containing the command-line parameters.
 */
public static void main(String[] args) {
    // boolean options
    Option helpOpt = new Option("h", "help", false, "show this help message");
    // log ingest (preprocessing + processing)
    Option logIngestOpt = new Option("l", LOG_INGEST, false, "begin log ingest");
    // metadata ingest (preprocessing + processing)
    Option metaIngestOpt = new Option("m", META_INGEST, false, "begin metadata ingest");
    // ingest both log and metadata
    Option fullIngestOpt = new Option("f", FULL_INGEST, false, "begin full ingest Mudrod workflow");
    // processing only, assuming that preprocessing results is in dataDir
    Option processingOpt = new Option("p", PROCESSING, false, "begin processing with preprocessing results");
    // argument options
    Option dataDirOpt = OptionBuilder.hasArg(true).withArgName("/path/to/data/directory").hasArgs(1).withDescription("the data directory to be processed by Mudrod").withLongOpt("dataDirectory").isRequired().create(DATA_DIR);
    Option esHostOpt = OptionBuilder.hasArg(true).withArgName("host_name").hasArgs(1).withDescription("elasticsearch cluster unicast host").withLongOpt("elasticSearchHost").isRequired(false).create(ES_HOST);
    Option esTCPPortOpt = OptionBuilder.hasArg(true).withArgName("port_num").hasArgs(1).withDescription("elasticsearch transport TCP port").withLongOpt("elasticSearchTransportTCPPort").isRequired(false).create(ES_TCP_PORT);
    Option esPortOpt = OptionBuilder.hasArg(true).withArgName("port_num").hasArgs(1).withDescription("elasticsearch HTTP/REST port").withLongOpt("elasticSearchHTTPPort").isRequired(false).create(ES_HTTP_PORT);
    // create the options
    Options options = new Options();
    options.addOption(helpOpt);
    options.addOption(logIngestOpt);
    options.addOption(metaIngestOpt);
    options.addOption(fullIngestOpt);
    options.addOption(processingOpt);
    options.addOption(dataDirOpt);
    options.addOption(esHostOpt);
    options.addOption(esTCPPortOpt);
    options.addOption(esPortOpt);
    CommandLineParser parser = new GnuParser();
    try {
        CommandLine line = parser.parse(options, args);
        // Pick the workflow; the order of these checks decides which flag wins
        // when more than one is supplied.
        String processingType = null;
        if (line.hasOption(LOG_INGEST)) {
            processingType = LOG_INGEST;
        } else if (line.hasOption(PROCESSING)) {
            processingType = PROCESSING;
        } else if (line.hasOption(META_INGEST)) {
            processingType = META_INGEST;
        } else if (line.hasOption(FULL_INGEST)) {
            processingType = FULL_INGEST;
        }
        // Normalize the data directory: forward slashes only, trailing slash guaranteed.
        String dataDir = line.getOptionValue(DATA_DIR).replace("\\", "/");
        if (!dataDir.endsWith("/")) {
            dataDir += "/";
        }
        MudrodEngine me = new MudrodEngine();
        me.loadConfig();
        me.props.put(DATA_DIR, dataDir);
        // Command-line Elasticsearch settings override the loaded configuration.
        if (line.hasOption(ES_HOST)) {
            String esHost = line.getOptionValue(ES_HOST);
            me.props.put(MudrodConstants.ES_UNICAST_HOSTS, esHost);
        }
        if (line.hasOption(ES_TCP_PORT)) {
            String esTcpPort = line.getOptionValue(ES_TCP_PORT);
            me.props.put(MudrodConstants.ES_TRANSPORT_TCP_PORT, esTcpPort);
        }
        if (line.hasOption(ES_HTTP_PORT)) {
            String esHttpPort = line.getOptionValue(ES_HTTP_PORT);
            me.props.put(MudrodConstants.ES_HTTP_PORT, esHttpPort);
        }
        me.es = new ESDriver(me.getConfig());
        me.spark = new SparkDriver(me.getConfig());
        try {
            loadPathConfig(me, dataDir);
            if (processingType != null) {
                switch(processingType) {
                    case PROCESSING:
                        me.startProcessing();
                        break;
                    case LOG_INGEST:
                        me.startLogIngest();
                        break;
                    case META_INGEST:
                        me.startMetaIngest();
                        break;
                    case FULL_INGEST:
                        me.startFullIngest();
                        break;
                    default:
                        break;
                }
            }
        } finally {
            // Both drivers exist at this point, so shut the engine down even when
            // path configuration or the workflow failed.
            me.end();
        }
    } catch (Exception e) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("MudrodEngine: 'dataDir' argument is mandatory. " + "User must also provide an ingest method.", options, true);
        LOG.error("Error whilst parsing command line.", e);
    }
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) ESDriver(org.apache.sdap.mudrod.driver.ESDriver) CommandLine(org.apache.commons.cli.CommandLine) SparkDriver(org.apache.sdap.mudrod.driver.SparkDriver) GnuParser(org.apache.commons.cli.GnuParser) Option(org.apache.commons.cli.Option) CommandLineParser(org.apache.commons.cli.CommandLineParser) JDOMException(org.jdom2.JDOMException) IOException(java.io.IOException)

Aggregations

ESDriver (org.apache.sdap.mudrod.driver.ESDriver)9 IOException (java.io.IOException)5 SparkDriver (org.apache.sdap.mudrod.driver.SparkDriver)3 Function2 (org.apache.spark.api.java.function.Function2)3 ArrayList (java.util.ArrayList)2 Iterator (java.util.Iterator)2 List (java.util.List)2 MudrodEngine (org.apache.sdap.mudrod.main.MudrodEngine)2 ElasticsearchException (org.elasticsearch.ElasticsearchException)2 JsonObject (com.google.gson.JsonObject)1 java.util (java.util)1 Properties (java.util.Properties)1 ExecutionException (java.util.concurrent.ExecutionException)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 ServletContext (javax.servlet.ServletContext)1 CommandLine (org.apache.commons.cli.CommandLine)1 CommandLineParser (org.apache.commons.cli.CommandLineParser)1 GnuParser (org.apache.commons.cli.GnuParser)1 HelpFormatter (org.apache.commons.cli.HelpFormatter)1