Search in sources :

Example 1 with Processor

use of org.edamontology.edammap.core.processing.Processor in project edammap by edamontology.

The method run of the class Cli.

/**
 * Runs one complete command-line mapping job.
 *
 * <p>In order: collects the parameters echoed in the report, creates the {@link Output},
 * loads stopwords, the processor and (optionally) an IDF, loads and processes the EDAM
 * concepts, loads the queries, starts the mapper threads, waits for them to finish,
 * then calculates benchmark results and writes the output.
 *
 * <p>Names used but not declared here ({@code args}, {@code stopwords}, {@code processor},
 * {@code idf}, {@code processedConcepts}, {@code queries}, {@code publications},
 * {@code webpages}, {@code docs}, {@code mappings}, {@code start}, {@code lock},
 * {@code lockDone}, {@code numThreads}, {@code logger}) are presumably static members of
 * the enclosing class, shared with the worker threads -- confirm against the full file.
 *
 * @param version passed through unchanged to {@code output.output(...)}
 * @throws IOException declared by this method; raised by one or more of the calls below
 * @throws ParseException declared by this method; raised by one or more of the calls below
 */
private static void run(Version version) throws IOException, ParseException {
    // Parameters shown in the report; insertion order is preserved for the output.
    List<Param> mainParams = new ArrayList<>();
    mainParams.add(new Param("Ontology file", CliArgs.EDAM, new File(args.getEdam()).getName(), "https://github.com/edamontology/edamontology/tree/master/releases"));
    // A query given as a protocol location is shown in full (and reused as the 4th argument);
    // otherwise only the file name is shown.
    mainParams.add(Input.isProtocol(args.getQuery())
            ? new Param("Query file", CliArgs.QUERY, args.getQuery(), args.getQuery())
            : new Param("Query file", CliArgs.QUERY, new File(args.getQuery()).getName()));
    mainParams.add(new Param("Type", CliArgs.TYPE, args.getType().toString()));
    mainParams.add(new Param("Output file", CliArgs.OUTPUT, new File(args.getOutput()).getName()));
    mainParams.add(new Param("Report file", CliArgs.REPORT, new File(args.getReport()).getName()));
    mainParams.add(new Param("Report page size", CliArgs.REPORT_PAGE_SIZE, args.getReportPageSize(), 0.0, null));
    mainParams.add(new Param("Report pagination size", CliArgs.REPORT_PAGINATION_SIZE, args.getReportPaginationSize(), 0.0, null));
    mainParams.add(new Param("Number of threads", CliArgs.THREADS, args.getThreads(), 0.0, null));

    // Created before any loading work starts.
    Output resultOutput = new Output(args.getOutput(), args.getReport(), false);

    stopwords = PreProcessor.getStopwords(args.getPreProcessorArgs().getStopwords());
    processor = new Processor(args.getProcessorArgs());

    // Choose the IDF source that matches the stemming setting; idf stays null when the
    // matching source is unset or empty.
    idf = null;
    boolean stemming = args.getPreProcessorArgs().isStemming();
    if (stemming && args.getProcessorArgs().getIdfStemmed() != null && !args.getProcessorArgs().getIdfStemmed().isEmpty()) {
        idf = new Idf(args.getProcessorArgs().getIdfStemmed());
    } else if (!stemming && args.getProcessorArgs().getIdf() != null && !args.getProcessorArgs().getIdf().isEmpty()) {
        idf = new Idf(args.getProcessorArgs().getIdf());
    }

    logger.info("Loading concepts");
    Map<EdamUri, Concept> edamConcepts = Edam.load(args.getEdam());
    logger.info("Processing {} concepts", edamConcepts.size());
    processedConcepts = processor.getProcessedConcepts(edamConcepts, args.getMapperArgs().getIdfArgs(), args.getMapperArgs().getMultiplierArgs(), new PreProcessor(args.getPreProcessorArgs(), stopwords));

    logger.info("Loading queries");
    queries = QueryLoader.get(args.getQuery(), args.getType(), edamConcepts, args.getFetcherArgs().getTimeout(), args.getFetcherArgs().getPrivateArgs().getUserAgent());

    // One result slot per query, pre-filled with null so every index exists up front.
    publications = new ArrayList<>(queries.size());
    webpages = new ArrayList<>(queries.size());
    docs = new ArrayList<>(queries.size());
    mappings = new ArrayList<>(queries.size());
    for (int slot = 0; slot < queries.size(); ++slot) {
        publications.add(null);
        webpages.add(null);
        docs.add(null);
        mappings.add(null);
    }

    start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));

    logger.info("Starting mapper threads");
    for (int started = 0; started < args.getThreads(); ++started) {
        Thread worker = new Thread(new Cli());
        worker.setDaemon(true);
        worker.start();
    }

    // Block until lockDone is set and numThreads has dropped to zero.
    synchronized (lock) {
        while (!(lockDone && numThreads <= 0)) {
            try {
                lock.wait();
            } catch (InterruptedException interrupted) {
                // TODO exit threads cleanly? give timeout for threads to exit? close db? print that exiting and waiting for threads to terminate?
                logger.error("Exception!", interrupted);
                System.exit(1);
            }
        }
    }
    logger.info("All mapper threads stopped");

    long finish = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(finish));
    double elapsedSeconds = (finish - start) / 1000.0;
    logger.info("Mapping took {}s", elapsedSeconds);

    Results benchmark = Benchmark.calculate(queries, mappings);

    logger.info("Outputting results");
    resultOutput.output(args, mainParams, args.getType(), args.getReportPageSize(), args.getReportPaginationSize(), edamConcepts, queries, webpages, docs, publications, benchmark, start, finish, version);

    logger.info("{} : {}", benchmark.toStringMeasure(Measure.recall), Measure.recall);
    logger.info("{} : {}", benchmark.toStringMeasure(Measure.AveP), Measure.AveP);
}
Also used : Concept(org.edamontology.edammap.core.edam.Concept) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Processor(org.edamontology.edammap.core.processing.Processor) ArrayList(java.util.ArrayList) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Idf(org.edamontology.edammap.core.idf.Idf) Results(org.edamontology.edammap.core.benchmarking.Results) Output(org.edamontology.edammap.core.output.Output) Param(org.edamontology.edammap.core.output.Param) File(java.io.File) EdamUri(org.edamontology.edammap.core.edam.EdamUri)

Example 2 with Processor

use of org.edamontology.edammap.core.processing.Processor in project edammap by edamontology.

The method run of the class Server.

/**
 * Prepares shared mapping state and starts the embedded Grizzly HTTP server.
 *
 * <p>In order: records the ontology file parameter, loads every {@link Stopwords} variant,
 * creates the {@link Processor}, loads the plain and stemmed IDF when configured, loads the
 * EDAM concepts, then configures and starts the server with three handlers: the Jersey
 * resources under {@code <baseUri>/<path>/api}, static files under {@code /<path>/*}, and
 * an HTML page rendered by {@code Resource.runGet} at {@code /<path>/}. When a log location
 * is configured, a daily-rotated access log is attached.
 *
 * <p>Names used but not declared here ({@code args}, {@code paramsMain},
 * {@code stopwordsAll}, {@code processor}, {@code idf}, {@code idfStemmed},
 * {@code concepts}, {@code version}, {@code logger}) are presumably static members of the
 * enclosing class -- confirm against the full file.
 *
 * @throws IOException declared by this method; raised by one or more of the calls below
 *         (for example {@code Files.createDirectory} or {@code httpServer.start()})
 * @throws ParseException declared by this method; raised by one or more of the calls below
 */
private static void run() throws IOException, ParseException {
    paramsMain.add(new Param("Ontology file", ServerArgs.EDAM, new File(args.getEdam()).getName(), "https://github.com/edamontology/edamontology/tree/master/releases"));
    // Load all stopword lists up front, keyed by their enum constant.
    for (Stopwords stopwords : Stopwords.values()) {
        stopwordsAll.put(stopwords, PreProcessor.getStopwords(stopwords));
    }
    processor = new Processor(args.getProcessorArgs());
    // Unlike a single CLI run, both IDF variants are loaded when configured.
    if (args.getProcessorArgs().getIdf() != null && !args.getProcessorArgs().getIdf().isEmpty()) {
        idf = new Idf(args.getProcessorArgs().getIdf());
    }
    if (args.getProcessorArgs().getIdfStemmed() != null && !args.getProcessorArgs().getIdfStemmed().isEmpty()) {
        idfStemmed = new Idf(args.getProcessorArgs().getIdfStemmed());
    }
    logger.info("Loading concepts");
    concepts = Edam.load(args.getEdam());
    logger.info("Configuring server");
    final ResourceConfig rc = new ResourceConfig().packages("org.edamontology.edammap.server");
    // TODO .property(JsonGenerator.PRETTY_PRINTING, true);
    // Third argument false: the server is created but not started; start() is called below.
    HttpServer httpServer = GrizzlyHttpServerFactory.createHttpServer(URI.create(args.getBaseUri() + "/" + args.getPath() + "/api"), rc, false);
    final StaticHttpHandler filesHttpHandler = new StaticHttpHandler(args.getFiles());
    filesHttpHandler.setDirectorySlashOff(true);
    httpServer.getServerConfiguration().addHttpHandler(filesHttpHandler, "/" + args.getPath() + "/*");
    httpServer.getServerConfiguration().addHttpHandler(new HttpHandler() {

        @Override
        public void service(Request request, Response response) throws Exception {
            // TODO replace null with request.getParameterMap()
            String responseText = Resource.runGet(null, request);
            // Content-Length is a byte count, while String.length() counts UTF-16 code
            // units. Encode once with an explicit charset so the declared charset, the
            // declared length and the bytes actually sent always agree.
            byte[] responseBytes = responseText.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            response.setContentType(MediaType.TEXT_HTML);
            response.setCharacterEncoding(java.nio.charset.StandardCharsets.UTF_8.name());
            response.setContentLength(responseBytes.length);
            response.getOutputStream().write(responseBytes);
        }
    }, "/" + args.getPath() + "/");
    if (args.getLog() != null) {
        Path accessDir = Paths.get(args.getLog() + "/access");
        if (!Files.exists(accessDir)) {
            Files.createDirectory(accessDir);
        }
        final AccessLogBuilder builder = new AccessLogBuilder(accessDir + "/edammap-access.log");
        builder.rotatedDaily();
        // builder.format(ApacheLogFormat.COMBINED); // TODO
        builder.instrument(httpServer.getServerConfiguration());
    }
    logger.info("Starting server");
    httpServer.start();
    logger.info("{} has started", version.getName());
}
Also used : Path(java.nio.file.Path) StaticHttpHandler(org.glassfish.grizzly.http.server.StaticHttpHandler) HttpHandler(org.glassfish.grizzly.http.server.HttpHandler) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Processor(org.edamontology.edammap.core.processing.Processor) Request(org.glassfish.grizzly.http.server.Request) AccessLogBuilder(org.glassfish.grizzly.http.server.accesslog.AccessLogBuilder) Idf(org.edamontology.edammap.core.idf.Idf) Stopwords(org.edamontology.edammap.core.preprocessing.Stopwords) ParseException(java.text.ParseException) IOException(java.io.IOException) Response(org.glassfish.grizzly.http.server.Response) StaticHttpHandler(org.glassfish.grizzly.http.server.StaticHttpHandler) Param(org.edamontology.edammap.core.output.Param) HttpServer(org.glassfish.grizzly.http.server.HttpServer) ResourceConfig(org.glassfish.jersey.server.ResourceConfig) File(java.io.File)

Example 3 with Processor

use of org.edamontology.edammap.core.processing.Processor in project edammap by edamontology.

The method makeIdf of the class Util.

/**
 * Builds a query IDF file from the queries at {@code queryPath} and logs how many IDFs
 * were written.
 *
 * <p>A fresh {@link ProcessorArgs} is configured with fetching disabled, the given database
 * and no pre-existing IDF files, so the {@link Processor} used here does not depend on the
 * caller's processor settings.
 *
 * @param queryPath location of the queries, handed to {@code QueryLoader.get}
 * @param database database passed to {@code ProcessorArgs.setDb}; may be {@code null}, in
 *        which case the log message omits the database part
 * @param idfPath destination passed to {@code makeQueryIdf}
 * @param args supplies {@code makeIdfType}, {@code makeIdfWebpagesDocs},
 *        {@code makeIdfFulltext} and {@code fetcherArgs}
 * @param stemming passed to the {@link PreProcessor} constructor
 * @throws IOException declared by this method; raised by one or more of the calls below
 * @throws ParseException declared by this method; raised by one or more of the calls below
 */
private static void makeIdf(String queryPath, String database, String idfPath, UtilArgs args, boolean stemming) throws IOException, ParseException {
    String databaseSuffix = (database == null) ? "" : " using database " + database;
    logger.info("Make query IDF from file {} of type {} to {}{}", queryPath, args.makeIdfType, idfPath, databaseSuffix);

    ProcessorArgs idfProcessorArgs = new ProcessorArgs();
    idfProcessorArgs.setFetching(false);
    idfProcessorArgs.setDb(database);
    idfProcessorArgs.setIdf(null);
    idfProcessorArgs.setIdfStemmed(null);

    Processor idfProcessor = new Processor(idfProcessorArgs);
    int written = idfProcessor.makeQueryIdf(
            QueryLoader.get(queryPath, args.makeIdfType, args.fetcherArgs.getTimeout(), args.fetcherArgs.getPrivateArgs().getUserAgent()),
            args.makeIdfType,
            idfPath,
            args.makeIdfWebpagesDocs,
            args.makeIdfFulltext,
            new PreProcessor(stemming),
            null,
            args.fetcherArgs);

    logger.info("Wrote {} IDFs to {}", written, idfPath);
}
Also used : PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Processor(org.edamontology.edammap.core.processing.Processor) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) ProcessorArgs(org.edamontology.edammap.core.processing.ProcessorArgs)

Aggregations

PreProcessor (org.edamontology.edammap.core.preprocessing.PreProcessor)3 Processor (org.edamontology.edammap.core.processing.Processor)3 File (java.io.File)2 Idf (org.edamontology.edammap.core.idf.Idf)2 Param (org.edamontology.edammap.core.output.Param)2 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 ParseException (java.text.ParseException)1 ArrayList (java.util.ArrayList)1 Results (org.edamontology.edammap.core.benchmarking.Results)1 Concept (org.edamontology.edammap.core.edam.Concept)1 EdamUri (org.edamontology.edammap.core.edam.EdamUri)1 Output (org.edamontology.edammap.core.output.Output)1 Stopwords (org.edamontology.edammap.core.preprocessing.Stopwords)1 ProcessorArgs (org.edamontology.edammap.core.processing.ProcessorArgs)1 HttpHandler (org.glassfish.grizzly.http.server.HttpHandler)1 HttpServer (org.glassfish.grizzly.http.server.HttpServer)1 Request (org.glassfish.grizzly.http.server.Request)1 Response (org.glassfish.grizzly.http.server.Response)1 StaticHttpHandler (org.glassfish.grizzly.http.server.StaticHttpHandler)1