Search in sources :

Example 1 with Results

use of org.edamontology.edammap.core.benchmarking.Results in project edammap by edamontology.

In class Cli, method run.

/**
 * Runs one complete mapping job: collects the main run parameters, loads and
 * processes the ontology concepts, loads the queries, starts the mapper
 * threads, waits until they are done, then benchmarks the mappings and
 * writes the output.
 *
 * <p>Several names assigned here ({@code stopwords}, {@code processor},
 * {@code idf}, {@code processedConcepts}, {@code queries},
 * {@code publications}, {@code webpages}, {@code docs}, {@code mappings},
 * {@code start}) have no local declaration, so they are declared outside this
 * method. NOTE(review): presumably they are shared with the worker threads
 * started below; confirm against the rest of the class.
 *
 * @param version passed through unchanged to {@code output.output(...)}
 * @throws IOException declared by this method; which of the calls below can
 *         raise it is not visible from this excerpt
 * @throws ParseException declared by this method; which of the calls below
 *         can raise it is not visible from this excerpt
 */
private static void run(Version version) throws IOException, ParseException {
    // Main run parameters; the finished list is handed to output.output(...) at the end.
    List<Param> paramsMain = new ArrayList<>();
    paramsMain.add(new Param("Ontology file", CliArgs.EDAM, new File(args.getEdam()).getName(), "https://github.com/edamontology/edamontology/tree/master/releases"));
    // When Input.isProtocol(...) accepts the query, the raw query string is used
    // both as the value and as the extra fourth argument; otherwise only the
    // file name part of the query path is recorded.
    if (Input.isProtocol(args.getQuery())) {
        paramsMain.add(new Param("Query file", CliArgs.QUERY, args.getQuery(), args.getQuery()));
    } else {
        paramsMain.add(new Param("Query file", CliArgs.QUERY, new File(args.getQuery()).getName()));
    }
    paramsMain.add(new Param("Type", CliArgs.TYPE, args.getType().toString()));
    paramsMain.add(new Param("Output file", CliArgs.OUTPUT, new File(args.getOutput()).getName()));
    paramsMain.add(new Param("Report file", CliArgs.REPORT, new File(args.getReport()).getName()));
    // NOTE(review): the trailing (0.0, null) arguments look like a lower and upper bound - confirm against Param.
    paramsMain.add(new Param("Report page size", CliArgs.REPORT_PAGE_SIZE, args.getReportPageSize(), 0.0, null));
    paramsMain.add(new Param("Report pagination size", CliArgs.REPORT_PAGINATION_SIZE, args.getReportPaginationSize(), 0.0, null));
    paramsMain.add(new Param("Number of threads", CliArgs.THREADS, args.getThreads(), 0.0, null));
    Output output = new Output(args.getOutput(), args.getReport(), false);
    stopwords = PreProcessor.getStopwords(args.getPreProcessorArgs().getStopwords());
    processor = new Processor(args.getProcessorArgs());
    // idf stays null unless a non-empty IDF value is configured for the active
    // mode: the "stemmed" one when stemming is on, the plain one otherwise.
    idf = null;
    if (args.getPreProcessorArgs().isStemming()) {
        if (args.getProcessorArgs().getIdfStemmed() != null && !args.getProcessorArgs().getIdfStemmed().isEmpty()) {
            idf = new Idf(args.getProcessorArgs().getIdfStemmed());
        }
    } else {
        if (args.getProcessorArgs().getIdf() != null && !args.getProcessorArgs().getIdf().isEmpty()) {
            idf = new Idf(args.getProcessorArgs().getIdf());
        }
    }
    logger.info("Loading concepts");
    Map<EdamUri, Concept> concepts = Edam.load(args.getEdam());
    logger.info("Processing {} concepts", concepts.size());
    processedConcepts = processor.getProcessedConcepts(concepts, args.getMapperArgs().getIdfArgs(), args.getMapperArgs().getMultiplierArgs(), new PreProcessor(args.getPreProcessorArgs(), stopwords));
    logger.info("Loading queries");
    queries = QueryLoader.get(args.getQuery(), args.getType(), concepts, args.getFetcherArgs().getTimeout(), args.getFetcherArgs().getPrivateArgs().getUserAgent());
    // Result lists get exactly one null slot per query, so each has the same
    // size as the query list before any thread starts.
    // NOTE(review): presumably workers later fill slot i for query i - confirm.
    publications = new ArrayList<>(queries.size());
    webpages = new ArrayList<>(queries.size());
    docs = new ArrayList<>(queries.size());
    mappings = new ArrayList<>(queries.size());
    for (int i = 0; i < queries.size(); ++i) {
        publications.add(null);
        webpages.add(null);
        docs.add(null);
        mappings.add(null);
    }
    start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));
    logger.info("Starting mapper threads");
    // One daemon thread per configured thread count, each running a fresh Cli instance.
    for (int i = 0; i < args.getThreads(); ++i) {
        Thread t = new Thread(new Cli());
        t.setDaemon(true);
        t.start();
    }
    // Block until lockDone is true and numThreads has dropped to 0. The
    // condition is re-checked after every wake-up from lock.wait(); both
    // values are updated outside this method.
    synchronized (lock) {
        while (!lockDone || numThreads > 0) {
            try {
                lock.wait();
            } catch (InterruptedException e) {
                // TODO exit threads cleanly? give timeout for threads to exit? close db? print that exiting and waiting for threads to terminate?
                // Interruption is treated as fatal: log it and end the JVM with status 1.
                logger.error("Exception!", e);
                System.exit(1);
            }
        }
    }
    logger.info("All mapper threads stopped");
    long stop = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(stop));
    // Elapsed wall-clock time, converted from milliseconds to seconds.
    logger.info("Mapping took {}s", (stop - start) / 1000.0);
    Results results = Benchmark.calculate(queries, mappings);
    logger.info("Outputting results");
    output.output(args, paramsMain, args.getType(), args.getReportPageSize(), args.getReportPaginationSize(), concepts, queries, webpages, docs, publications, results, start, stop, version);
    // Summary lines for two benchmark measures: recall and AveP.
    logger.info("{} : {}", results.toStringMeasure(Measure.recall), Measure.recall);
    logger.info("{} : {}", results.toStringMeasure(Measure.AveP), Measure.AveP);
}
Also used : Concept(org.edamontology.edammap.core.edam.Concept) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Processor(org.edamontology.edammap.core.processing.Processor) ArrayList(java.util.ArrayList) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Idf(org.edamontology.edammap.core.idf.Idf) Results(org.edamontology.edammap.core.benchmarking.Results) Output(org.edamontology.edammap.core.output.Output) Param(org.edamontology.edammap.core.output.Param) File(java.io.File) EdamUri(org.edamontology.edammap.core.edam.EdamUri)

Example 2 with Results

use of org.edamontology.edammap.core.benchmarking.Results in project edammap by edamontology.

In class Resource, method runPost.

/**
 * Handles one POSTed mapping request: parses and validates the submitted
 * fields, creates a fresh results directory, maps the single query against
 * the server's concepts, writes the output and answers with a redirect to
 * the location of the results.
 *
 * @param params the submitted request parameters
 * @param request the incoming request; only its remote address is read here (for logging)
 * @return a "see other" response whose location is
 *         {@code "/" + Server.args.getPath() + "/" + uuid + "/"}
 * @throws IllegalArgumentException if a submitted field is longer than its configured maximum
 * @throws IOException if the results directory cannot be created for a reason other than
 *         a name clash; other calls in this method may raise it as well
 * @throws ParseException declared by this method; the raising call is not identifiable
 *         from this method alone
 * @throws URISyntaxException if the redirect location is not a valid URI
 */
private Response runPost(MultivaluedMap<String, String> params, Request request) throws IOException, ParseException, URISyntaxException {
    logger.info("POST {} from {}", params, request.getRemoteAddr());
    long start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));
    CoreArgs coreArgs = new CoreArgs();
    ParamParse.parseParams(params, coreArgs);
    // Processor and private fetcher settings always come from the server
    // configuration; they are set after parsing the request parameters.
    coreArgs.setProcessorArgs(Server.args.getProcessorArgs());
    coreArgs.getFetcherArgs().setPrivateArgs(Server.args.getFetcherPrivateArgs());
    ServerInput serverInput = new ServerInput(ParamParse.getParamString(params, "name"), ParamParse.getParamString(params, "keywords"), ParamParse.getParamString(params, "description"), ParamParse.getParamString(params, "webpage-urls"), ParamParse.getParamString(params, "doc-urls"), ParamParse.getParamString(params, "publication-ids"), ParamParse.getParamString(params, "annotations"));
    // Reject over-long input before any directory is created or mapping work is done.
    checkMaxLength("Name", serverInput.getName(), MAX_NAME_LENGTH);
    checkMaxLength("Keywords", serverInput.getKeywords(), MAX_KEYWORDS_LENGTH);
    checkMaxLength("Description", serverInput.getDescription(), MAX_DESCRIPTION_LENGTH);
    checkMaxLength("Webpage URLs", serverInput.getWebpageUrls(), MAX_LINKS_LENGTH);
    checkMaxLength("Doc URLs", serverInput.getDocUrls(), MAX_LINKS_LENGTH);
    checkMaxLength("Publication IDs", serverInput.getPublicationIds(), MAX_PUBLICATION_IDS_LENGTH);
    checkMaxLength("Annotations", serverInput.getAnnotations(), MAX_ANNOTATIONS_LENGTH);
    String uuid;
    String uuidDir;
    // Files.createDirectory performs the existence check and the creation as
    // one atomic step, so a name clash (even with a concurrent request) is
    // detected reliably and simply answered with a new random UUID. A separate
    // Files.exists() check beforehand would leave a window between check and
    // creation.
    while (true) {
        uuid = Server.version.getVersion() + "/" + UUID.randomUUID().toString();
        uuidDir = Server.args.getFiles() + "/" + uuid;
        try {
            Files.createDirectory(Paths.get(uuidDir));
            break;
        } catch (java.nio.file.FileAlreadyExistsException ignored) {
            // Name already taken: try again with another UUID.
        }
    }
    serverInput.setId(uuid);
    logger.info("UUID: {}", uuid);
    Output output = new Output(uuidDir + "/results.txt", uuidDir, true);
    // TODO params to choose if HTML or TXT output desired
    PreProcessor preProcessor = new PreProcessor(coreArgs.getPreProcessorArgs(), Server.stopwordsAll.get(coreArgs.getPreProcessorArgs().getStopwords()));
    logger.info("Processing {} concepts", Server.concepts.size());
    Map<EdamUri, ConceptProcessed> processedConcepts = Server.processor.getProcessedConcepts(Server.concepts, coreArgs.getMapperArgs().getIdfArgs(), coreArgs.getMapperArgs().getMultiplierArgs(), preProcessor);
    logger.info("Loading query");
    Query query = QueryLoader.fromServer(serverInput, Server.concepts, MAX_KEYWORDS_SIZE, MAX_LINKS_SIZE, MAX_PUBLICATION_IDS_SIZE);
    // Use the IDF instance that matches the requested stemming mode.
    Idf idf;
    if (coreArgs.getPreProcessorArgs().isStemming()) {
        idf = Server.idfStemmed;
    } else {
        idf = Server.idf;
    }
    QueryProcessed processedQuery = Server.processor.getProcessedQuery(query, QueryType.server, preProcessor, idf, coreArgs.getFetcherArgs());
    logger.info("Mapping query");
    Mapping mapping = new Mapper(processedConcepts).map(query, processedQuery, coreArgs.getMapperArgs());
    // Benchmark and output take lists, so the single query and its results
    // are wrapped in one-element lists.
    List<Query> queries = Collections.singletonList(query);
    List<List<Webpage>> webpages = Collections.singletonList(processedQuery.getWebpages());
    List<List<Webpage>> docs = Collections.singletonList(processedQuery.getDocs());
    List<List<Publication>> publications = Collections.singletonList(processedQuery.getPublications());
    List<Mapping> mappings = Collections.singletonList(mapping);
    Results results = Benchmark.calculate(queries, mappings);
    long stop = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(stop));
    // Elapsed wall-clock time, converted from milliseconds to seconds.
    logger.info("Mapping took {}s", (stop - start) / 1000.0);
    logger.info("Outputting results");
    output.output(coreArgs, Server.paramsMain, QueryType.server, 1, 1, Server.concepts, queries, webpages, docs, publications, results, start, stop, Server.version);
    URI location = new URI("/" + Server.args.getPath() + "/" + uuid + "/");
    logger.info("POSTED {}", location);
    return Response.seeOther(location).build();
}

/**
 * Rejects a submitted field that is longer than allowed; a {@code null} value is accepted.
 *
 * @param label human-readable field name used at the start of the error message
 * @param value the submitted text, may be {@code null}
 * @param max the maximum allowed number of characters
 * @throws IllegalArgumentException if {@code value} has more than {@code max} characters
 */
private static void checkMaxLength(String label, String value, long max) {
    if (value != null && value.length() > max) {
        throw new IllegalArgumentException(label + " length (" + value.length() + ") is greater than maximum allowed (" + max + ")");
    }
}
Also used : QueryProcessed(org.edamontology.edammap.core.processing.QueryProcessed) ConceptProcessed(org.edamontology.edammap.core.processing.ConceptProcessed) Query(org.edamontology.edammap.core.query.Query) CoreArgs(org.edamontology.edammap.core.args.CoreArgs) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Idf(org.edamontology.edammap.core.idf.Idf) Mapping(org.edamontology.edammap.core.mapping.Mapping) URI(java.net.URI) Mapper(org.edamontology.edammap.core.mapping.Mapper) Results(org.edamontology.edammap.core.benchmarking.Results) Output(org.edamontology.edammap.core.output.Output) List(java.util.List) EdamUri(org.edamontology.edammap.core.edam.EdamUri) ServerInput(org.edamontology.edammap.core.input.ServerInput)

Aggregations

Results (org.edamontology.edammap.core.benchmarking.Results)2 EdamUri (org.edamontology.edammap.core.edam.EdamUri)2 Idf (org.edamontology.edammap.core.idf.Idf)2 Output (org.edamontology.edammap.core.output.Output)2 PreProcessor (org.edamontology.edammap.core.preprocessing.PreProcessor)2 File (java.io.File)1 URI (java.net.URI)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 CoreArgs (org.edamontology.edammap.core.args.CoreArgs)1 Concept (org.edamontology.edammap.core.edam.Concept)1 ServerInput (org.edamontology.edammap.core.input.ServerInput)1 Mapper (org.edamontology.edammap.core.mapping.Mapper)1 Mapping (org.edamontology.edammap.core.mapping.Mapping)1 Param (org.edamontology.edammap.core.output.Param)1 ConceptProcessed (org.edamontology.edammap.core.processing.ConceptProcessed)1 Processor (org.edamontology.edammap.core.processing.Processor)1 QueryProcessed (org.edamontology.edammap.core.processing.QueryProcessed)1 Query (org.edamontology.edammap.core.query.Query)1