Search in sources :

Example 1 with Mapping

Use of org.edamontology.edammap.core.mapping.Mapping in the project edammap by edamontology.

In the class Cli, method run.

/**
 * Worker loop executed by each mapping thread.
 *
 * <p>On entry the thread counts itself in {@code numThreads} and sets
 * {@code lockDone} while holding {@code lock}. It then repeatedly claims the next
 * unprocessed position of {@code queries} (guarded by the {@code queries} monitor),
 * processes and maps that query, and stores the outcome at the same position of the
 * result lists (guarded by the {@code mappings} monitor). When no positions remain,
 * or if an exception escapes, the thread decrements {@code numThreads} and wakes
 * every thread waiting on {@code lock}.
 */
@Override
public void run() {
    // Announce this worker before doing any work.
    synchronized (lock) {
        numThreads++;
        lockDone = true;
    }
    try {
        // Each worker gets its own pre-processor and mapper instance.
        PreProcessor preProcessor = new PreProcessor(args.getPreProcessorArgs(), stopwords);
        Mapper conceptMapper = new Mapper(processedConcepts);
        for (;;) {
            Query current;
            int claimed;
            // Claim the next query position; the shared cursor is only touched under this monitor.
            synchronized (queries) {
                if (index >= queries.size()) {
                    break;
                }
                current = queries.get(index);
                claimed = index++;
            }
            logger.info("{}/{} @ {}s", claimed + 1, queries.size(), (System.currentTimeMillis() - start) / 1000.0);
            QueryProcessed processed = processor.getProcessedQuery(current, args.getType(), preProcessor, idf, args.getFetcherArgs());
            Mapping result = conceptMapper.map(current, processed, args.getMapperArgs());
            // Publish everything belonging to this query at its original position.
            synchronized (mappings) {
                webpages.set(claimed, processed.getWebpages());
                docs.set(claimed, processed.getDocs());
                publications.set(claimed, processed.getPublications());
                mappings.set(claimed, result);
            }
        }
    } finally {
        // Always deregister and wake up whoever is waiting on the lock.
        synchronized (lock) {
            numThreads--;
            lock.notifyAll();
        }
    }
}
Also used : Mapper(org.edamontology.edammap.core.mapping.Mapper) QueryProcessed(org.edamontology.edammap.core.processing.QueryProcessed) Query(org.edamontology.edammap.core.query.Query) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Mapping(org.edamontology.edammap.core.mapping.Mapping)

Example 2 with Mapping

Use of org.edamontology.edammap.core.mapping.Mapping in the project edammap by edamontology.

In the class Benchmark, method calculate.

/**
 * Scores each mapping against the annotations of the query at the same list position.
 *
 * <p>For every branch of every mapping, each match is recorded as a true positive
 * (it is an existing annotation) or a false positive, and each remaining annotation
 * as a false negative. The counts are added to both the per-branch and the total
 * tallies. For branches in which the query has at least one annotation, the
 * precision, recall, f1, f2, Jaccard, AveP, RP, DCG and DCGa measures are accumulated
 * per branch. Finally each per-branch measure is divided by the number of queries
 * that had annotations in that branch, and the total measures become the mean of
 * the per-branch values over the branches that had any annotated query.
 *
 * @param queries the queries; {@code queries.get(i)} is scored against {@code mappings.get(i)}
 * @param mappings the mappings, index-aligned with {@code queries}
 * @return the collected per-match test outcomes, per-branch measures and total measures
 */
public static Results calculate(List<Query> queries, List<Mapping> mappings) {
    Results results = new Results();

    // Per branch: how many queries have at least one annotation in that branch.
    Map<Branch, Long> size = new EnumMap<>(Branch.class);
    for (Branch branch : Branch.values()) {
        size.put(branch, 0L);
    }

    for (int i = 0; i < queries.size(); ++i) {
        MappingTest mappingTest = new MappingTest();
        Query query = queries.get(i);
        Mapping mapping = mappings.get(i);

        for (Branch branch : mapping.getBranches()) {
            long annotationsSize = (query.getAnnotations() == null) ? 0
                    : query.getAnnotations().stream().filter(annotation -> annotation.getBranch() == branch).count();
            if (annotationsSize > 0) {
                size.merge(branch, 1L, Long::sum);
            }

            int truePositives = 0;
            int falsePositives = 0;
            int falseNegatives = 0;
            double dcg = 0;
            double idealDcg = 0;
            double dcgAlt = 0;
            double idealDcgAlt = 0;

            List<Match> matches = mapping.getMatches(branch);
            for (int rank = 0; rank < matches.size(); ++rank) {
                Match match = matches.get(rank);
                // Discount for position rank (unused at rank 0) and for the alternative formula.
                double log2Rank = Math.log(rank + 1) / Math.log(2);
                double log2Next = Math.log(rank + 2) / Math.log(2);

                if (!match.isExistingAnnotation()) {
                    mappingTest.matches.get(branch).add(new MatchTest(match, Test.fp));
                    ++falsePositives;
                } else {
                    mappingTest.matches.get(branch).add(new MatchTest(match, Test.tp));
                    ++truePositives;
                    double precisionAtRank = truePositives / (double) (truePositives + falsePositives);
                    results.measures.get(branch).addMeasure(Measure.AveP, precisionAtRank / (double) annotationsSize);
                    if (rank < annotationsSize) {
                        results.measures.get(branch).addMeasure(Measure.RP, 1 / (double) annotationsSize);
                    }
                    int rel = 1;
                    dcg += (rank == 0) ? rel : rel / log2Rank;
                    dcgAlt += (Math.pow(2, rel) - 1) / log2Next;
                }

                if (annotationsSize > 0) {
                    // Ideal ordering: the first annotationsSize positions are all relevant.
                    int idealRel = (rank < annotationsSize) ? 1 : 0;
                    idealDcg += (rank == 0) ? idealRel : idealRel / log2Rank;
                    idealDcgAlt += (Math.pow(2, idealRel) - 1) / log2Next;
                }
            }

            for (Match missed : mapping.getRemainingAnnotations(branch)) {
                mappingTest.matches.get(branch).add(new MatchTest(missed, Test.fn));
                ++falseNegatives;
            }

            results.measuresTotal.addTest(Test.tp, truePositives);
            results.measuresTotal.addTest(Test.fp, falsePositives);
            results.measuresTotal.addTest(Test.fn, falseNegatives);
            results.measures.get(branch).addTest(Test.tp, truePositives);
            results.measures.get(branch).addTest(Test.fp, falsePositives);
            results.measures.get(branch).addTest(Test.fn, falseNegatives);

            if (annotationsSize > 0) {
                boolean anyMatches = truePositives > 0 || falsePositives > 0;
                double precision = anyMatches ? truePositives / (double) (truePositives + falsePositives) : 0;
                double recall = truePositives / (double) (truePositives + falseNegatives);
                results.measures.get(branch).addMeasure(Measure.precision, precision);
                results.measures.get(branch).addMeasure(Measure.recall, recall);
                if (truePositives > 0) {
                    double betaSquared = Math.pow(2, 2);
                    results.measures.get(branch).addMeasure(Measure.f1, 2 * (precision * recall) / (precision + recall));
                    results.measures.get(branch).addMeasure(Measure.f2, (1 + betaSquared) * (precision * recall) / ((betaSquared * precision) + recall));
                }
                results.measures.get(branch).addMeasure(Measure.Jaccard, truePositives / (double) (truePositives + falsePositives + falseNegatives));
                if (anyMatches) {
                    results.measures.get(branch).addMeasure(Measure.DCG, dcg / idealDcg);
                    results.measures.get(branch).addMeasure(Measure.DCGa, dcgAlt / idealDcgAlt);
                }
            }
        }
        results.mappings.add(mappingTest);
    }

    // Average each branch over its annotated queries, then fold it into the totals.
    int branchesSize = 0;
    for (Branch branch : Branch.values()) {
        long annotatedQueries = size.get(branch);
        if (annotatedQueries == 0) {
            continue;
        }
        ++branchesSize;
        for (Measure measure : Measure.values()) {
            results.measures.get(branch).divideMeasure(measure, annotatedQueries);
            results.measuresTotal.addMeasure(measure, results.measures.get(branch).getMeasure(measure));
        }
    }

    // Totals are the mean over the branches that contributed.
    if (branchesSize > 0) {
        for (Measure measure : Measure.values()) {
            results.measuresTotal.divideMeasure(measure, branchesSize);
        }
    }
    return results;
}
Also used : Query(org.edamontology.edammap.core.query.Query) Mapping(org.edamontology.edammap.core.mapping.Mapping) Match(org.edamontology.edammap.core.mapping.Match) Branch(org.edamontology.edammap.core.edam.Branch) EnumMap(java.util.EnumMap)

Example 3 with Mapping

Use of org.edamontology.edammap.core.mapping.Mapping in the project edammap by edamontology.

In the class Resource, method runPost.

/**
 * Handles a POSTed mapping request from start to finish.
 *
 * <p>The form parameters are parsed into {@link CoreArgs} and a {@link ServerInput},
 * the free-text fields are checked against their maximum lengths, a fresh output
 * directory named by server version and a random UUID is created, the single query
 * is processed, mapped and benchmarked, the results are written to that directory,
 * and the client is redirected to the directory's URI.
 *
 * @param params the submitted form parameters
 * @param request the incoming request; only its remote address is read here, for logging
 * @return a "see other" redirect response pointing at the location of the results
 * @throws IllegalArgumentException if any submitted text field is longer than its allowed maximum
 * @throws IOException declared by this method; the visible source is {@code Files.createDirectory}, callees may throw it too
 * @throws ParseException declared by this method; presumably raised by one of the callees
 * @throws URISyntaxException if the redirect location cannot be built as a {@link URI}
 */
private Response runPost(MultivaluedMap<String, String> params, Request request) throws IOException, ParseException, URISyntaxException {
    logger.info("POST {} from {}", params, request.getRemoteAddr());
    long start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));

    // Request-specific arguments, completed with the server-wide settings.
    CoreArgs coreArgs = new CoreArgs();
    ParamParse.parseParams(params, coreArgs);
    coreArgs.setProcessorArgs(Server.args.getProcessorArgs());
    coreArgs.getFetcherArgs().setPrivateArgs(Server.args.getFetcherPrivateArgs());

    ServerInput serverInput = new ServerInput(
            ParamParse.getParamString(params, "name"),
            ParamParse.getParamString(params, "keywords"),
            ParamParse.getParamString(params, "description"),
            ParamParse.getParamString(params, "webpage-urls"),
            ParamParse.getParamString(params, "doc-urls"),
            ParamParse.getParamString(params, "publication-ids"),
            ParamParse.getParamString(params, "annotations"));

    // Reject over-long fields; absent (null) fields are accepted.
    String name = serverInput.getName();
    if (name != null && name.length() > MAX_NAME_LENGTH) {
        throw new IllegalArgumentException("Name length (" + name.length() + ") is greater than maximum allowed (" + MAX_NAME_LENGTH + ")");
    }
    String keywords = serverInput.getKeywords();
    if (keywords != null && keywords.length() > MAX_KEYWORDS_LENGTH) {
        throw new IllegalArgumentException("Keywords length (" + keywords.length() + ") is greater than maximum allowed (" + MAX_KEYWORDS_LENGTH + ")");
    }
    String description = serverInput.getDescription();
    if (description != null && description.length() > MAX_DESCRIPTION_LENGTH) {
        throw new IllegalArgumentException("Description length (" + description.length() + ") is greater than maximum allowed (" + MAX_DESCRIPTION_LENGTH + ")");
    }
    String webpageUrls = serverInput.getWebpageUrls();
    if (webpageUrls != null && webpageUrls.length() > MAX_LINKS_LENGTH) {
        throw new IllegalArgumentException("Webpage URLs length (" + webpageUrls.length() + ") is greater than maximum allowed (" + MAX_LINKS_LENGTH + ")");
    }
    String docUrls = serverInput.getDocUrls();
    if (docUrls != null && docUrls.length() > MAX_LINKS_LENGTH) {
        throw new IllegalArgumentException("Doc URLs length (" + docUrls.length() + ") is greater than maximum allowed (" + MAX_LINKS_LENGTH + ")");
    }
    String publicationIds = serverInput.getPublicationIds();
    if (publicationIds != null && publicationIds.length() > MAX_PUBLICATION_IDS_LENGTH) {
        throw new IllegalArgumentException("Publication IDs length (" + publicationIds.length() + ") is greater than maximum allowed (" + MAX_PUBLICATION_IDS_LENGTH + ")");
    }
    String annotations = serverInput.getAnnotations();
    if (annotations != null && annotations.length() > MAX_ANNOTATIONS_LENGTH) {
        throw new IllegalArgumentException("Annotations length (" + annotations.length() + ") is greater than maximum allowed (" + MAX_ANNOTATIONS_LENGTH + ")");
    }

    // Draw random identifiers until one does not name an existing path.
    String uuid;
    String uuidDir;
    while (true) {
        uuid = Server.version.getVersion() + "/" + UUID.randomUUID().toString();
        uuidDir = Server.args.getFiles() + "/" + uuid;
        if (!Files.exists(Paths.get(uuidDir))) {
            break;
        }
    }
    Files.createDirectory(Paths.get(uuidDir));
    serverInput.setId(uuid);
    logger.info("UUID: {}", uuid);

    Output output = new Output(uuidDir + "/results.txt", uuidDir, true);
    // TODO params to choose if HTML or TXT output desired

    PreProcessor preProcessor = new PreProcessor(
            coreArgs.getPreProcessorArgs(),
            Server.stopwordsAll.get(coreArgs.getPreProcessorArgs().getStopwords()));

    logger.info("Processing {} concepts", Server.concepts.size());
    Map<EdamUri, ConceptProcessed> processedConcepts = Server.processor.getProcessedConcepts(
            Server.concepts,
            coreArgs.getMapperArgs().getIdfArgs(),
            coreArgs.getMapperArgs().getMultiplierArgs(),
            preProcessor);

    logger.info("Loading query");
    Query query = QueryLoader.fromServer(serverInput, Server.concepts, MAX_KEYWORDS_SIZE, MAX_LINKS_SIZE, MAX_PUBLICATION_IDS_SIZE);

    // Use the IDF instance that matches the requested stemming setting.
    Idf idf = coreArgs.getPreProcessorArgs().isStemming() ? Server.idfStemmed : Server.idf;
    QueryProcessed processedQuery = Server.processor.getProcessedQuery(query, QueryType.server, preProcessor, idf, coreArgs.getFetcherArgs());

    logger.info("Mapping query");
    Mapper mapper = new Mapper(processedConcepts);
    Mapping mapping = mapper.map(query, processedQuery, coreArgs.getMapperArgs());

    // Benchmarking and output work on lists, so wrap the single query and its data.
    List<Query> singleQuery = Collections.singletonList(query);
    List<List<Webpage>> singleWebpages = Collections.singletonList(processedQuery.getWebpages());
    List<List<Webpage>> singleDocs = Collections.singletonList(processedQuery.getDocs());
    List<List<Publication>> singlePublications = Collections.singletonList(processedQuery.getPublications());
    List<Mapping> singleMapping = Collections.singletonList(mapping);
    Results results = Benchmark.calculate(singleQuery, singleMapping);

    long stop = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(stop));
    logger.info("Mapping took {}s", (stop - start) / 1000.0);

    logger.info("Outputting results");
    output.output(coreArgs, Server.paramsMain, QueryType.server, 1, 1, Server.concepts, singleQuery, singleWebpages, singleDocs, singlePublications, results, start, stop, Server.version);

    URI location = new URI("/" + Server.args.getPath() + "/" + uuid + "/");
    logger.info("POSTED {}", location);
    return Response.seeOther(location).build();
}
Also used : QueryProcessed(org.edamontology.edammap.core.processing.QueryProcessed) ConceptProcessed(org.edamontology.edammap.core.processing.ConceptProcessed) Query(org.edamontology.edammap.core.query.Query) CoreArgs(org.edamontology.edammap.core.args.CoreArgs) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Idf(org.edamontology.edammap.core.idf.Idf) Mapping(org.edamontology.edammap.core.mapping.Mapping) URI(java.net.URI) Mapper(org.edamontology.edammap.core.mapping.Mapper) Results(org.edamontology.edammap.core.benchmarking.Results) Output(org.edamontology.edammap.core.output.Output) List(java.util.List) EdamUri(org.edamontology.edammap.core.edam.EdamUri) ServerInput(org.edamontology.edammap.core.input.ServerInput)

Aggregations

Mapping (org.edamontology.edammap.core.mapping.Mapping)3 Query (org.edamontology.edammap.core.query.Query)3 Mapper (org.edamontology.edammap.core.mapping.Mapper)2 PreProcessor (org.edamontology.edammap.core.preprocessing.PreProcessor)2 QueryProcessed (org.edamontology.edammap.core.processing.QueryProcessed)2 URI (java.net.URI)1 EnumMap (java.util.EnumMap)1 List (java.util.List)1 CoreArgs (org.edamontology.edammap.core.args.CoreArgs)1 Results (org.edamontology.edammap.core.benchmarking.Results)1 Branch (org.edamontology.edammap.core.edam.Branch)1 EdamUri (org.edamontology.edammap.core.edam.EdamUri)1 Idf (org.edamontology.edammap.core.idf.Idf)1 ServerInput (org.edamontology.edammap.core.input.ServerInput)1 Match (org.edamontology.edammap.core.mapping.Match)1 Output (org.edamontology.edammap.core.output.Output)1 ConceptProcessed (org.edamontology.edammap.core.processing.ConceptProcessed)1