Search in sources :

Example 1 with ConceptProcessed

use of org.edamontology.edammap.core.processing.ConceptProcessed in project edammap by edamontology.

the class Resource method runPost.

/**
 * Handles a POSTed mapping request: parses the parameters, validates the
 * free-text inputs, creates a fresh results directory, maps the query against
 * the server's concepts, writes the results and redirects to them.
 *
 * @param params the submitted form parameters
 * @param request the incoming request; only its remote address is read, for logging
 * @return a "See Other" redirect response pointing at the location of the written results
 * @throws IOException if the results directory cannot be created, or if thrown by
 *         query processing or result output
 * @throws ParseException declared for the parameter/query parsing calls made here
 * @throws URISyntaxException if the redirect location is not a valid URI
 * @throws IllegalArgumentException if any text input exceeds its maximum allowed length
 */
private Response runPost(MultivaluedMap<String, String> params, Request request) throws IOException, ParseException, URISyntaxException {
    logger.info("POST {} from {}", params, request.getRemoteAddr());
    long start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));
    CoreArgs coreArgs = new CoreArgs();
    ParamParse.parseParams(params, coreArgs);
    // Processor args and private fetcher args always come from the server's own
    // configuration; they are set after parsing so request parameters cannot override them.
    coreArgs.setProcessorArgs(Server.args.getProcessorArgs());
    coreArgs.getFetcherArgs().setPrivateArgs(Server.args.getFetcherPrivateArgs());
    ServerInput serverInput = new ServerInput(ParamParse.getParamString(params, "name"), ParamParse.getParamString(params, "keywords"), ParamParse.getParamString(params, "description"), ParamParse.getParamString(params, "webpage-urls"), ParamParse.getParamString(params, "doc-urls"), ParamParse.getParamString(params, "publication-ids"), ParamParse.getParamString(params, "annotations"));
    // Reject oversized inputs before any directory is created or processing starts.
    checkInputLength("Name", serverInput.getName(), MAX_NAME_LENGTH);
    checkInputLength("Keywords", serverInput.getKeywords(), MAX_KEYWORDS_LENGTH);
    checkInputLength("Description", serverInput.getDescription(), MAX_DESCRIPTION_LENGTH);
    checkInputLength("Webpage URLs", serverInput.getWebpageUrls(), MAX_LINKS_LENGTH);
    checkInputLength("Doc URLs", serverInput.getDocUrls(), MAX_LINKS_LENGTH);
    checkInputLength("Publication IDs", serverInput.getPublicationIds(), MAX_PUBLICATION_IDS_LENGTH);
    checkInputLength("Annotations", serverInput.getAnnotations(), MAX_ANNOTATIONS_LENGTH);
    String uuid;
    String uuidDir;
    while (true) {
        uuid = Server.version.getVersion() + "/" + UUID.randomUUID().toString();
        uuidDir = Server.args.getFiles() + "/" + uuid;
        try {
            // createDirectory checks for existence and creates in one atomic step,
            // so two concurrent requests can never both claim the same directory.
            Files.createDirectory(Paths.get(uuidDir));
            break;
        } catch (java.nio.file.FileAlreadyExistsException e) {
            // Path already taken: loop and draw another UUID.
        }
    }
    serverInput.setId(uuid);
    logger.info("UUID: {}", uuid);
    Output output = new Output(uuidDir + "/results.txt", uuidDir, true);
    // TODO params to choose if HTML or TXT output desired
    PreProcessor preProcessor = new PreProcessor(coreArgs.getPreProcessorArgs(), Server.stopwordsAll.get(coreArgs.getPreProcessorArgs().getStopwords()));
    logger.info("Processing {} concepts", Server.concepts.size());
    Map<EdamUri, ConceptProcessed> processedConcepts = Server.processor.getProcessedConcepts(Server.concepts, coreArgs.getMapperArgs().getIdfArgs(), coreArgs.getMapperArgs().getMultiplierArgs(), preProcessor);
    logger.info("Loading query");
    Query query = QueryLoader.fromServer(serverInput, Server.concepts, MAX_KEYWORDS_SIZE, MAX_LINKS_SIZE, MAX_PUBLICATION_IDS_SIZE);
    // The IDF table has to match the preprocessing: stemmed when stemming is on.
    Idf idf;
    if (coreArgs.getPreProcessorArgs().isStemming()) {
        idf = Server.idfStemmed;
    } else {
        idf = Server.idf;
    }
    QueryProcessed processedQuery = Server.processor.getProcessedQuery(query, QueryType.server, preProcessor, idf, coreArgs.getFetcherArgs());
    logger.info("Mapping query");
    Mapping mapping = new Mapper(processedConcepts).map(query, processedQuery, coreArgs.getMapperArgs());
    // The benchmark and output APIs take lists; wrap the single query and its data.
    List<Query> queries = Collections.singletonList(query);
    List<List<Webpage>> webpages = Collections.singletonList(processedQuery.getWebpages());
    List<List<Webpage>> docs = Collections.singletonList(processedQuery.getDocs());
    List<List<Publication>> publications = Collections.singletonList(processedQuery.getPublications());
    List<Mapping> mappings = Collections.singletonList(mapping);
    Results results = Benchmark.calculate(queries, mappings);
    long stop = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(stop));
    logger.info("Mapping took {}s", (stop - start) / 1000.0);
    logger.info("Outputting results");
    output.output(coreArgs, Server.paramsMain, QueryType.server, 1, 1, Server.concepts, queries, webpages, docs, publications, results, start, stop, Server.version);
    URI location = new URI("/" + Server.args.getPath() + "/" + uuid + "/");
    logger.info("POSTED {}", location);
    return Response.seeOther(location).build();
}

/**
 * Rejects a text input that is longer than allowed. A {@code null} value is accepted.
 *
 * @param label human-readable input name used at the start of the error message
 * @param value the input text, possibly {@code null}
 * @param max maximum allowed number of characters
 * @throws IllegalArgumentException if {@code value} is longer than {@code max}
 */
private static void checkInputLength(String label, String value, long max) {
    if (value != null && value.length() > max) {
        throw new IllegalArgumentException(label + " length (" + value.length() + ") is greater than maximum allowed (" + max + ")");
    }
}
Also used : QueryProcessed(org.edamontology.edammap.core.processing.QueryProcessed) ConceptProcessed(org.edamontology.edammap.core.processing.ConceptProcessed) Query(org.edamontology.edammap.core.query.Query) CoreArgs(org.edamontology.edammap.core.args.CoreArgs) PreProcessor(org.edamontology.edammap.core.preprocessing.PreProcessor) Idf(org.edamontology.edammap.core.idf.Idf) Mapping(org.edamontology.edammap.core.mapping.Mapping) URI(java.net.URI) Mapper(org.edamontology.edammap.core.mapping.Mapper) Results(org.edamontology.edammap.core.benchmarking.Results) Output(org.edamontology.edammap.core.output.Output) List(java.util.List) EdamUri(org.edamontology.edammap.core.edam.EdamUri) ServerInput(org.edamontology.edammap.core.input.ServerInput)

Example 2 with ConceptProcessed

use of org.edamontology.edammap.core.processing.ConceptProcessed in project edammap by edamontology.

the class Mapper method map.

/**
 * Maps a query to concepts and returns the selected matches.
 * <p>
 * Steps, in order: score every concept of a requested branch; flag the query's
 * existing annotations; optionally blend in a path score derived from parent
 * concepts; walk the matches in reverse natural order and add those that pass
 * the filters until the mapping is full; finally, if done annotations are
 * enabled, add existing annotations that were not selected.
 *
 * @param query the query, used here for its existing annotations
 * @param processedQuery the processed form of the query, scored against each concept
 * @param args mapper settings (branches, limits, score thresholds, algorithm weights)
 * @return the populated mapping
 */
public Mapping map(Query query, QueryProcessed processedQuery, MapperArgs args) {
    Mapping result = new Mapping(args.getMatches(), args.getBranches());

    // Score every concept belonging to one of the requested branches.
    Map<EdamUri, Match> scored = new HashMap<>();
    for (Map.Entry<EdamUri, ConceptProcessed> entry : processedConcepts.entrySet()) {
        EdamUri uri = entry.getKey();
        ConceptProcessed concept = entry.getValue();
        if (args.getBranches().contains(uri.getBranch())) {
            // Disallowed obsolete or parentless concepts keep an entry, but with a zero match.
            boolean excluded = (concept.isObsolete() && !args.isObsolete())
                    || (concept.getDirectParents().isEmpty() && !args.isTopLevel());
            Match candidate;
            if (excluded) {
                candidate = new Match(0, new ConceptMatch(0, ConceptMatchType.none, -1), new QueryMatch(0, QueryMatchType.none, -1, -1));
            } else {
                candidate = getBestMatch(concept, processedQuery, args);
            }
            candidate.setEdamUri(uri);
            scored.put(uri, candidate);
        }
    }

    // Flag existing annotations of the query that fall in a requested branch.
    // Relies on each such annotation having an entry in the scored map.
    Set<EdamUri> existing = new LinkedHashSet<>();
    for (EdamUri annotation : query.getAnnotations()) {
        if (!args.getBranches().contains(annotation.getBranch())) {
            continue;
        }
        existing.add(annotation);
        scored.get(annotation).setExistingAnnotation(true);
    }

    // With neither option enabled, existing annotations and their relatives are removed up front.
    if (!(args.isInferiorParentsChildren() || args.isDoneAnnotations())) {
        for (EdamUri annotation : existing) {
            removeParents(annotation, scored);
            removeChildren(annotation, scored);
            scored.get(annotation).setRemoved(true);
        }
    }

    // Path scoring applies only when both weights are positive.
    boolean pathScoring = args.getAlgorithmArgs().getPathWeight() > 0 && args.getAlgorithmArgs().getParentWeight() > 0;
    if (pathScoring) {
        // Snapshot all plain scores first, before any of them is overwritten below.
        for (Match candidate : scored.values()) {
            candidate.setWithoutPathScore(candidate.getScore());
        }
        for (Map.Entry<EdamUri, Match> entry : scored.entrySet()) {
            ConceptProcessed concept = processedConcepts.get(entry.getKey());
            Match candidate = entry.getValue();
            if (concept.getDirectParents().isEmpty() && !args.isTopLevel()) {
                candidate.setRemoved(true);
            } else if (!concept.isObsolete() || args.isObsolete()) {
                double topPathScore = 0;
                for (EdamUri parent : concept.getDirectParents()) {
                    double pathScore = bestPathScore(parent, scored, 1, 0, args.getAlgorithmArgs().getParentWeight());
                    if (pathScore > topPathScore) {
                        topPathScore = pathScore;
                    }
                }
                // Weighted average of the plain score and the best path score.
                candidate.setScore((candidate.getScore() + args.getAlgorithmArgs().getPathWeight() * topPathScore) / (1 + args.getAlgorithmArgs().getPathWeight()));
            }
        }
    }

    // Reverse natural ordering of Match (presumably best score first).
    List<Match> ranked = new ArrayList<>(scored.values());
    ranked.sort(Collections.reverseOrder());

    for (Match candidate : ranked) {
        if (result.isFull()) {
            break;
        }
        EdamUri uri = candidate.getEdamUri();
        if (result.isFull(uri.getBranch())
                || candidate.isRemoved()
                || (processedConcepts.get(uri).isObsolete() && !args.isObsolete())
                || (!args.isDoneAnnotations() && candidate.isExistingAnnotation())) {
            continue;
        }

        // Branch-specific thresholds; both stay 0 for a branch not listed below.
        double goodThreshold = 0;
        double badThreshold = 0;
        switch (uri.getBranch()) {
            case topic:
                goodThreshold = args.getScoreArgs().getGoodScoreTopic();
                badThreshold = args.getScoreArgs().getBadScoreTopic();
                break;
            case operation:
                goodThreshold = args.getScoreArgs().getGoodScoreOperation();
                badThreshold = args.getScoreArgs().getBadScoreOperation();
                break;
            case data:
                goodThreshold = args.getScoreArgs().getGoodScoreData();
                badThreshold = args.getScoreArgs().getBadScoreData();
                break;
            case format:
                goodThreshold = args.getScoreArgs().getGoodScoreFormat();
                badThreshold = args.getScoreArgs().getBadScoreFormat();
                break;
        }

        // Choose which score is classified against the thresholds.
        double classified;
        if (args.getAlgorithmArgs().getMappingStrategy() == MapperStrategy.average) {
            classified = candidate.getBestOneScore();
        } else if (pathScoring) {
            classified = candidate.getWithoutPathScore();
        } else {
            classified = candidate.getScore();
        }

        // Good / medium / bad class decides whether the match may be output.
        boolean wanted;
        if (classified > goodThreshold) {
            wanted = args.getScoreArgs().isOutputGoodScores();
        } else if (classified >= badThreshold && classified <= goodThreshold) {
            wanted = args.getScoreArgs().isOutputMediumScores();
        } else if (classified < badThreshold) {
            wanted = args.getScoreArgs().isOutputBadScores();
        } else {
            // Not in any of the three classes: not filtered.
            wanted = true;
        }
        if (!wanted) {
            continue;
        }

        // Mark relatives as removed so later, lower-ranked iterations skip them.
        if (!args.isInferiorParentsChildren()) {
            removeParents(uri, scored);
            removeChildren(uri, scored);
        }
        addParentsChildren(candidate, result, false);
        result.addMatch(candidate);
    }

    // Add existing annotations that were not selected above, stopping once all have been seen.
    if (args.isDoneAnnotations() && !existing.isEmpty()) {
        int seen = 0;
        for (Match candidate : ranked) {
            if (!existing.contains(candidate.getEdamUri())) {
                continue;
            }
            ++seen;
            if (!result.getMatches(candidate.getEdamUri().getBranch()).contains(candidate)) {
                addParentsChildren(candidate, result, true);
                result.addRemainingAnnotation(candidate);
            }
            if (seen >= existing.size()) {
                break;
            }
        }
    }
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ConceptProcessed(org.edamontology.edammap.core.processing.ConceptProcessed) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EdamUri(org.edamontology.edammap.core.edam.EdamUri) Map(java.util.Map)

Aggregations

EdamUri (org.edamontology.edammap.core.edam.EdamUri)2 ConceptProcessed (org.edamontology.edammap.core.processing.ConceptProcessed)2 URI (java.net.URI)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 Map (java.util.Map)1 CoreArgs (org.edamontology.edammap.core.args.CoreArgs)1 Results (org.edamontology.edammap.core.benchmarking.Results)1 Idf (org.edamontology.edammap.core.idf.Idf)1 ServerInput (org.edamontology.edammap.core.input.ServerInput)1 Mapper (org.edamontology.edammap.core.mapping.Mapper)1 Mapping (org.edamontology.edammap.core.mapping.Mapping)1 Output (org.edamontology.edammap.core.output.Output)1 PreProcessor (org.edamontology.edammap.core.preprocessing.PreProcessor)1 QueryProcessed (org.edamontology.edammap.core.processing.QueryProcessed)1 Query (org.edamontology.edammap.core.query.Query)1