Use of org.edamontology.edammap.core.processing.ConceptProcessed in project edammap by edamontology.
Class Resource, method runPost.
/**
 * Handles a POSTed mapping request: parses and validates the submitted fields, maps the
 * resulting query against the server's concepts, writes the results into a freshly
 * created per-request directory and redirects the client to that location.
 *
 * @param params form parameters of the request; parsed into the core arguments and the server input
 * @param request the incoming request, used here only to log the remote address
 * @return a "see other" redirect pointing at the location of the generated results
 * @throws IOException if the results directory cannot be created (callees may throw it as well)
 * @throws ParseException declared for the callees; not thrown directly by this method
 * @throws URISyntaxException if the redirect location cannot be turned into a valid URI
 * @throws IllegalArgumentException if a submitted field is longer than its configured maximum
 */
private Response runPost(MultivaluedMap<String, String> params, Request request) throws IOException, ParseException, URISyntaxException {
    logger.info("POST {} from {}", params, request.getRemoteAddr());

    long start = System.currentTimeMillis();
    logger.info("Start: {}", Instant.ofEpochMilli(start));

    CoreArgs coreArgs = new CoreArgs();
    ParamParse.parseParams(params, coreArgs);
    // Set after parsing the request parameters: these two come from Server.args
    coreArgs.setProcessorArgs(Server.args.getProcessorArgs());
    coreArgs.getFetcherArgs().setPrivateArgs(Server.args.getFetcherPrivateArgs());

    ServerInput serverInput = new ServerInput(
            ParamParse.getParamString(params, "name"),
            ParamParse.getParamString(params, "keywords"),
            ParamParse.getParamString(params, "description"),
            ParamParse.getParamString(params, "webpage-urls"),
            ParamParse.getParamString(params, "doc-urls"),
            ParamParse.getParamString(params, "publication-ids"),
            ParamParse.getParamString(params, "annotations"));

    // Reject oversized input before any expensive processing starts
    requireMaxLength("Name", serverInput.getName(), MAX_NAME_LENGTH);
    requireMaxLength("Keywords", serverInput.getKeywords(), MAX_KEYWORDS_LENGTH);
    requireMaxLength("Description", serverInput.getDescription(), MAX_DESCRIPTION_LENGTH);
    requireMaxLength("Webpage URLs", serverInput.getWebpageUrls(), MAX_LINKS_LENGTH);
    requireMaxLength("Doc URLs", serverInput.getDocUrls(), MAX_LINKS_LENGTH);
    requireMaxLength("Publication IDs", serverInput.getPublicationIds(), MAX_PUBLICATION_IDS_LENGTH);
    requireMaxLength("Annotations", serverInput.getAnnotations(), MAX_ANNOTATIONS_LENGTH);

    // Claim a unique results directory. Files.createDirectory checks for existence and
    // creates the directory as one atomic operation, so retrying on
    // FileAlreadyExistsException avoids the race of a separate exists-then-create sequence.
    String uuid;
    String uuidDir;
    boolean created = false;
    do {
        uuid = Server.version.getVersion() + "/" + UUID.randomUUID().toString();
        uuidDir = Server.args.getFiles() + "/" + uuid;
        try {
            Files.createDirectory(Paths.get(uuidDir));
            created = true;
        } catch (java.nio.file.FileAlreadyExistsException e) {
            // Path already taken (by an earlier or a concurrent request): try another UUID
        }
    } while (!created);
    serverInput.setId(uuid);
    logger.info("UUID: {}", uuid);

    Output output = new Output(uuidDir + "/results.txt", uuidDir, true);
    // TODO params to choose if HTML or TXT output desired

    PreProcessor preProcessor = new PreProcessor(coreArgs.getPreProcessorArgs(), Server.stopwordsAll.get(coreArgs.getPreProcessorArgs().getStopwords()));

    logger.info("Processing {} concepts", Server.concepts.size());
    Map<EdamUri, ConceptProcessed> processedConcepts = Server.processor.getProcessedConcepts(Server.concepts, coreArgs.getMapperArgs().getIdfArgs(), coreArgs.getMapperArgs().getMultiplierArgs(), preProcessor);

    logger.info("Loading query");
    Query query = QueryLoader.fromServer(serverInput, Server.concepts, MAX_KEYWORDS_SIZE, MAX_LINKS_SIZE, MAX_PUBLICATION_IDS_SIZE);

    // Pick the IDF instance that corresponds to the requested stemming setting
    Idf idf;
    if (coreArgs.getPreProcessorArgs().isStemming()) {
        idf = Server.idfStemmed;
    } else {
        idf = Server.idf;
    }

    QueryProcessed processedQuery = Server.processor.getProcessedQuery(query, QueryType.server, preProcessor, idf, coreArgs.getFetcherArgs());

    logger.info("Mapping query");
    Mapping mapping = new Mapper(processedConcepts).map(query, processedQuery, coreArgs.getMapperArgs());

    // The benchmark and output code take lists; a server request carries exactly one query
    List<Query> queries = Collections.singletonList(query);
    List<List<Webpage>> webpages = Collections.singletonList(processedQuery.getWebpages());
    List<List<Webpage>> docs = Collections.singletonList(processedQuery.getDocs());
    List<List<Publication>> publications = Collections.singletonList(processedQuery.getPublications());
    List<Mapping> mappings = Collections.singletonList(mapping);

    Results results = Benchmark.calculate(queries, mappings);

    long stop = System.currentTimeMillis();
    logger.info("Stop: {}", Instant.ofEpochMilli(stop));
    logger.info("Mapping took {}s", (stop - start) / 1000.0);

    logger.info("Outputting results");
    output.output(coreArgs, Server.paramsMain, QueryType.server, 1, 1, Server.concepts, queries, webpages, docs, publications, results, start, stop, Server.version);

    URI location = new URI("/" + Server.args.getPath() + "/" + uuid + "/");
    logger.info("POSTED {}", location);

    return Response.seeOther(location).build();
}

/**
 * Rejects a submitted field whose text is longer than allowed; a {@code null} value is accepted.
 *
 * @param label human-readable field name used as the start of the error message
 * @param value the submitted text, possibly {@code null}
 * @param maxLength the maximum allowed number of characters
 * @throws IllegalArgumentException if {@code value} is longer than {@code maxLength}
 */
private static void requireMaxLength(String label, String value, long maxLength) {
    if (value != null && value.length() > maxLength) {
        throw new IllegalArgumentException(label + " length (" + value.length() + ") is greater than maximum allowed (" + maxLength + ")");
    }
}
Use of org.edamontology.edammap.core.processing.ConceptProcessed in project edammap by edamontology.
Class Mapper, method map.
/**
 * Maps a processed query onto the processed concepts and collects the selected matches.
 * <p>
 * Every concept of a requested branch gets a match; matches are optionally adjusted by a
 * path score, ordered in reverse natural order and added to the mapping until it is full,
 * subject to the obsolete, annotation and score-class filters found in {@code args}.
 *
 * @param query the original query, read here for its existing annotations
 * @param processedQuery the processed form of the query that concepts are matched against
 * @param args mapper settings (branches, limits, algorithm and score arguments)
 * @return the mapping filled with the selected matches
 */
public Mapping map(Query query, QueryProcessed processedQuery, MapperArgs args) {
    Mapping mapping = new Mapping(args.getMatches(), args.getBranches());

    // One match per concept of a requested branch; excluded concepts get a zero match
    Map<EdamUri, Match> matches = new HashMap<>();
    for (Map.Entry<EdamUri, ConceptProcessed> entry : processedConcepts.entrySet()) {
        EdamUri uri = entry.getKey();
        if (!args.getBranches().contains(uri.getBranch())) {
            continue;
        }
        ConceptProcessed concept = entry.getValue();
        boolean excluded = (concept.isObsolete() && !args.isObsolete())
                || (concept.getDirectParents().isEmpty() && !args.isTopLevel());
        Match conceptMatch = excluded
                ? new Match(0, new ConceptMatch(0, ConceptMatchType.none, -1), new QueryMatch(0, QueryMatchType.none, -1, -1))
                : getBestMatch(concept, processedQuery, args);
        conceptMatch.setEdamUri(uri);
        matches.put(uri, conceptMatch);
    }

    // Existing annotations of the query, restricted to the requested branches
    Set<EdamUri> annotations = new LinkedHashSet<>();
    for (EdamUri annotation : query.getAnnotations()) {
        if (!args.getBranches().contains(annotation.getBranch())) {
            continue;
        }
        annotations.add(annotation);
        matches.get(annotation).setExistingAnnotation(true);
    }

    // Existing annotations, their parents and their children are taken out of the running
    if (!(args.isInferiorParentsChildren() || args.isDoneAnnotations())) {
        for (EdamUri annotation : annotations) {
            removeParents(annotation, matches);
            removeChildren(annotation, matches);
            matches.get(annotation).setRemoved(true);
        }
    }

    // Path scoring is applied only when both weights are positive
    boolean pathScoring = args.getAlgorithmArgs().getPathWeight() > 0 && args.getAlgorithmArgs().getParentWeight() > 0;
    if (pathScoring) {
        // Remember all original scores first, as the path score of one match reads other matches
        for (Match conceptMatch : matches.values()) {
            conceptMatch.setWithoutPathScore(conceptMatch.getScore());
        }
        for (Map.Entry<EdamUri, Match> entry : matches.entrySet()) {
            ConceptProcessed concept = processedConcepts.get(entry.getKey());
            Match conceptMatch = entry.getValue();
            if (concept.getDirectParents().isEmpty() && !args.isTopLevel()) {
                conceptMatch.setRemoved(true);
                continue;
            }
            if (concept.isObsolete() && !args.isObsolete()) {
                continue;
            }
            double bestPath = 0;
            for (EdamUri parent : concept.getDirectParents()) {
                double parentPath = bestPathScore(parent, matches, 1, 0, args.getAlgorithmArgs().getParentWeight());
                bestPath = parentPath > bestPath ? parentPath : bestPath;
            }
            conceptMatch.setScore((conceptMatch.getScore() + args.getAlgorithmArgs().getPathWeight() * bestPath) / (1 + args.getAlgorithmArgs().getPathWeight()));
        }
    }

    // Reverse natural order of Match (presumably best score first)
    List<Match> sortedMatches = new ArrayList<>(matches.values());
    sortedMatches.sort(Collections.reverseOrder());

    for (Match conceptMatch : sortedMatches) {
        if (mapping.isFull()) {
            break;
        }
        EdamUri uri = conceptMatch.getEdamUri();
        if (mapping.isFull(uri.getBranch())
                || conceptMatch.isRemoved()
                || (processedConcepts.get(uri).isObsolete() && !args.isObsolete())
                || (!args.isDoneAnnotations() && conceptMatch.isExistingAnnotation())) {
            continue;
        }

        // Score thresholds depend on the branch of the concept
        double goodScore = 0;
        double badScore = 0;
        switch (uri.getBranch()) {
            case topic:
                goodScore = args.getScoreArgs().getGoodScoreTopic();
                badScore = args.getScoreArgs().getBadScoreTopic();
                break;
            case operation:
                goodScore = args.getScoreArgs().getGoodScoreOperation();
                badScore = args.getScoreArgs().getBadScoreOperation();
                break;
            case data:
                goodScore = args.getScoreArgs().getGoodScoreData();
                badScore = args.getScoreArgs().getBadScoreData();
                break;
            case format:
                goodScore = args.getScoreArgs().getGoodScoreFormat();
                badScore = args.getScoreArgs().getBadScoreFormat();
                break;
        }

        // The score compared against the thresholds depends on strategy and path scoring
        double score = args.getAlgorithmArgs().getMappingStrategy() == MapperStrategy.average
                ? conceptMatch.getBestOneScore()
                : pathScoring ? conceptMatch.getWithoutPathScore() : conceptMatch.getScore();

        // Skip the match if output of its score class (good, medium, bad) is switched off
        boolean suppressed;
        if (score > goodScore) {
            suppressed = !args.getScoreArgs().isOutputGoodScores();
        } else if (score >= badScore && score <= goodScore) {
            suppressed = !args.getScoreArgs().isOutputMediumScores();
        } else if (score < badScore) {
            suppressed = !args.getScoreArgs().isOutputBadScores();
        } else {
            suppressed = false;
        }
        if (suppressed) {
            continue;
        }

        if (!args.isInferiorParentsChildren()) {
            removeParents(uri, matches);
            removeChildren(uri, matches);
        }
        addParentsChildren(conceptMatch, mapping, false);
        mapping.addMatch(conceptMatch);
    }

    // Existing annotations that did not make it into the mapping are recorded separately
    if (args.isDoneAnnotations() && !annotations.isEmpty()) {
        int seen = 0;
        for (Match conceptMatch : sortedMatches) {
            if (!annotations.contains(conceptMatch.getEdamUri())) {
                continue;
            }
            seen++;
            if (!mapping.getMatches(conceptMatch.getEdamUri().getBranch()).contains(conceptMatch)) {
                addParentsChildren(conceptMatch, mapping, true);
                mapping.addRemainingAnnotation(conceptMatch);
            }
            // All annotations found: no need to walk the rest of the list
            if (seen >= annotations.size()) {
                break;
            }
        }
    }

    return mapping;
}
Aggregations