Usage example of org.edamontology.edammap.core.processing.Processor in the edammap project by edamontology: the run method of the class Cli.
/**
 * Runs one complete EDAMmap mapping job from the command line: records the
 * run parameters for the report, initializes the (presumably static) shared
 * state read by the mapper worker threads, loads EDAM concepts and the
 * queries, spawns the workers, waits for them to finish, then benchmarks
 * and outputs the results.
 *
 * NOTE(review): {@code stopwords}, {@code processor}, {@code idf},
 * {@code processedConcepts}, {@code queries}, {@code publications},
 * {@code webpages}, {@code docs}, {@code mappings}, {@code start},
 * {@code lock}, {@code lockDone} and {@code numThreads} are assigned/read
 * without local declaration, so they are fields shared with the worker
 * threads — everything must be fully initialized before the threads start.
 *
 * @param version program version, embedded into the generated output
 * @throws IOException if the ontology, query or output files cannot be read/written
 * @throws ParseException if an input file cannot be parsed
 */
private static void run(Version version) throws IOException, ParseException {
// Main run parameters; these are echoed into the generated report.
List<Param> paramsMain = new ArrayList<>();
paramsMain.add(new Param("Ontology file", CliArgs.EDAM, new File(args.getEdam()).getName(), "https://github.com/edamontology/edamontology/tree/master/releases"));
// A query source given as a URL gets a clickable link in the report;
// a local file is reported by its bare name only.
if (Input.isProtocol(args.getQuery())) {
paramsMain.add(new Param("Query file", CliArgs.QUERY, args.getQuery(), args.getQuery()));
} else {
paramsMain.add(new Param("Query file", CliArgs.QUERY, new File(args.getQuery()).getName()));
}
paramsMain.add(new Param("Type", CliArgs.TYPE, args.getType().toString()));
paramsMain.add(new Param("Output file", CliArgs.OUTPUT, new File(args.getOutput()).getName()));
paramsMain.add(new Param("Report file", CliArgs.REPORT, new File(args.getReport()).getName()));
// The (0.0, null) arguments are presumably lower/upper bounds for numeric
// params (non-negative, no upper limit) — TODO confirm against Param.
paramsMain.add(new Param("Report page size", CliArgs.REPORT_PAGE_SIZE, args.getReportPageSize(), 0.0, null));
paramsMain.add(new Param("Report pagination size", CliArgs.REPORT_PAGINATION_SIZE, args.getReportPaginationSize(), 0.0, null));
paramsMain.add(new Param("Number of threads", CliArgs.THREADS, args.getThreads(), 0.0, null));
// Create the output/report sinks up front so path problems surface early.
Output output = new Output(args.getOutput(), args.getReport(), false);
// Shared state consumed by the worker threads (see class note above).
stopwords = PreProcessor.getStopwords(args.getPreProcessorArgs().getStopwords());
processor = new Processor(args.getProcessorArgs());
idf = null;
// Pick the IDF file matching the stemming setting; idf stays null when the
// corresponding file is not configured.
if (args.getPreProcessorArgs().isStemming()) {
if (args.getProcessorArgs().getIdfStemmed() != null && !args.getProcessorArgs().getIdfStemmed().isEmpty()) {
idf = new Idf(args.getProcessorArgs().getIdfStemmed());
}
} else {
if (args.getProcessorArgs().getIdf() != null && !args.getProcessorArgs().getIdf().isEmpty()) {
idf = new Idf(args.getProcessorArgs().getIdf());
}
}
logger.info("Loading concepts");
Map<EdamUri, Concept> concepts = Edam.load(args.getEdam());
logger.info("Processing {} concepts", concepts.size());
processedConcepts = processor.getProcessedConcepts(concepts, args.getMapperArgs().getIdfArgs(), args.getMapperArgs().getMultiplierArgs(), new PreProcessor(args.getPreProcessorArgs(), stopwords));
logger.info("Loading queries");
queries = QueryLoader.get(args.getQuery(), args.getType(), concepts, args.getFetcherArgs().getTimeout(), args.getFetcherArgs().getPrivateArgs().getUserAgent());
// Pre-fill one null slot per query so workers can write results by query
// index without resizing the lists concurrently.
publications = new ArrayList<>(queries.size());
webpages = new ArrayList<>(queries.size());
docs = new ArrayList<>(queries.size());
mappings = new ArrayList<>(queries.size());
for (int i = 0; i < queries.size(); ++i) {
publications.add(null);
webpages.add(null);
docs.add(null);
mappings.add(null);
}
start = System.currentTimeMillis();
logger.info("Start: {}", Instant.ofEpochMilli(start));
logger.info("Starting mapper threads");
// Each worker is a Cli instance (Cli implements Runnable); daemon threads
// so a forced JVM exit is not blocked by them.
for (int i = 0; i < args.getThreads(); ++i) {
Thread t = new Thread(new Cli());
t.setDaemon(true);
t.start();
}
// Guarded wait: loop re-checks the condition to tolerate spurious wakeups.
// lockDone and numThreads are presumably updated by the workers under the
// same lock, with a notify when a worker finishes — TODO confirm.
synchronized (lock) {
while (!lockDone || numThreads > 0) {
try {
lock.wait();
} catch (InterruptedException e) {
// TODO exit threads cleanly? give timeout for threads to exit? close db? print that exiting and waiting for threads to terminate?
logger.error("Exception!", e);
System.exit(1);
}
}
}
logger.info("All mapper threads stopped");
long stop = System.currentTimeMillis();
logger.info("Stop: {}", Instant.ofEpochMilli(stop));
logger.info("Mapping took {}s", (stop - start) / 1000.0);
// Score the produced mappings against the expected annotations, then emit
// the report and a summary of the headline measures.
Results results = Benchmark.calculate(queries, mappings);
logger.info("Outputting results");
output.output(args, paramsMain, args.getType(), args.getReportPageSize(), args.getReportPaginationSize(), concepts, queries, webpages, docs, publications, results, start, stop, version);
logger.info("{} : {}", results.toStringMeasure(Measure.recall), Measure.recall);
logger.info("{} : {}", results.toStringMeasure(Measure.AveP), Measure.AveP);
}
Usage example of org.edamontology.edammap.core.processing.Processor in the edammap project by edamontology: the run method of the class Server.
/**
 * Configures and starts the EDAMmap server: records the ontology parameter
 * for reports, pre-loads all stopword lists and the configured IDF files,
 * loads the EDAM ontology, then wires up the Grizzly HTTP server (REST API,
 * static file handler, HTML front page and optional access log) and starts it.
 *
 * NOTE(review): {@code paramsMain}, {@code stopwordsAll}, {@code processor},
 * {@code idf}, {@code idfStemmed}, {@code concepts}, {@code logger} and
 * {@code version} are assigned/read without local declaration, so they are
 * fields shared with the request-handling resources.
 *
 * @throws IOException if the ontology or an IDF file cannot be read, the
 *         access log directory cannot be created, or the server fails to start
 * @throws ParseException if the EDAM ontology cannot be parsed
 */
private static void run() throws IOException, ParseException {
	paramsMain.add(new Param("Ontology file", ServerArgs.EDAM, new File(args.getEdam()).getName(), "https://github.com/edamontology/edamontology/tree/master/releases"));

	// Pre-load every available stopword list once so each request can pick any
	// of them without re-reading resources.
	for (Stopwords stopwords : Stopwords.values()) {
		stopwordsAll.put(stopwords, PreProcessor.getStopwords(stopwords));
	}

	processor = new Processor(args.getProcessorArgs());

	// Load both the unstemmed and the stemmed IDF data when configured;
	// the server keeps both available (the per-request choice is made elsewhere).
	if (args.getProcessorArgs().getIdf() != null && !args.getProcessorArgs().getIdf().isEmpty()) {
		idf = new Idf(args.getProcessorArgs().getIdf());
	}
	if (args.getProcessorArgs().getIdfStemmed() != null && !args.getProcessorArgs().getIdfStemmed().isEmpty()) {
		idfStemmed = new Idf(args.getProcessorArgs().getIdfStemmed());
	}

	logger.info("Loading concepts");
	concepts = Edam.load(args.getEdam());

	logger.info("Configuring server");
	// JAX-RS resources are discovered from this package.
	final ResourceConfig rc = new ResourceConfig().packages("org.edamontology.edammap.server");
	// TODO .property(JsonGenerator.PRETTY_PRINTING, true);
	// Last argument false: create without starting, so handlers can be
	// registered before the server accepts connections.
	HttpServer httpServer = GrizzlyHttpServerFactory.createHttpServer(URI.create(args.getBaseUri() + "/" + args.getPath() + "/api"), rc, false);

	// Static files are served for every sub-path of the configured path.
	final StaticHttpHandler filesHttpHandler = new StaticHttpHandler(args.getFiles());
	filesHttpHandler.setDirectorySlashOff(true);
	httpServer.getServerConfiguration().addHttpHandler(filesHttpHandler, "/" + args.getPath() + "/*");

	// The bare path serves the HTML front page generated by Resource.runGet.
	httpServer.getServerConfiguration().addHttpHandler(new HttpHandler() {
		@Override
		public void service(Request request, Response response) throws Exception {
			// TODO replace null with request.getParameterMap()
			String responseText = Resource.runGet(null, request);
			response.setContentType(MediaType.TEXT_HTML);
			// Fix: Content-Length must be the number of encoded bytes, not chars.
			// The previous responseText.length() undercounts any non-ASCII output
			// and truncates the response. Pin the encoding to UTF-8 so the
			// declared length matches what the writer actually emits.
			response.setCharacterEncoding("UTF-8");
			response.setContentLength(responseText.getBytes(java.nio.charset.StandardCharsets.UTF_8).length);
			response.getWriter().write(responseText);
		}
	}, "/" + args.getPath() + "/");

	// Optional daily-rotated access log under <log>/access.
	if (args.getLog() != null) {
		Path accessDir = Paths.get(args.getLog() + "/access");
		if (!Files.exists(accessDir)) {
			Files.createDirectory(accessDir);
		}
		final AccessLogBuilder builder = new AccessLogBuilder(accessDir + "/edammap-access.log");
		builder.rotatedDaily();
		// builder.format(ApacheLogFormat.COMBINED); // TODO
		builder.instrument(httpServer.getServerConfiguration());
	}

	logger.info("Starting server");
	httpServer.start();
	logger.info("{} has started", version.getName());
}
Usage example of org.edamontology.edammap.core.processing.Processor in the edammap project by edamontology: the makeIdf method of the class Util.
/**
 * Computes a query IDF (inverse document frequency) file from the queries in
 * {@code queryPath} and writes it to {@code idfPath}, using a Processor with
 * fetching disabled (only the given database, if any, is consulted).
 *
 * @param queryPath source of the queries to count terms from
 * @param database optional database path handed to the Processor; may be null
 * @param idfPath destination file for the computed IDFs
 * @param args remaining make-IDF options (type, webpages/docs, fulltext, fetcher args)
 * @param stemming whether terms are stemmed before counting
 * @throws IOException if the queries cannot be read or the IDF file cannot be written
 * @throws ParseException if the query input cannot be parsed
 */
private static void makeIdf(String queryPath, String database, String idfPath, UtilArgs args, boolean stemming) throws IOException, ParseException {
	String databaseSuffix = (database == null) ? "" : " using database " + database;
	logger.info("Make query IDF from file {} of type {} to {}{}", queryPath, args.makeIdfType, idfPath, databaseSuffix);

	// Minimal Processor configuration: no fetching, no pre-existing IDFs,
	// only the (possibly null) database.
	ProcessorArgs idfOnlyArgs = new ProcessorArgs();
	idfOnlyArgs.setFetching(false);
	idfOnlyArgs.setDb(database);
	idfOnlyArgs.setIdf(null);
	idfOnlyArgs.setIdfStemmed(null);

	int written = new Processor(idfOnlyArgs).makeQueryIdf(
		QueryLoader.get(queryPath, args.makeIdfType, args.fetcherArgs.getTimeout(), args.fetcherArgs.getPrivateArgs().getUserAgent()),
		args.makeIdfType, idfPath, args.makeIdfWebpagesDocs, args.makeIdfFulltext,
		new PreProcessor(stemming), null, args.fetcherArgs);

	logger.info("Wrote {} IDFs to {}", written, idfPath);
}
Aggregations