Use of org.edamontology.edammap.core.idf.IdfMake in the project edammap by edamontology.
From the class Processor, method getProcessedConcepts.
/**
 * Processes every supplied concept and returns the results keyed by the same URIs.
 *
 * <p>The work is done in three passes. First, each concept is run through
 * {@code processConcept} together with one shared {@link IdfMake}. Second, an {@link Idf} is
 * built from {@code idfMake.getIdf()} and handed to {@code processConceptIdf} for every
 * processed concept. Third, {@code anonymizeProcessedConcept} is called on every processed
 * concept.
 *
 * @param concepts concepts to process, keyed by URI
 * @param idfArgs forwarded to {@code anonymizeProcessedConcept}
 * @param multiplierArgs forwarded to {@code anonymizeProcessedConcept}
 * @param preProcessor forwarded to {@code processConcept}
 * @return a {@link LinkedHashMap} with one processed concept per input entry, inserted in the
 *         iteration order of {@code concepts.entrySet()}
 */
public Map<EdamUri, ConceptProcessed> getProcessedConcepts(Map<EdamUri, Concept> concepts, IdfArgs idfArgs, MultiplierArgs multiplierArgs, PreProcessor preProcessor) {
    Map<EdamUri, ConceptProcessed> result = new LinkedHashMap<>();
    IdfMake idfMake = new IdfMake();

    // Pass 1: process each concept; the same IdfMake instance is passed to every call.
    for (Map.Entry<EdamUri, Concept> entry : concepts.entrySet()) {
        EdamUri uri = entry.getKey();
        ConceptProcessed processed = processConcept(entry.getValue(), idfMake, preProcessor);
        result.put(uri, processed);
    }

    // Pass 2: the Idf is only created after all concepts have gone through pass 1.
    Idf idf = new Idf(idfMake.getIdf());
    for (ConceptProcessed processed : result.values()) {
        processConceptIdf(processed, idf);
    }

    // Pass 3: runs only after every concept has been through pass 2.
    for (ConceptProcessed processed : result.values()) {
        anonymizeProcessedConcept(processed, idfArgs, multiplierArgs);
    }

    return result;
}
Use of org.edamontology.edammap.core.idf.IdfMake in the project edammap by edamontology.
From the class Processor, method makeQueryIdf.
/**
 * Feeds the tokens of every query into an {@link IdfMake} created for {@code outputPath} and
 * writes the result.
 *
 * <p>Each query is turned into a {@code QueryProcessed} via {@code getProcessedQuery}. Its
 * name, keyword and description tokens are always added. Webpage and doc tokens are added only
 * when {@code webpagesDocs} is set. For every non-null processed publication the title,
 * keyword, MeSH, EFO, GO and abstract tokens are added, plus the fulltext tokens when
 * {@code fulltext} is set. {@code idfMake.endDocument()} is called once per query, after all
 * of that query's tokens have been added.
 *
 * @param queries queries whose tokens are added
 * @param type forwarded to {@code getProcessedQuery}
 * @param outputPath passed to the {@link IdfMake} constructor
 * @param webpagesDocs whether webpage and doc tokens are included
 * @param fulltext whether publication fulltext tokens are included
 * @param preProcessor forwarded to {@code getProcessedQuery}
 * @param queryIdf forwarded to {@code getProcessedQuery}
 * @param fetcherArgs forwarded to {@code getProcessedQuery}
 * @return the value returned by {@code idfMake.writeOutput()}
 * @throws IOException if one of the called operations fails with an I/O error
 */
public int makeQueryIdf(List<Query> queries, QueryType type, String outputPath, boolean webpagesDocs, boolean fulltext, PreProcessor preProcessor, Idf queryIdf, FetcherArgs fetcherArgs) throws IOException {
    IdfMake idfMake = new IdfMake(outputPath);
    for (Query query : queries) {
        QueryProcessed processedQuery = getProcessedQuery(query, type, preProcessor, queryIdf, fetcherArgs);
        addTokensToIdf(idfMake, processedQuery.getNameTokens());
        addTokenListsToIdf(idfMake, processedQuery.getKeywordsTokens());
        addTokensToIdf(idfMake, processedQuery.getDescriptionTokens());
        if (webpagesDocs) {
            addTokenListsToIdf(idfMake, processedQuery.getWebpagesTokens());
            addTokenListsToIdf(idfMake, processedQuery.getDocsTokens());
        }
        for (PublicationProcessed processedPublication : processedQuery.getProcessedPublications()) {
            // Null entries are skipped rather than treated as an error.
            if (processedPublication != null) {
                addPublicationTokensToIdf(idfMake, processedPublication, fulltext);
            }
        }
        // All parts of one query (including its publications) count as a single document.
        idfMake.endDocument();
    }
    return idfMake.writeOutput();
}

/** Adds the token parts of one processed publication; fulltext tokens only if {@code fulltext} is set. */
private static void addPublicationTokensToIdf(IdfMake idfMake, PublicationProcessed processedPublication, boolean fulltext) {
    addTokensToIdf(idfMake, processedPublication.getTitleTokens());
    addTokenListsToIdf(idfMake, processedPublication.getKeywordsTokens());
    addTokenListsToIdf(idfMake, processedPublication.getMeshTermsTokens());
    addTokenListsToIdf(idfMake, processedPublication.getEfoTermsTokens());
    addTokenListsToIdf(idfMake, processedPublication.getGoTermsTokens());
    addTokensToIdf(idfMake, processedPublication.getAbstractTokens());
    if (fulltext) {
        addTokensToIdf(idfMake, processedPublication.getFulltextTokens());
    }
}

/** Passes {@code tokens} to {@code idfMake.addTerms} unless {@code tokens} is null. */
private static void addTokensToIdf(IdfMake idfMake, List<String> tokens) {
    if (tokens != null) {
        idfMake.addTerms(tokens);
    }
}

/** Passes every non-null element of {@code tokenLists} to {@code idfMake.addTerms}, in iteration order. */
private static void addTokenListsToIdf(IdfMake idfMake, Iterable<List<String>> tokenLists) {
    for (List<String> tokens : tokenLists) {
        addTokensToIdf(idfMake, tokens);
    }
}
Aggregations