use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class CoexpressionServiceImpl method updateNodeDegrees.
/**
 * Updates the node degree of every gene returned by {@code geneDao.loadKnownGenes(t)}, then derives the
 * relative ranks from the collected link counts and passes them to {@code updateRelativeNodeDegrees}.
 *
 * @param t the taxon whose genes are processed
 */
@Override
public void updateNodeDegrees(Taxon t) {
    CoexpressionServiceImpl.log.info("Updating node degree for all genes from " + t);

    // Support -> (gene id -> number of links), kept in order of support. One map for positive links and
    // one for negative links; both are handed to updateNodeDegree for each gene.
    TreeMap<Integer, Map<Long, Integer>> positiveLinkCounts = new TreeMap<>();
    TreeMap<Integer, Map<Long, Integer>> negativeLinkCounts = new TreeMap<>();

    int processed = 0;
    for (Gene gene : this.geneDao.loadKnownGenes(t)) {
        this.updateNodeDegree(gene, positiveLinkCounts, negativeLinkCounts);
        processed++;
        // Progress report every 1000 genes.
        if (processed % 1000 == 0) {
            CoexpressionServiceImpl.log.info("Updated node degree for " + processed + " genes; last was " + gene + " ...");
        }
    }
    CoexpressionServiceImpl.log.info("Updated node degree for " + processed + " genes");

    // Update the ranks. Each entry in the resulting maps (key = gene id) is a list of the ranks at each
    // support threshold, i.e. the rank "at or above" that level of support.
    Map<Long, List<Double>> positiveRelativeRanks = this.computeRelativeRanks(positiveLinkCounts);
    Map<Long, List<Double>> negativeRelativeRanks = this.computeRelativeRanks(negativeLinkCounts);
    this.updateRelativeNodeDegrees(positiveRelativeRanks, negativeRelativeRanks);
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class Gene2GOAssociationDaoImpl method fetchBatch.
/**
 * Runs one HQL query for the given genes and groups the returned ontology entries by gene.
 *
 * @param batch the genes to look up; their ids are bound to the {@code :genes} parameter
 * @return map from each gene that had at least one result row to the set of its ontology entries
 */
private Map<? extends Gene, ? extends Collection<VocabCharacteristic>> fetchBatch(Set<Gene> batch) {
    Map<Long, Gene> genesById = EntityUtils.getIdMap(batch);
    // language=HQL
    final String hql = "select g.id, geneAss.ontologyEntry from Gene2GOAssociationImpl as geneAss join geneAss.gene g where g.id in (:genes)";
    Map<Gene, Collection<VocabCharacteristic>> termsByGene = new HashMap<>();

    Query query = this.getHibernateTemplate().getSessionFactory().getCurrentSession().createQuery(hql);
    query.setFetchSize(batch.size());
    query.setParameterList("genes", genesById.keySet());

    // Each row is a two-element array: [gene id, ontology entry].
    for (Object row : query.list()) {
        Object[] columns = (Object[]) row;
        Long geneId = (Long) columns[0];
        VocabCharacteristic term = (VocabCharacteristic) columns[1];
        Gene gene = genesById.get(geneId);
        assert gene != null;

        Collection<VocabCharacteristic> terms = termsByGene.get(gene);
        if (terms == null) {
            terms = new HashSet<VocabCharacteristic>();
            termsByGene.put(gene, terms);
        }
        terms.add(term);
    }
    return termsByGene;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class Gene2GOAssociationDaoImpl method findByGenes.
/**
 * Looks up the ontology entries for the given genes, querying in batches of 200 genes via
 * {@code fetchBatch} and merging the batch results into one map.
 *
 * @param needToFind the genes to look up
 * @return the merged results of all batch lookups
 */
@Override
public Map<Gene, Collection<VocabCharacteristic>> findByGenes(Collection<Gene> needToFind) {
    StopWatch timer = new StopWatch();
    timer.start();

    final int batchSize = 200;
    Map<Gene, Collection<VocabCharacteristic>> found = new HashMap<>();
    Set<Gene> pending = new HashSet<>();
    int seen = 0;

    for (Gene gene : needToFind) {
        pending.add(gene);
        // Flush as soon as a full batch has accumulated.
        if (pending.size() == batchSize) {
            found.putAll(this.fetchBatch(pending));
            pending.clear();
        }
        seen++;
        if (seen % 1000 == 0) {
            AbstractDao.log.info("Fetched GO associations for " + seen + "/" + needToFind.size() + " genes");
        }
    }

    // Whatever is left over is a final, partial batch.
    if (!pending.isEmpty()) {
        found.putAll(this.fetchBatch(pending));
    }

    // Only report timing when the whole lookup took more than a second.
    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Fetched GO annotations for " + needToFind.size() + " genes in " + timer.getTime() + "ms");
    }
    return found;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class EvidenceImporterCLI method populateCommonFields.
/**
 * File to valueObject conversion: populates the fields common to all evidence from one tokenized line
 * of the input file. Column positions are looked up by header name in {@code this.mapColumns}.
 *
 * @param evidence the value object to fill in
 * @param tokens   the fields of one input line, indexed by column position
 * @throws IOException declared by this method; presumably raised by one of the helper calls (not
 *                     visible here)
 * @throws NumberFormatException if the gene id or the score strength cannot be parsed as a number
 */
private void populateCommonFields(EvidenceValueObject evidence, String[] tokens) throws IOException {
    boolean isNegativeEvidence = false;

    // Primary publications: a ';'-separated list of ids, one publication value object per id.
    String primaryReferencePubmeds = tokens[this.mapColumns.get("PrimaryPubMeds")].trim();
    if (!primaryReferencePubmeds.equalsIgnoreCase("")) {
        String[] tokensPrimary = primaryReferencePubmeds.split(";");
        for (String primary : tokensPrimary) {
            evidence.getPhenotypeAssPubVO().add(PhenotypeAssPubValueObject.createPrimaryPublication(primary.trim()));
        }
    }

    String geneSymbol = tokens[this.mapColumns.get("GeneSymbol")].trim();
    // The GeneId column is optional; without it the id stays empty.
    String geneNcbiId = "";
    if (this.mapColumns.get("GeneId") != null) {
        geneNcbiId = tokens[this.mapColumns.get("GeneId")].trim();
    }

    String evidenceCode = tokens[this.mapColumns.get("EvidenceCode")].trim();
    this.checkEvidenceCodeExits(evidenceCode);

    String description = tokens[this.mapColumns.get("Comments")].trim();
    if (!StringUtil.containsValidCharacter(description)) {
        this.writeError(description + " Ivalid character found (if character is ok add it to StringUtil.containsValidCharacter)");
    }

    // The IsNegative column is optional and may be missing from short lines; only the value "1" marks
    // the evidence as negative.
    if (this.mapColumns.get("IsNegative") != null && this.mapColumns.get("IsNegative") < tokens.length && tokens[this.mapColumns.get("IsNegative")].trim().equals("1")) {
        isNegativeEvidence = true;
    }

    String externalDatabaseName = tokens[this.mapColumns.get("ExternalDatabase")].trim();
    String databaseID = tokens[this.mapColumns.get("DatabaseLink")].trim();
    String originalPhenotype = tokens[this.mapColumns.get("OriginalPhenotype")].trim();
    System.out.println("original phenotype is: " + originalPhenotype);
    String phenotypeMapping = tokens[this.mapColumns.get("PhenotypeMapping")].trim();
    this.verifyMappingType(phenotypeMapping);

    Set<String> phenotypeFromArray = this.trimArray(tokens[this.mapColumns.get("Phenotypes")].split(";"));
    Gene g = this.verifyGeneIdExist(geneNcbiId, geneSymbol);
    SortedSet<CharacteristicValueObject> phenotypes = this.toValuesUri(phenotypeFromArray);

    evidence.setDescription(description);
    evidence.setEvidenceCode(evidenceCode);
    evidence.setEvidenceSource(this.makeEvidenceSource(databaseID, externalDatabaseName));
    // NOTE(review): if there is no GeneId column, geneNcbiId is "" here and parsing throws
    // NumberFormatException - confirm that verifyGeneIdExist rejects that case beforehand.
    evidence.setGeneNCBI(Integer.valueOf(geneNcbiId));
    evidence.setPhenotypes(phenotypes);
    evidence.setIsNegativeEvidence(isNegativeEvidence);
    evidence.setOriginalPhenotype(originalPhenotype);
    evidence.setPhenotypeMapping(phenotypeMapping);

    // Default relationship; for CTD it is refined from keywords in the description. When both keywords
    // are present the later check ("therapeutic") wins.
    evidence.setRelationship("gene-disease association");
    if (externalDatabaseName.equalsIgnoreCase("CTD")) {
        if (description.contains("marker/mechanism")) {
            evidence.setRelationship("biomarker");
        }
        if (description.contains("therapeutic")) {
            evidence.setRelationship("therapeutic target");
        }
    }

    if (this.mapColumns.get("Score") != null && this.mapColumns.get("ScoreType") != null && this.mapColumns.get("Strength") != null) {
        try {
            // All three fields are read before anything is set, so a line that is too short leaves the
            // score value object untouched.
            String score = tokens[this.mapColumns.get("Score")].trim();
            String scoreName = tokens[this.mapColumns.get("ScoreType")].trim();
            String strength = tokens[this.mapColumns.get("Strength")].trim();
            // score
            evidence.getScoreValueObject().setScoreValue(score);
            evidence.getScoreValueObject().setScoreName(scoreName);
            evidence.getScoreValueObject().setStrength(Double.valueOf(strength));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // Deliberately ignored: no score set for this evidence (the line ends before the score columns).
        }
    } else if (!externalDatabaseName.equalsIgnoreCase("")) {
        // No explicit score columns: derive the score from the external source and the gene's taxon.
        this.setScoreDependingOnExternalSource(externalDatabaseName, evidence, g.getTaxon().getCommonName());
    }
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class OmimDatabaseImporter method processOmimFiles.
/**
 * Processes the OMIM files: reads the morbid map line by line, resolves each entry to a gene through
 * the mim2gene mapping, and hands every usable entry to {@code findMapping}. Afterwards the output
 * buffers are written and phenotypes are combined.
 * <p>
 * Each morbid map line is split on '|'. In the first field, the text before the last comma is the
 * description and the first space-separated token after it is the OMIM phenotype id; the third field
 * is the OMIM gene id. Lines whose first field has no comma are skipped.
 *
 * @param morbidmap       path of the morbid map file
 * @param mim2gene        path of the mim2gene file (OMIM id to NCBI gene id)
 * @param omimIdToPubmeds pubmed ids per OMIM id, passed on to {@code findCommonPubmed}
 * @throws Exception on I/O failure, on unparseable numeric ids, or whatever the helpers throw
 */
private void processOmimFiles(String morbidmap, String mim2gene, Map<Long, Collection<Long>> omimIdToPubmeds) throws Exception {
    // mapping found using the mim2gene file, OMIM id ---> Gene NCBI
    Map<String, String> omimIdToGeneNCBI = this.parseFileOmimIdToGeneNCBI(mim2gene);

    // try-with-resources closes the reader, so no explicit close() is needed
    try (BufferedReader br = new BufferedReader(new FileReader(morbidmap))) {
        String line;
        // parse the morbid OMIM file
        while ((line = br.readLine()) != null) {
            String[] tokens = line.split("\\|");
            int pos = tokens[0].lastIndexOf(",");
            // without a comma there is no database link on this line
            if (pos == -1) {
                continue;
            }

            // OMIM description found in the file, the annotator uses the description
            String description = tokens[0].substring(0, pos).trim();
            // evidence code we will use
            String evidenceCode = "TAS";
            // the OMIM id (also is the database link)
            String omimPhenotypeId = tokens[0].substring(pos + 1).trim().split(" ")[0];
            String omimId = "OMIM:" + omimPhenotypeId;
            // OMIM gene id
            String omimGeneId = tokens[2].trim();
            // omimGeneId ---> ncbi id; null when the gene id is not in the mim2gene file
            String ncbiGeneId = omimIdToGeneNCBI.get(omimGeneId);
            if (ncbiGeneId == null) {
                continue;
            }

            // if there is no omim id given we cannot do anything with this line (happens often)
            if (this.notInteger(omimPhenotypeId) || Integer.parseInt(omimPhenotypeId) < 100) {
                continue;
            }

            Gene gene = this.geneService.findByNCBIId(Integer.valueOf(ncbiGeneId));
            if (gene == null) {
                continue;
            }

            String pubmedIds = "";
            Collection<Long> commonsPubmeds = this.findCommonPubmed(Long.valueOf(omimGeneId), Long.valueOf(omimPhenotypeId), omimIdToPubmeds);
            if (!commonsPubmeds.isEmpty()) {
                pubmedIds = StringUtils.join(commonsPubmeds, ";");
            }
            this.findMapping(omimId, gene, pubmedIds, evidenceCode, description, description, OmimDatabaseImporter.OMIM, omimPhenotypeId);
        }
    }

    // Runs only after the whole file was read successfully and the reader has been closed.
    this.writeBuffersAndCloseFiles();
    // special thing to do with OMIM, for the same ncbiGeneId + omimPhenotypeId, combine the phenotype
    this.combinePhenotypes();
}
Aggregations