Search in sources :

Example 76 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class CoexpressionServiceImpl method updateNodeDegrees.

/**
 * Calls {@code updateNodeDegree} for every gene returned by {@code geneDao.loadKnownGenes(t)}, then computes the
 * relative ranks from the collected link counts and passes them to {@code updateRelativeNodeDegrees}.
 * Progress is logged every 1000 genes.
 *
 * @param t taxon whose known genes are processed
 */
@Override
public void updateNodeDegrees(Taxon t) {
    CoexpressionServiceImpl.log.info("Updating node degree for all genes from " + t);

    // support -> (gene id -> number of links), ordered by support; filled in by updateNodeDegree.
    TreeMap<Integer, Map<Long, Integer>> positiveLinkCounts = new TreeMap<>();
    TreeMap<Integer, Map<Long, Integer>> negativeLinkCounts = new TreeMap<>();

    int processed = 0;
    for (Gene gene : this.geneDao.loadKnownGenes(t)) {
        this.updateNodeDegree(gene, positiveLinkCounts, negativeLinkCounts);
        processed++;
        boolean reportProgress = processed % 1000 == 0;
        if (reportProgress) {
            CoexpressionServiceImpl.log.info("Updated node degree for " + processed + " genes; last was " + gene + " ...");
        }
    }
    CoexpressionServiceImpl.log.info("Updated node degree for " + processed + " genes");

    /*
     * Update the ranks. Each computed map (key = gene id) holds a list of the ranks at each support threshold,
     * i.e. the rank "at or above" that level of support. The positive map is computed before the negative one.
     */
    this.updateRelativeNodeDegrees(this.computeRelativeRanks(positiveLinkCounts),
            this.computeRelativeRanks(negativeLinkCounts));
}
Also used : Gene(ubic.gemma.model.genome.Gene) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 77 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class Gene2GOAssociationDaoImpl method fetchBatch.

/**
 * Runs one HQL query that selects the gene id and ontology entry of every {@code Gene2GOAssociationImpl} whose
 * gene id belongs to the given batch, and groups the ontology entries by gene.
 *
 * @param batch genes to look up; their ids are bound to the {@code genes} query parameter
 * @return the genes of the batch that appeared in the query result, each mapped to the set of its ontology
 *         entries
 */
private Map<? extends Gene, ? extends Collection<VocabCharacteristic>> fetchBatch(Set<Gene> batch) {
    Map<Long, Gene> genesById = EntityUtils.getIdMap(batch);
    // language=HQL
    final String hql = "select g.id, geneAss.ontologyEntry from Gene2GOAssociationImpl as geneAss join geneAss.gene g where g.id in (:genes)";

    Query batchQuery = this.getHibernateTemplate().getSessionFactory().getCurrentSession().createQuery(hql);
    batchQuery.setFetchSize(batch.size());
    batchQuery.setParameterList("genes", genesById.keySet());

    Map<Gene, Collection<VocabCharacteristic>> termsByGene = new HashMap<>();
    List<?> rows = batchQuery.list();
    for (Object row : rows) {
        // Each row is the pair selected by the query: [gene id, ontology entry].
        Object[] columns = (Object[]) row;
        Long geneId = (Long) columns[0];
        VocabCharacteristic term = (VocabCharacteristic) columns[1];

        Gene gene = genesById.get(geneId);
        assert gene != null;

        Collection<VocabCharacteristic> terms = termsByGene.get(gene);
        if (terms == null) {
            // First entry seen for this gene.
            terms = new HashSet<VocabCharacteristic>();
            termsByGene.put(gene, terms);
        }
        terms.add(term);
    }
    return termsByGene;
}
Also used : Query(org.hibernate.Query) VocabCharacteristic(ubic.gemma.model.common.description.VocabCharacteristic) Gene(ubic.gemma.model.genome.Gene)

Example 78 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class Gene2GOAssociationDaoImpl method findByGenes.

/**
 * Collects the results of {@code fetchBatch} for the given genes, handing them over in batches of at most 200.
 * Progress is logged every 1000 genes, and a summary is logged when the whole lookup took more than 1000 ms.
 *
 * @param needToFind genes to look up
 * @return the merged results of all batches
 */
@Override
public Map<Gene, Collection<VocabCharacteristic>> findByGenes(Collection<Gene> needToFind) {
    Map<Gene, Collection<VocabCharacteristic>> associationsByGene = new HashMap<>();
    StopWatch timer = new StopWatch();
    timer.start();

    final int batchSize = 200;
    Set<Gene> pending = new HashSet<>();
    int seen = 0;
    for (Gene gene : needToFind) {
        pending.add(gene);
        boolean batchIsFull = pending.size() == batchSize;
        if (batchIsFull) {
            associationsByGene.putAll(this.fetchBatch(pending));
            pending.clear();
        }

        seen++;
        if (seen % 1000 == 0) {
            AbstractDao.log.info("Fetched GO associations for " + seen + "/" + needToFind.size() + " genes");
        }
    }

    // Flush whatever is left over from the last, partially filled batch.
    if (!pending.isEmpty()) {
        associationsByGene.putAll(this.fetchBatch(pending));
    }

    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Fetched GO annotations for " + needToFind.size() + " genes in " + timer.getTime() + "ms");
    }
    return associationsByGene;
}
Also used : Gene(ubic.gemma.model.genome.Gene) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 79 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class EvidenceImporterCLI method populateCommonFields.

/**
 * File to valueObject conversion, populate the basics: reads the columns common to every evidence type from one
 * tokenized input line and copies them onto the given value object.
 * <p>
 * Column positions are looked up by header name in {@code mapColumns}. The columns {@code GeneId},
 * {@code IsNegative}, {@code Score}, {@code ScoreType} and {@code Strength} are treated as optional; every other
 * column read here is dereferenced unconditionally.
 *
 * @param evidence value object to populate
 * @param tokens   the fields of one input line
 * @throws IOException declared by this method; presumably raised by one of the helpers it calls (not visible
 *                     here)
 */
private void populateCommonFields(EvidenceValueObject evidence, String[] tokens) throws IOException {
    String primaryReferencePubmeds = this.readTrimmedColumn(tokens, "PrimaryPubMeds");
    if (!primaryReferencePubmeds.isEmpty()) {
        // several PubMed ids may be given, separated by ';'
        for (String primary : primaryReferencePubmeds.split(";")) {
            evidence.getPhenotypeAssPubVO().add(PhenotypeAssPubValueObject.createPrimaryPublication(primary.trim()));
        }
    }

    String geneSymbol = this.readTrimmedColumn(tokens, "GeneSymbol");
    String geneNcbiId = "";
    if (this.mapColumns.get("GeneId") != null) {
        geneNcbiId = this.readTrimmedColumn(tokens, "GeneId");
    }

    String evidenceCode = this.readTrimmedColumn(tokens, "EvidenceCode");
    this.checkEvidenceCodeExits(evidenceCode);

    String description = this.readTrimmedColumn(tokens, "Comments");
    if (!StringUtil.containsValidCharacter(description)) {
        this.writeError(description + " Ivalid character found (if character is ok add it to StringUtil.containsValidCharacter)");
    }

    // Negative only when the column exists, the line is long enough to contain it, and its value is "1".
    Integer isNegativeColumn = this.mapColumns.get("IsNegative");
    boolean isNegativeEvidence = isNegativeColumn != null && isNegativeColumn < tokens.length
            && tokens[isNegativeColumn].trim().equals("1");

    String externalDatabaseName = this.readTrimmedColumn(tokens, "ExternalDatabase");
    String databaseID = this.readTrimmedColumn(tokens, "DatabaseLink");
    String originalPhenotype = this.readTrimmedColumn(tokens, "OriginalPhenotype");
    // NOTE(review): written straight to stdout rather than through a logger - confirm this is intended.
    System.out.println("original phenotype is: " + originalPhenotype);

    String phenotypeMapping = this.readTrimmedColumn(tokens, "PhenotypeMapping");
    this.verifyMappingType(phenotypeMapping);

    Set<String> phenotypeFromArray = this.trimArray(tokens[this.mapColumns.get("Phenotypes")].split(";"));
    Gene g = this.verifyGeneIdExist(geneNcbiId, geneSymbol);
    SortedSet<CharacteristicValueObject> phenotypes = this.toValuesUri(phenotypeFromArray);

    evidence.setDescription(description);
    evidence.setEvidenceCode(evidenceCode);
    evidence.setEvidenceSource(this.makeEvidenceSource(databaseID, externalDatabaseName));
    // NOTE(review): throws NumberFormatException when the GeneId column is absent or blank, unless
    // verifyGeneIdExist already rejects that case - confirm.
    evidence.setGeneNCBI(Integer.valueOf(geneNcbiId));
    evidence.setPhenotypes(phenotypes);
    evidence.setIsNegativeEvidence(isNegativeEvidence);
    evidence.setOriginalPhenotype(originalPhenotype);
    evidence.setPhenotypeMapping(phenotypeMapping);

    // Default relationship; for CTD it is refined from the description. When both keywords are present the
    // later check ("therapeutic") wins.
    evidence.setRelationship("gene-disease association");
    if (externalDatabaseName.equalsIgnoreCase("CTD")) {
        if (description.contains("marker/mechanism")) {
            evidence.setRelationship("biomarker");
        }
        if (description.contains("therapeutic")) {
            evidence.setRelationship("therapeutic target");
        }
    }

    boolean hasScoreColumns = this.mapColumns.get("Score") != null && this.mapColumns.get("ScoreType") != null
            && this.mapColumns.get("Strength") != null;
    if (hasScoreColumns) {
        try {
            String score = this.readTrimmedColumn(tokens, "Score");
            String scoreName = this.readTrimmedColumn(tokens, "ScoreType");
            String strength = this.readTrimmedColumn(tokens, "Strength");
            // score
            evidence.getScoreValueObject().setScoreValue(score);
            evidence.getScoreValueObject().setScoreName(scoreName);
            evidence.getScoreValueObject().setStrength(Double.valueOf(strength));
        } catch (ArrayIndexOutOfBoundsException ignored) {
            // no score set for this evidence, blank space: the line is shorter than the score columns
        }
    } else if (!externalDatabaseName.isEmpty()) {
        this.setScoreDependingOnExternalSource(externalDatabaseName, evidence, g.getTaxon().getCommonName());
    }
}

/**
 * Returns the trimmed content of the named column of one input line. Fails with a NullPointerException when the
 * column name is not in {@code mapColumns} and with an ArrayIndexOutOfBoundsException when the line has fewer
 * fields than the column index.
 */
private String readTrimmedColumn(String[] tokens, String columnName) {
    return tokens[this.mapColumns.get(columnName)].trim();
}
Also used : Gene(ubic.gemma.model.genome.Gene)

Example 80 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class OmimDatabaseImporter method processOmimFiles.

/**
 * Processes the OMIM files to get the data out and manipulate it: reads the morbidmap file line by line and, for
 * every line that yields a usable phenotype id and a known gene, hands the extracted values to
 * {@code findMapping}. Afterwards the buffers are written and the phenotypes combined.
 * <p>
 * A line is skipped when its first '|'-separated field contains no comma, when its OMIM gene id has no NCBI id in
 * the mim2gene mapping, when the phenotype id is not an integer or is below 100, or when no gene is found for
 * the NCBI id.
 *
 * @param morbidmap       path of the morbidmap file to read
 * @param mim2gene        path of the mim2gene file, passed to {@code parseFileOmimIdToGeneNCBI}
 * @param omimIdToPubmeds passed through to {@code findCommonPubmed}
 * @throws Exception if reading the file or any of the called helpers fails
 */
private void processOmimFiles(String morbidmap, String mim2gene, Map<Long, Collection<Long>> omimIdToPubmeds) throws Exception {
    // mapping found using the mim2gene file, OMIM id ---> Gene NCBI
    Map<String, String> omimIdToGeneNCBI = this.parseFileOmimIdToGeneNCBI(mim2gene);

    // try-with-resources closes the reader on exit, so no explicit close() is needed
    try (BufferedReader br = new BufferedReader(new FileReader(morbidmap))) {
        String line;
        // parse the morbid OMIM file
        while ((line = br.readLine()) != null) {
            String[] tokens = line.split("\\|");
            int pos = tokens[0].lastIndexOf(",");
            if (pos == -1) {
                // no database link on this line
                continue;
            }

            // OMIM description found in the file, the annotator uses the description
            String description = tokens[0].substring(0, pos).trim();
            // evidence code we will use
            String evidenceCode = "TAS";
            // the OMIM id (also is the database link): first word after the last comma
            String omimPhenotypeId = tokens[0].substring(pos + 1).trim().split(" ")[0];
            String omimId = "OMIM:" + omimPhenotypeId;
            // OMIM gene id
            String omimGeneId = tokens[2].trim();

            // omimGeneId ---> ncbi id; null when the gene id is not found in the other file
            String ncbiGeneId = omimIdToGeneNCBI.get(omimGeneId);
            if (ncbiGeneId == null) {
                continue;
            }
            // if there is no omim id given we cannot do anything with this line (happens often)
            if (this.notInteger(omimPhenotypeId) || Integer.parseInt(omimPhenotypeId) < 100) {
                continue;
            }

            Gene gene = this.geneService.findByNCBIId(Integer.valueOf(ncbiGeneId));
            if (gene == null) {
                continue;
            }

            Collection<Long> commonsPubmeds = this.findCommonPubmed(Long.valueOf(omimGeneId), Long.valueOf(omimPhenotypeId), omimIdToPubmeds);
            String pubmedIds = commonsPubmeds.isEmpty() ? "" : StringUtils.join(commonsPubmeds, ";");
            // the description is passed for both description arguments, as in the original call
            this.findMapping(omimId, gene, pubmedIds, evidenceCode, description, description, OmimDatabaseImporter.OMIM, omimPhenotypeId);
        }
        this.writeBuffersAndCloseFiles();
    }
    // special thing to do with OMIM, for the same ncbiGeneId + omimPhenotypeId, combine the phenotype
    this.combinePhenotypes();
}
Also used : Gene(ubic.gemma.model.genome.Gene)

Aggregations

Gene (ubic.gemma.model.genome.Gene)186 Taxon (ubic.gemma.model.genome.Taxon)34 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)32 StopWatch (org.apache.commons.lang3.time.StopWatch)31 Test (org.junit.Test)24 HashSet (java.util.HashSet)23 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)20 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)18 Element (org.w3c.dom.Element)16 ArrayList (java.util.ArrayList)13 Transactional (org.springframework.transaction.annotation.Transactional)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 Collection (java.util.Collection)11 OntologyTerm (ubic.basecode.ontology.model.OntologyTerm)11 CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)10 HashMap (java.util.HashMap)8 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)7