Search in sources :

Example 6 with Ensembl2NcbiValueObject

use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.

the class BioMartEnsemblNcbiObjectGeneratorTest method testGenerate.

/**
 * Tests that, given a taxon BioMart file, the generator returns a map of {@link Ensembl2NcbiValueObject}s and that
 * the genes are mapped as expected. This could be checked through the parser, but a quick test is done here. The
 * awk commands in the comments below were used to derive the expected counts from the test file.
 *
 * @throws Exception if generation fails; propagated (rather than caught and replaced by a bare {@code fail()}) so
 *                   that JUnit reports the original cause and stack trace
 */
@Test
public void testGenerate() throws Exception {
    biomartEnsemblNcbiObjectGenerator.setBioMartFileName(taxonBiomartFile);
    Map<String, Ensembl2NcbiValueObject> map = biomartEnsemblNcbiObjectGenerator.generate(taxa);

    long counterEnsemblToManyGeneids = 0;
    long counterEnsemblToOneGeneids = 0;
    long counterNumberGenes = 0;
    long countHowManyNoGenes = 0;

    // awk -F'\t' 'length($3)>1' test.txt | awk -F'\t' '{print $4}' | uniq |sort | wc -l -1
    // there are 510 records which have one or more gene mapping
    assertEquals(510, map.size());

    for (Ensembl2NcbiValueObject biomart : map.values()) {
        int numberOfGenes = biomart.getEntrezgenes().size();
        if (numberOfGenes > 1) {
            // entry maps to several entrez genes
            counterEnsemblToManyGeneids++;
        } else if (numberOfGenes == 1) {
            // entry maps to exactly one entrez gene
            counterEnsemblToOneGeneids++;
        } else {
            // entry has no entrez genes; asserted below to never happen
            countHowManyNoGenes++;
        }

        // count how many non-empty gene ids there are in total
        for (String geneE : biomart.getEntrezgenes()) {
            if (!geneE.isEmpty()) {
                counterNumberGenes++;
            }
        }
    }

    // awk -F'\t' 'length($3)>1' test.txt | awk -F"\t" '{if (a[$4]) { print $4 } a[$4] = $0}' | sort | uniq | wc
    // -l
    assertEquals(75, counterEnsemblToManyGeneids);
    // awk -F'\t' 'length($3)>1' test.txt | awk -F"\t" '{if (a[$4]==null) { print $4 } a[$4] = $0}' | sort |
    // uniq | wc -l -75
    assertEquals(435, counterEnsemblToOneGeneids);
    // there should be none with no genes as they are filtered out
    assertEquals(0, countHowManyNoGenes);
    // test the file awk -F'\t' 'length($3)>1' test.txt | awk -F'\t' '{print $3}' | uniq |sort | wc -l
    assertEquals(638, counterNumberGenes);
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) Test(org.junit.Test)

Example 7 with Ensembl2NcbiValueObject

use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.

the class BioMartEnsemblNcbiParserTest method testParseValidFileMouse.

/**
 * Tests that a BioMart mouse file can be parsed and that two known Ensembl gene ids are present with the expected
 * number of entrez genes.
 *
 * @throws Exception if the file cannot be parsed; propagated (rather than caught and replaced by a bare
 *                   {@code fail()}) so that JUnit reports the original cause and stack trace
 */
@Test
public void testParseValidFileMouse() throws Exception {
    String[] attributesToGet = new String[] { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "" };
    String fileNameStringmouse = "/data/loader/protein/biomart/biomartmmusculus.txt";
    URL myurl = this.getClass().getResource(fileNameStringmouse);

    parser.setBioMartFields(attributesToGet);
    parser.parse(new File(myurl.getFile()));
    Collection<Ensembl2NcbiValueObject> items = parser.getResults();

    boolean isItemThereOne = false;
    boolean isItemThereTwo = false;
    // 27 unique peptide ids, but only 20 have entrez genes; the others get filtered out
    assertEquals(20, items.size());
    for (Ensembl2NcbiValueObject item : items) {
        if (item.getEnsemblGeneId().equals("ENSMUSG00000064341")) {
            assertEquals(2, item.getEntrezgenes().size());
            isItemThereOne = true;
        }
        if (item.getEnsemblGeneId().equals("ENSMUSG00000057782")) {
            // expected value goes first so that a failure message reads correctly
            assertEquals(1, item.getEntrezgenes().size());
            isItemThereTwo = true;
        }
    }
    // both gene ids must have been seen, otherwise the per-item assertions above never ran
    assertTrue(isItemThereTwo);
    assertTrue(isItemThereOne);
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) File(java.io.File) URL(java.net.URL) Test(org.junit.Test)

Example 8 with Ensembl2NcbiValueObject

use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.

the class BioMartEnsemblNcbiParserTest method testParseOneValidLineHuman.

/*
 * Exercises parser.parseOneLine(String) with a tab-separated human record made of five columns (gene id,
 * transcript id, entrez gene, peptide id, hgnc id) and checks every field of the resulting value object.
 */
@Test
public void testParseOneValidLineHuman() {
    String[] humanFields = { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "hgnc_id" };
    parser.setBioMartFields(humanFields);

    // one record, columns in the same order as the configured fields
    String record = "ENSG00000220023\tENST00000418749\t10013421\tENST00000418749\t12123";
    Ensembl2NcbiValueObject parsed = parser.parseOneLine(record);

    assertTrue(parsed.getNcbiTaxonId().equals(10));
    assertEquals("ENSG00000220023", parsed.getEnsemblGeneId());
    assertEquals("ENST00000418749", parsed.getEnsemblTranscriptId());
    assertTrue(parsed.getEntrezgenes().contains("10013421"));
    assertEquals("ENST00000418749", parsed.getEnsemblPeptideId());
    assertEquals("12123", parsed.getHgnc_id());
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) Test(org.junit.Test)

Example 9 with Ensembl2NcbiValueObject

use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.

the class BioMartEnsemblNcbiParserTest method testParseOneValidLineNonHuman.

/*
 * Exercises parser.parseOneLine(String) with a tab-separated four-column record (gene id, transcript id, entrez
 * gene, peptide id; no hgnc id column) and checks every field of the resulting value object.
 */
@Test
public void testParseOneValidLineNonHuman() {
    // one record without the trailing hgnc id column
    String record = "ENSG00000220023\tENST00000418749\t100134091\tENST00000418749";
    Ensembl2NcbiValueObject parsed = parser.parseOneLine(record);

    assertTrue(parsed.getNcbiTaxonId().equals(10));
    assertEquals("ENSG00000220023", parsed.getEnsemblGeneId());
    assertEquals("ENST00000418749", parsed.getEnsemblTranscriptId());
    assertTrue(parsed.getEntrezgenes().contains("100134091"));
    assertEquals("ENST00000418749", parsed.getEnsemblPeptideId());
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) Test(org.junit.Test)

Example 10 with Ensembl2NcbiValueObject

use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.

the class BioMartEnsemblNcbiParserTest method testParseValidFileHuman.

/**
 * Tests that a BioMart human file can be parsed: checks the number of results and, for one known Ensembl gene id,
 * the number of entrez genes and the HGNC id.
 *
 * @throws Exception if the file cannot be parsed; propagated (rather than caught and replaced by a bare
 *                   {@code fail()}) so that JUnit reports the original cause and stack trace
 */
@Test
public void testParseValidFileHuman() throws Exception {
    String[] attributesToGet = new String[] { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "hgnc_id" };
    String fileName = "/data/loader/protein/biomart/biomartsapiens.txt";
    URL myurl = this.getClass().getResource(fileName);

    parser.setBioMartFields(attributesToGet);
    parser.parse(new File(myurl.getFile()));
    Collection<Ensembl2NcbiValueObject> items = parser.getResults();

    // 39 unique proteins and 36 unique genes
    // NOTE(review): the figures above do not obviously match the asserted count of 10 - confirm against the
    // fixture file.
    assertEquals(10, items.size());
    for (Ensembl2NcbiValueObject item : items) {
        // NOTE(review): nothing asserts that this gene id is actually present, so these checks are skipped
        // silently if it is missing.
        if (item.getEnsemblGeneId().equals("ENSG00000215764")) {
            assertEquals(1, item.getEntrezgenes().size());
            assertEquals("6330", item.getHgnc_id());
        }
    }
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) File(java.io.File) URL(java.net.URL) Test(org.junit.Test)

Aggregations

- Ensembl2NcbiValueObject (ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject): 10
- Test (org.junit.Test): 5
- File (java.io.File): 4
- URL (java.net.URL): 3
- Taxon (ubic.gemma.model.genome.Taxon): 3
- StringProteinProteinInteraction (ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction): 2
- ArrayList (java.util.ArrayList): 1
- Collection (java.util.Collection): 1
- HashMap (java.util.HashMap): 1
- Before (org.junit.Before): 1
- BiomartEnsemblNcbiObjectGenerator (ubic.gemma.core.loader.protein.biomart.BiomartEnsemblNcbiObjectGenerator): 1
- StringProteinProteinInteractionObjectGenerator (ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionObjectGenerator): 1
- FileFormatException (ubic.gemma.core.loader.util.parser.FileFormatException): 1
- Gene (ubic.gemma.model.genome.Gene): 1