use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.
the class BioMartEnsemblNcbiObjectGeneratorTest method testGenerate.
/**
 * Tests that, given a taxon BioMart file, the generator returns a map of {@link Ensembl2NcbiValueObject}s and that
 * the genes are correctly mapped. This could be done through the parser, but a quick check is done here. The awk
 * commands in the comments show how the expected counts can be derived from the test file.
 */
@Test
public void testGenerate() {
    try {
        biomartEnsemblNcbiObjectGenerator.setBioMartFileName(taxonBiomartFile);
        Map<String, Ensembl2NcbiValueObject> map = biomartEnsemblNcbiObjectGenerator.generate(taxa);

        long counterEnsemblToManyGeneids = 0;
        long counterEnsemblToOneGeneids = 0;
        long counterNumberGenes = 0;
        long countHowManyNoGenes = 0;

        // awk -F'\t' 'length($3)>1' test.txt | awk -F'\t' '{print $4}' | uniq |sort | wc -l -1
        // there are 510 records which have one or more gene mapping
        assertEquals(510, map.size());

        for (Ensembl2NcbiValueObject biomart : map.values()) {
            int genesForEntry = biomart.getEntrezgenes().size();
            if (genesForEntry > 1) {
                // entry maps to several genes
                counterEnsemblToManyGeneids++;
            } else if (genesForEntry == 1) {
                // entry maps to exactly one gene
                counterEnsemblToOneGeneids++;
            } else {
                // entry maps to no gene at all; none are expected (see assertion below)
                countHowManyNoGenes++;
            }

            // count how many non-empty gene ids there are in total
            for (String geneE : biomart.getEntrezgenes()) {
                if (!geneE.isEmpty()) {
                    counterNumberGenes++;
                }
            }
        }

        // awk -F'\t' 'length($3)>1' test.txt | awk -F"\t" '{if (a[$4]) { print $4 } a[$4] = $0}' | sort | uniq | wc
        // -l
        assertEquals(75, counterEnsemblToManyGeneids);
        // awk -F'\t' 'length($3)>1' test.txt | awk -F"\t" '{if (a[$4]==null) { print $4 } a[$4] = $0}' | sort |
        // uniq | wc -l -75
        assertEquals(435, counterEnsemblToOneGeneids);
        // there should be none with no genes as they are filtered out
        assertEquals(0, countHowManyNoGenes);
        // test the file awk -F'\t' 'length($3)>1' test.txt | awk -F'\t' '{print $3}' | uniq |sort | wc -l
        assertEquals(638, counterNumberGenes);
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows what actually went wrong,
        // instead of a message-less fail() plus a stack trace on stderr.
        throw new AssertionError("Generating the Ensembl to NCBI mapping failed: " + e, e);
    }
}
use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.
the class BioMartEnsemblNcbiParserTest method testParseValidFileMouse.
/*
 * Tests that a BioMart mouse file can be parsed: checks the number of parsed items and the number of Entrez genes
 * attached to two specific Ensembl gene ids.
 */
@Test
public void testParseValidFileMouse() {
    // NOTE(review): the trailing empty field name is kept from the original; presumably a placeholder for the
    // column that only exists for human (hgnc_id) - confirm against the parser.
    String[] attributesToGet = new String[] { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "" };
    String fileNameStringmouse = "/data/loader/protein/biomart/biomartmmusculus.txt";
    URL myurl = this.getClass().getResource(fileNameStringmouse);
    if (myurl == null) {
        // getResource returns null when the resource is missing; report that instead of a later NullPointerException
        fail("Test resource not found on classpath: " + fileNameStringmouse);
    }
    try {
        parser.setBioMartFields(attributesToGet);
        parser.parse(new File(myurl.getFile()));
        Collection<Ensembl2NcbiValueObject> items = parser.getResults();

        boolean isItemThereOne = false;
        boolean isItemThereTwo = false;

        // 27 unique peptide ids but only 20 which have entrez genes other get filtered out
        assertEquals(20, items.size());

        for (Ensembl2NcbiValueObject item : items) {
            if (item.getEnsemblGeneId().equals("ENSMUSG00000064341")) {
                assertEquals(2, item.getEntrezgenes().size());
                isItemThereOne = true;
            }
            if (item.getEnsemblGeneId().equals("ENSMUSG00000057782")) {
                // expected value goes first so a failure message reads correctly
                assertEquals(1, item.getEntrezgenes().size());
                isItemThereTwo = true;
            }
        }

        // both genes must actually have been seen, otherwise the per-item assertions above never ran
        assertTrue(isItemThereTwo);
        assertTrue(isItemThereOne);
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows what actually went wrong.
        throw new AssertionError("Parsing " + fileNameStringmouse + " failed: " + e, e);
    }
}
use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.
the class BioMartEnsemblNcbiParserTest method testParseOneValidLineHuman.
/*
 * Test method for the parser's parseOneLine(java.lang.String).
 * Tests that a human line (five tab-separated fields, the last one being the HGNC id) can be parsed.
 */
@Test
public void testParseOneValidLineHuman() {
    String[] attributesToGet = new String[] { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "hgnc_id" };
    parser.setBioMartFields(attributesToGet);

    // fields are in the same order as attributesToGet
    String line = "ENSG00000220023" + "\t" + "ENST00000418749" + "\t" + "10013421" + "\t" + "ENST00000418749" + "\t" + "12123";
    Ensembl2NcbiValueObject bioMartEnsembleNcbi = parser.parseOneLine(line);

    // assertEquals reports the actual value on failure and does not throw a NullPointerException on a null id.
    // NOTE(review): assumes getNcbiTaxonId() returns Integer; the taxon (10) is presumably set up by the fixture.
    assertEquals(Integer.valueOf(10), bioMartEnsembleNcbi.getNcbiTaxonId());
    assertEquals("ENSG00000220023", bioMartEnsembleNcbi.getEnsemblGeneId());
    assertEquals("ENST00000418749", bioMartEnsembleNcbi.getEnsemblTranscriptId());

    Collection<String> genes = bioMartEnsembleNcbi.getEntrezgenes();
    assertTrue(genes.contains("10013421"));

    assertEquals("ENST00000418749", bioMartEnsembleNcbi.getEnsemblPeptideId());
    assertEquals("12123", bioMartEnsembleNcbi.getHgnc_id());
}
use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.
the class BioMartEnsemblNcbiParserTest method testParseOneValidLineNonHuman.
/*
 * Test method for the parser's parseOneLine(java.lang.String).
 * Tests that a non-human line (four tab-separated fields, no HGNC id) can be parsed.
 */
@Test
public void testParseOneValidLineNonHuman() {
    // NOTE(review): no setBioMartFields call here, so this relies on whatever field configuration the fixture
    // gives the parser - confirm against the test setup.
    String line = "ENSG00000220023" + "\t" + "ENST00000418749" + "\t" + "100134091" + "\t" + "ENST00000418749";
    Ensembl2NcbiValueObject bioMartEnsembleNcbi = parser.parseOneLine(line);

    // assertEquals reports the actual value on failure and does not throw a NullPointerException on a null id.
    // NOTE(review): assumes getNcbiTaxonId() returns Integer; the taxon (10) is presumably set up by the fixture.
    assertEquals(Integer.valueOf(10), bioMartEnsembleNcbi.getNcbiTaxonId());
    assertEquals("ENSG00000220023", bioMartEnsembleNcbi.getEnsemblGeneId());
    assertEquals("ENST00000418749", bioMartEnsembleNcbi.getEnsemblTranscriptId());

    Collection<String> genes = bioMartEnsembleNcbi.getEntrezgenes();
    assertTrue(genes.contains("100134091"));

    assertEquals("ENST00000418749", bioMartEnsembleNcbi.getEnsemblPeptideId());
}
use of ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject in project Gemma by PavlidisLab.
the class BioMartEnsemblNcbiParserTest method testParseValidFileHuman.
/*
 * Tests that a BioMart human file can be parsed: checks the number of parsed items and, for one Ensembl gene id,
 * the number of Entrez genes and the HGNC id.
 */
@Test
public void testParseValidFileHuman() {
    String[] attributesToGet = new String[] { "ensembl_gene_id", "ensembl_transcript_id", "entrezgene", "ensembl_peptide_id", "hgnc_id" };
    String fileName = "/data/loader/protein/biomart/biomartsapiens.txt";
    URL myurl = this.getClass().getResource(fileName);
    if (myurl == null) {
        // getResource returns null when the resource is missing; report that instead of a later NullPointerException
        fail("Test resource not found on classpath: " + fileName);
    }
    try {
        parser.setBioMartFields(attributesToGet);
        parser.parse(new File(myurl.getFile()));
        Collection<Ensembl2NcbiValueObject> items = parser.getResults();

        // NOTE(review): the original comment here read "39 unique proteins and 36 unique genes", which does not
        // match the asserted count of 10 - confirm the expected number against the test file.
        assertEquals(10, items.size());

        // NOTE(review): if ENSG00000215764 is absent from the results these assertions never run and the test
        // still passes - consider asserting that the gene was seen.
        for (Ensembl2NcbiValueObject item : items) {
            if (item.getEnsemblGeneId().equals("ENSG00000215764")) {
                assertEquals(1, item.getEntrezgenes().size());
                assertEquals("6330", item.getHgnc_id());
            }
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows what actually went wrong.
        throw new AssertionError("Parsing " + fileName + " failed: " + e, e);
    }
}
Aggregations