Search in sources :

Example 1 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinProteinInteractionObjectGenerator method generate.

/**
 * Main method to generate StringProteinProteinInteraction objects.
 * <p>
 * When no local STRING file has been set ({@code stringProteinInteractionFileLocal} is null),
 * {@code fetchProteinStringFileFromRemoteSiteUnArchived()} is called first. The file is then parsed once per taxon
 * and the results are collected per taxon.
 *
 * @param validTaxa Taxa to generate StringProteinProteinInteraction objects for (STRING has many taxa).
 * @return Map keyed on taxon; each value is the collection of StringProteinProteinInteraction objects parsed for
 * that taxon.
 */
public Map<Taxon, Collection<StringProteinProteinInteraction>> generate(Collection<Taxon> validTaxa) {
    log.debug("Starting to get StringProteinProteinInteraction data");
    if (stringProteinInteractionFileLocal == null) {
        log.info("stringProteinInteractionFile is remote file fetching remote site");
        fetchProteinStringFileFromRemoteSiteUnArchived();
    }
    Map<Taxon, Collection<StringProteinProteinInteraction>> map = new HashMap<>();
    // Parse one taxon at a time: doing all taxa in one big go resulted in
    // java.lang.OutOfMemoryError: Java heap space
    for (Taxon taxon : validTaxa) {
        log.info("calling taxon " + taxon);
        // the parser takes a collection of taxa, so wrap the single taxon
        Collection<Taxon> taxa = new ArrayList<>();
        taxa.add(taxon);
        map.put(taxon, this.parseProteinStringFileInteraction(taxa));
    }
    log.debug("Finished getting StringProteinProteinInteraction data");
    return map;
}
Also used : HashMap(java.util.HashMap) Taxon(ubic.gemma.model.genome.Taxon) ArrayList(java.util.ArrayList) Collection(java.util.Collection) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction)

Example 2 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringBiomartProteinConverterTest method setUp.

/**
 * Builds the fixtures used by the tests: a mouse taxon (NCBI id 10090), a
 * StringProteinProteinInteractionConverter backed by the short mouse biomart file, and three
 * StringProteinProteinInteraction objects added to {@code stringProteinProteinInteractions}.
 */
@Before
public void setUp() {
    String fileNameBiomartmouse = "/data/loader/protein/biomart/biomartmmusculusShort.txt";
    URL fileNameBiomartmouseURL = this.getClass().getResource(fileNameBiomartmouse);
    if (fileNameBiomartmouseURL == null) {
        // getResource returns null for a missing resource; report that explicitly instead of an anonymous NPE
        throw new AssertionError("Test resource not found: " + fileNameBiomartmouse);
    }
    File taxonBiomartFile = new File(fileNameBiomartmouseURL.getFile());
    Taxon taxon = Taxon.Factory.newInstance();
    taxon.setIsGenesUsable(true);
    taxon.setNcbiId(10090);
    taxon.setScientificName("Mus musculus");
    taxon.setIsSpecies(true);
    taxa.add(taxon);
    try {
        BiomartEnsemblNcbiObjectGenerator biomartEnsemblNcbiObjectGenerator = new BiomartEnsemblNcbiObjectGenerator();
        biomartEnsemblNcbiObjectGenerator.setBioMartFileName(taxonBiomartFile);
        Map<String, Ensembl2NcbiValueObject> map = biomartEnsemblNcbiObjectGenerator.generate(taxa);
        stringBiomartProteinConverter = new StringProteinProteinInteractionConverter(map);
    } catch (Exception e) {
        // keep the original exception as the cause so the test report shows why set-up failed
        throw new AssertionError("Could not create converter from " + fileNameBiomartmouse, e);
    }
    stringProteinProteinInteractionOne = new StringProteinProteinInteraction("ENSMUSP00000111623", "ENSMUSP00000100396");
    StringProteinProteinInteraction stringProteinProteinInteractionTwo = new StringProteinProteinInteraction("ENSMUSP00000100395", "ENSMUSP00000100396");
    StringProteinProteinInteraction stringProteinProteinInteractionThree = new StringProteinProteinInteraction("ENSMUSP00000100407", "ENSMUSP00000100395");
    // add them to the shared collection
    stringProteinProteinInteractions.add(stringProteinProteinInteractionOne);
    stringProteinProteinInteractions.add(stringProteinProteinInteractionTwo);
    stringProteinProteinInteractions.add(stringProteinProteinInteractionThree);
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) BiomartEnsemblNcbiObjectGenerator(ubic.gemma.core.loader.protein.biomart.BiomartEnsemblNcbiObjectGenerator) Taxon(ubic.gemma.model.genome.Taxon) File(java.io.File) URL(java.net.URL) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction) Before(org.junit.Before)

Example 3 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinLinksDetailedParserTest method testParseFileContainingOneTaxon.

/*
 * Test to ensure that a small file containing 100 lines can be parsed correctly. The file contains all one taxon.
 * There are duplicate interactions in the file, e.g.
 *   10090.ENSMUSP00000000001 10090.ENSMUSP00000000153 0 0 0 0 0 900 27 902
 *   10090.ENSMUSP00000000153 10090.ENSMUSP00000000001 0 0 0 0 0 900 27 902
 * are effectively the same and should be treated as one. The test expects 23 interactions after parsing
 * protein.links.detailed.txt, each with both proteins prefixed by "10090.ENSMUSP".
 */
@Test
public void testParseFileContainingOneTaxon() {
    String fileName = "/data/loader/protein/string/protein.links.detailed.txt";
    URL myurl = this.getClass().getResource(fileName);
    try {
        parser.parse(new File(myurl.getFile()));
        Collection<StringProteinProteinInteraction> items = parser.getResults();
        assertEquals(23, items.size());
        for (StringProteinProteinInteraction item : items) {
            assertTrue(item.getProtein1().startsWith("10090.ENSMUSP"));
            assertTrue(item.getProtein2().startsWith("10090.ENSMUSP"));
        }
    } catch (RuntimeException | IOException e) {
        // fail the test but keep the underlying exception visible in the report
        throw new AssertionError("Unexpected exception while parsing " + fileName, e);
    }
}
Also used : IOException(java.io.IOException) File(java.io.File) URL(java.net.URL) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction) Test(org.junit.Test)

Example 4 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinLinksDetailedParserTest method testParseOneValidLine.

/*
 * Test to make sure that a line can be parsed correctly to its constituent values. Also checks the alphabetical
 * ordering of the proteins: for the input line below, the alphabetically lower identifier
 * (10090.ENSMUSP00000000153) is expected in protein 1 and the higher one (10090.ENSMUSP00000000201) in protein 2,
 * regardless of their order in the line. The expected evidence vector holds 1 for each non-zero evidence score
 * and 0 otherwise. Test method for
 * {@link ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionFileParser#parseOneLine(java.lang.String)}
 */
@Test
public void testParseOneValidLine() {
    String line = "10090.ENSMUSP00000000201 10090.ENSMUSP00000000153 707 0 10 2 3 0 0 222";
    StringProteinProteinInteraction stringProteinProteinInteraction = parser.parseOneLine(line);
    assertTrue("Unexpected NCBI taxon id: " + stringProteinProteinInteraction.getNcbiTaxonId(),
            stringProteinProteinInteraction.getNcbiTaxonId().equals(10090));
    assertEquals("10090.ENSMUSP00000000153", stringProteinProteinInteraction.getProtein1());
    assertEquals("10090.ENSMUSP00000000201", stringProteinProteinInteraction.getProtein2());
    byte[] arrayStored = stringProteinProteinInteraction.getEvidenceVector();
    byte[] array = new byte[] { 1, 0, 1, 1, 1, 0, 0 };
    assertArrayEquals("Compare bit vector", array, arrayStored);
    // Double.valueOf replaces the deprecated Double(double) constructor
    assertEquals(Double.valueOf(222), stringProteinProteinInteraction.getCombined_score());
}
Also used : StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction) Test(org.junit.Test)

Example 5 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinInteractionLoader method load.

/**
 * Main method to load STRING protein-protein interactions. Can either be supplied with files to load from or do
 * a remote download. After files have been located/fetched the files are parsed and converted into value objects.
 * These value objects are then converted into GEMMA Gene2GeneProteinInteractions, which are then loaded into the
 * database. Can be run on all eligible taxa in Gemma or on a supplied taxon.
 *
 * @param stringProteinFileNameLocal     The name of the string file on the local system
 * @param stringProteinFileNameRemote    The name of the string file on the remote system (just in case the string name
 *                                       proves to be too variable) - can be null
 * @param localEnsembl2EntrezMappingFile The name of the local biomart file - can be null?
 * @param taxa                           taxa to load data for. List of taxon to process
 * @throws IOException io problems
 */
public void load(File stringProteinFileNameLocal, String stringProteinFileNameRemote, File localEnsembl2EntrezMappingFile, Collection<Taxon> taxa) throws IOException {
    // very basic validation before any processing done
    this.validateLoadParameters(stringProteinFileNameLocal, taxa);
    // retrieve STRING protein protein interactions
    StringProteinProteinInteractionObjectGenerator stringProteinProteinInteractionObjectGenerator = new StringProteinProteinInteractionObjectGenerator(stringProteinFileNameLocal, stringProteinFileNameRemote);
    Map<Taxon, Collection<StringProteinProteinInteraction>> map = stringProteinProteinInteractionObjectGenerator.generate(taxa);
    /*
     * Get ENSEMBL to NCBI id mappings so we can store the STRING interactions
     */
    Map<String, Ensembl2NcbiValueObject> bioMartStringEntreGeneMapping = this.getIdMappings(localEnsembl2EntrezMappingFile, taxa);
    // Do one taxon at a time to reduce memory use
    for (Map.Entry<Taxon, Collection<StringProteinProteinInteraction>> entry : map.entrySet()) {
        Taxon taxon = entry.getKey();
        StringProteinInteractionLoader.log.debug("Loading for taxon " + taxon);
        Collection<StringProteinProteinInteraction> proteinInteractions = entry.getValue();
        StringProteinInteractionLoader.log.info("Found " + proteinInteractions.size() + " STRING interactions for: " + taxon);
        this.loadOneTaxonAtATime(bioMartStringEntreGeneMapping, proteinInteractions);
    }
}
Also used : Ensembl2NcbiValueObject(ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject) StringProteinProteinInteractionObjectGenerator(ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionObjectGenerator) Taxon(ubic.gemma.model.genome.Taxon) Collection(java.util.Collection) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction)

Aggregations

StringProteinProteinInteraction (ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction)7 Test (org.junit.Test)3 Taxon (ubic.gemma.model.genome.Taxon)3 File (java.io.File)2 URL (java.net.URL)2 Collection (java.util.Collection)2 Ensembl2NcbiValueObject (ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Before (org.junit.Before)1 BiomartEnsemblNcbiObjectGenerator (ubic.gemma.core.loader.protein.biomart.BiomartEnsemblNcbiObjectGenerator)1 StringProteinProteinInteractionObjectGenerator (ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionObjectGenerator)1 FileFormatException (ubic.gemma.core.loader.util.parser.FileFormatException)1