Use of ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionObjectGenerator in project Gemma by PavlidisLab.
Class StringProteinInteractionLoader, method load.
/**
 * Entry point for loading STRING protein-protein interactions.
 * <p>
 * The work proceeds in four steps: the arguments are validated, a
 * {@link StringProteinProteinInteractionObjectGenerator} produces the STRING interaction value objects grouped by
 * taxon, the ENSEMBL to NCBI id mappings are obtained, and finally the interactions are handed over for loading
 * one taxon at a time. The interactions can be read from a local file or, per the original contract of this
 * method, fetched remotely; it can be run for all eligible taxa in Gemma or for a supplied subset.
 *
 * @param stringProteinFileNameLocal     the STRING file on the local system
 * @param stringProteinFileNameRemote    the name of the STRING file on the remote system (kept separate in case
 *                                       the remote naming proves too variable) - can be null
 * @param localEnsembl2EntrezMappingFile the local biomart mapping file - possibly null (to be confirmed)
 * @param taxa                           the taxa to load data for
 * @throws IOException on I/O problems
 */
public void load(File stringProteinFileNameLocal, String stringProteinFileNameRemote, File localEnsembl2EntrezMappingFile, Collection<Taxon> taxa) throws IOException {
    // Cheap sanity check of the arguments before any real work starts.
    this.validateLoadParameters(stringProteinFileNameLocal, taxa);

    // Step 1: obtain the STRING protein-protein interactions, grouped by taxon.
    StringProteinProteinInteractionObjectGenerator generator =
            new StringProteinProteinInteractionObjectGenerator(stringProteinFileNameLocal, stringProteinFileNameRemote);
    Map<Taxon, Collection<StringProteinProteinInteraction>> interactionsByTaxon = generator.generate(taxa);

    // Step 2: obtain the ENSEMBL to NCBI id mappings needed to store the STRING interactions.
    Map<String, Ensembl2NcbiValueObject> ensemblToNcbi = this.getIdMappings(localEnsembl2EntrezMappingFile, taxa);

    // Step 3: load one taxon at a time to reduce memory use.
    for (Map.Entry<Taxon, Collection<StringProteinProteinInteraction>> entry : interactionsByTaxon.entrySet()) {
        Taxon taxon = entry.getKey();
        StringProteinInteractionLoader.log.debug("Loading for taxon " + taxon);

        Collection<StringProteinProteinInteraction> interactions = entry.getValue();
        StringProteinInteractionLoader.log.info("Found " + interactions.size() + " STRING interactions for: " + taxon);

        this.loadOneTaxonAtATime(ensemblToNcbi, interactions);
    }
}
Aggregations