Search in sources :

Example 6 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinLinksDetailedParserTest method testParseOneLineTaxonNotSupported.

/**
 * Test to make sure that if taxon is not supported then null is returned. Test method for
 * {@link ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionFileParser#parseOneLine(java.lang.String)}
 * <p>
 * Any exception thrown by the parser is deliberately left to propagate: JUnit reports an uncaught exception as a
 * test failure together with its full stack trace, so the cause is never hidden behind a message-less
 * {@code fail()}.
 */
@Test
public void testParseOneLineTaxonNotSupported() {
    // Both protein ids carry taxon 778; the test expects the parser to skip such a line (taxon presumably not in
    // the parser's supported list, which is configured outside this method). Note the last column ("AAA") is not
    // numeric, so a null result also implies the line is rejected before the score columns are validated.
    String line = "778.DVU0002 778.DVU0001 707 0 1 2 3 4 172 AAA";
    StringProteinProteinInteraction interaction = parser.parseOneLine(line);
    assertNull(interaction);
}
Also used : StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction) Test(org.junit.Test)

Example 7 with StringProteinProteinInteraction

use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.

the class StringProteinProteinInteractionFileParser method createStringProteinProteinInteraction.

/**
 * Typical line of string file is of the following format:
 * <pre>
 * 882.DVU0001 882.DVU0002 707 0 0 0 0 0 172 742
 * </pre>
 * 882.DVU0001 and 882.DVU0002 refer to protein 1 and protein2 Note the 882 is the ncbi taxon id, the other part is
 * an external id (ensembl). Method takes the array representing a line of string file and creates a
 * StringProteinProteinInteraction object.
 * <p>
 * The two protein identifiers are stored in a deterministic order that does not depend on the order in which they
 * appear in the file: the identifier with the smaller {@link String#hashCode()} comes first, and identifiers whose
 * hash codes are equal are ordered lexicographically.
 *
 * @param fields Line split on delimiter; expected to hold the two protein identifiers followed by seven evidence
 *               scores and the combined score (ten fields in total)
 * @return StringProteinProteinInteraction value object, or {@code null} if {@code fields} is {@code null}, either
 *         protein identifier is {@code null} or empty, or the taxon is not one of the supported taxa.
 * @throws FileFormatException if the line has too few fields, the two proteins do not share a taxon, or any field
 *                             after the two protein identifiers is not numeric
 */
public StringProteinProteinInteraction createStringProteinProteinInteraction(String[] fields) {
    // two protein identifiers + seven evidence code scores + one combined score
    final int expectedFieldCount = 10;

    // validate
    if (fields == null) {
        return null;
    }
    if (fields.length < 2) {
        // report a malformed line as a format problem rather than failing with an index error below
        throw new FileFormatException("Expected at least " + expectedFieldCount + " fields but found " + fields.length);
    }
    if (fields[0] == null || fields[1] == null || fields[0].isEmpty() || fields[1].isEmpty()) {
        return null;
    }
    String[] protein1AndTaxa = StringUtils.split(fields[0], ".");
    int taxonIdProtein1 = Integer.parseInt(protein1AndTaxa[0]);
    String[] protein2AndTaxa = StringUtils.split(fields[1], ".");
    int taxonIdProtein2 = Integer.parseInt(protein2AndTaxa[0]);
    // Check that the two proteins taxa match that is the taxon appended to protein name match
    if (taxonIdProtein1 != taxonIdProtein2) {
        throw new FileFormatException("Protein 1 " + fields[0] + " protein 2  " + fields[1] + " do not contain matching taxons");
    }
    // taxon not supported skip it; this is checked before the score columns are validated so that lines for
    // unsupported taxa are skipped silently whatever the rest of the line contains
    if (!(this.getNcbiValidTaxon()).contains(taxonIdProtein1)) {
        return null;
    }
    if (fields.length < expectedFieldCount) {
        throw new FileFormatException("Expected at least " + expectedFieldCount + " fields but found " + fields.length);
    }
    // validate the line make sure these fields are numeric
    for (int i = 2; i < fields.length; i++) {
        if (!StringUtils.isNumeric(fields[i])) {
            throw new FileFormatException("This line does not contain valid number ");
        }
    }
    // Always store protein 1 and protein 2 in a consistent order, which makes matching much easier later; the
    // hashcode/equality methods rely on it. The primary key is the identifier's hash code. Distinct identifiers
    // can share a hash code, in which case hash order alone would depend on the order in the file, so ties are
    // broken lexicographically.
    int hashOrder = Integer.compare(fields[0].hashCode(), fields[1].hashCode());
    boolean firstFieldLeads = hashOrder < 0 || (hashOrder == 0 && fields[0].compareTo(fields[1]) < 0);
    StringProteinProteinInteraction stringProteinProteinInteraction;
    if (firstFieldLeads) {
        stringProteinProteinInteraction = new StringProteinProteinInteraction(fields[0], fields[1]);
    } else {
        stringProteinProteinInteraction = new StringProteinProteinInteraction(fields[1], fields[0]);
    }
    stringProteinProteinInteraction.setNcbiTaxonId(taxonIdProtein1);
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.NEIGHBORHOOD, Integer.valueOf(fields[2]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.GENEFUSION, Integer.valueOf(fields[3]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.COOCCURENCE, Integer.valueOf(fields[4]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.COEXPRESSION, Integer.valueOf(fields[5]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.EXPERIMENTAL, Integer.valueOf(fields[6]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.DATABASE, Integer.valueOf(fields[7]));
    stringProteinProteinInteraction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.TEXTMINING, Integer.valueOf(fields[8]));
    stringProteinProteinInteraction.setCombined_score(Double.valueOf(fields[9]));
    return stringProteinProteinInteraction;
}
Also used : FileFormatException(ubic.gemma.core.loader.util.parser.FileFormatException) StringProteinProteinInteraction(ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction)

Aggregations

StringProteinProteinInteraction (ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction): 7, Test (org.junit.Test): 3, Taxon (ubic.gemma.model.genome.Taxon): 3, File (java.io.File): 2, URL (java.net.URL): 2, Collection (java.util.Collection): 2, Ensembl2NcbiValueObject (ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject): 2, IOException (java.io.IOException): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, Before (org.junit.Before): 1, BiomartEnsemblNcbiObjectGenerator (ubic.gemma.core.loader.protein.biomart.BiomartEnsemblNcbiObjectGenerator): 1, StringProteinProteinInteractionObjectGenerator (ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionObjectGenerator): 1, FileFormatException (ubic.gemma.core.loader.util.parser.FileFormatException): 1