use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.
the class StringProteinLinksDetailedParserTest method testParseOneLineTaxonNotSupported.
/**
 * Verifies that parsing a line yields {@code null} instead of an interaction when the taxon is not supported.
 * The line uses taxon id 778 for both proteins, which the parser under test is expected not to support.
 * Test method for
 * {@link ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionFileParser#parseOneLine(java.lang.String)}
 */
@Test
public void testParseOneLineTaxonNotSupported() {
    String line = "778.DVU0002 778.DVU0001 707 0 1 2 3 4 172 AAA";
    // No try/catch: an unexpected runtime exception must propagate so the test framework reports the
    // real cause and stack trace, rather than a message-less fail() with the trace dumped to stderr.
    StringProteinProteinInteraction interaction = parser.parseOneLine(line);
    assertNull(interaction);
}
use of ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction in project Gemma by PavlidisLab.
the class StringProteinProteinInteractionFileParser method createStringProteinProteinInteraction.
/**
 * Builds a {@link StringProteinProteinInteraction} from one line of a string file that has already been split
 * into fields. A typical line has the following format:
 * <pre>
 * 882.DVU0001 882.DVU0002 707 0 0 0 0 0 172 742
 * </pre>
 * Fields 0 and 1 identify protein 1 and protein 2: the part before the first '.' (882 above) is the NCBI taxon
 * id, the remainder is an external id (ensembl). Fields 2 to 8 are read as the integer scores for, in order,
 * NEIGHBORHOOD, GENEFUSION, COOCCURENCE, COEXPRESSION, EXPERIMENTAL, DATABASE and TEXTMINING evidence. Field 9
 * is read as the combined score.
 *
 * @param fields Line split on delimiter
 * @return StringProteinProteinInteraction value object, or {@code null} when {@code fields} is {@code null},
 *         when either protein identifier is {@code null} or empty, or when the taxon is not among the taxa
 *         returned by {@code getNcbiValidTaxon()}
 * @throws FileFormatException   if fewer than two fields are present, if a protein identifier has no taxon
 *                               prefix, if the two proteins carry different taxon ids, or (for a supported
 *                               taxon) if any field after the two identifiers is not numeric or fewer than
 *                               ten fields are present
 * @throws NumberFormatException if a taxon prefix is not an integer or a score cannot be parsed into its
 *                               target numeric type
 */
public StringProteinProteinInteraction createStringProteinProteinInteraction(String[] fields) {
    // validate
    if (fields == null) {
        return null;
    }
    // Both protein identifiers are required; report a short line as a format problem rather than letting an
    // ArrayIndexOutOfBoundsException escape.
    if (fields.length < 2) {
        throw new FileFormatException("Expected at least 2 fields but found " + fields.length);
    }
    if (fields[0] == null || fields[1] == null || fields[0].isEmpty() || fields[1].isEmpty()) {
        return null;
    }

    int taxonIdProtein1 = taxonIdOf(fields[0]);
    int taxonIdProtein2 = taxonIdOf(fields[1]);

    // Check that the two proteins taxa match that is the taxon appended to protein name match
    if (taxonIdProtein1 != taxonIdProtein2) {
        throw new FileFormatException("Protein 1 " + fields[0] + " protein 2 " + fields[1] + " do not contain matching taxons");
    }

    // taxon not supported skip it (deliberately checked before the remaining fields are validated)
    if (!(this.getNcbiValidTaxon()).contains(taxonIdProtein1)) {
        return null;
    }

    // Always store the two proteins in the same relative order regardless of their order in the file; the
    // comment in the original code notes that hashcode/equality of the interaction relies on this.
    // The order is decided by the identifiers' hash codes. If two different identifiers collide on hash code,
    // fall back to lexicographic comparison so that "A B" and "B A" still produce the same order.
    int order = Integer.compare(fields[0].hashCode(), fields[1].hashCode());
    if (order == 0) {
        order = fields[0].compareTo(fields[1]);
    }
    StringProteinProteinInteraction interaction;
    if (order < 0) {
        interaction = new StringProteinProteinInteraction(fields[0], fields[1]);
    } else {
        interaction = new StringProteinProteinInteraction(fields[1], fields[0]);
    }
    interaction.setNcbiTaxonId(taxonIdProtein1);

    // validate the line make sure these fields are numeric
    for (int i = 2; i < fields.length; i++) {
        if (!StringUtils.isNumeric(fields[i])) {
            throw new FileFormatException("This line does not contain valid number ");
        }
    }
    // Indices 2..9 are read below, so a truncated line must be rejected here.
    if (fields.length < 10) {
        throw new FileFormatException("Expected at least 10 fields but found " + fields.length);
    }

    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.NEIGHBORHOOD, Integer.valueOf(fields[2]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.GENEFUSION, Integer.valueOf(fields[3]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.COOCCURENCE, Integer.valueOf(fields[4]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.COEXPRESSION, Integer.valueOf(fields[5]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.EXPERIMENTAL, Integer.valueOf(fields[6]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.DATABASE, Integer.valueOf(fields[7]));
    interaction.addEvidenceCodeScoreToMap(StringProteinInteractionEvidenceCodeEnum.TEXTMINING, Integer.valueOf(fields[8]));
    interaction.setCombined_score(Double.valueOf(fields[9]));
    return interaction;
}

/**
 * Extracts the taxon id that prefixes a protein identifier such as {@code 882.DVU0001}.
 *
 * @param proteinIdentifier non-empty protein identifier as found in the file
 * @return the first '.'-separated token parsed as an integer
 * @throws FileFormatException   if splitting the identifier on '.' yields no token at all
 * @throws NumberFormatException if the first token is not an integer
 */
private static int taxonIdOf(String proteinIdentifier) {
    String[] parts = StringUtils.split(proteinIdentifier, ".");
    if (parts == null || parts.length == 0) {
        throw new FileFormatException("Protein " + proteinIdentifier + " does not contain a taxon id");
    }
    return Integer.parseInt(parts[0]);
}
Aggregations