use of org.nextprot.api.core.domain.annotation.Annotation in project nextprot-api by calipho-sib.
the class PepXServiceTest method shouldReturnAnEmptryListIfTheVariantIsNotConaintedInThePeptide.
@Test
public void shouldReturnAnEmptryListIfTheVariantIsNotConaintedInThePeptide() throws Exception {
    // Scenario built on entry NX_Q9H6T3, isoform 3.
    final String isoformAccession = "NX_Q9H6T3-3";
    final String queriedPeptide = "GANAP";
    final boolean isoleucineMode = true;

    // The isoform sequence below contains GANAL, not GANAP. To visualise the sequence see
    // https://cdn.rawgit.com/calipho-sib/sequence-viewer/master/examples/simple.html
    Isoform mockedIsoform = mock(Isoform.class);
    when(mockedIsoform.getIsoformAccession()).thenReturn(isoformAccession);
    when(mockedIsoform.getSequence()).thenReturn("MDADPYNPVLPTNRASAYFRLKKFAVAESDCNLAVALNRSYTKAYSRRGAARFALQKLEEAKKDYERVLELEPNNFEATNELRKISQALASKENSYPKEADIVIKSTEGERKQIEAQQNKQQAISEKDRGNGFFKEGKYERAIECYTRGIAADGANALLPANRAMAYLKIQKYEEAEKDCTQAILLDGSYSKAFARRGTARTFLGKLNEAKQDFETVLLLEPGNKQAVTELSKIKKELIEKGHWDDVFLDSTQRQNVVKPIDNPPHPGSTKPLKKVIIEETGNLIQTIDVPDSTTAAAPENNPINLANVIAATGTTSKKNSSQDDLFPTSDTPRAKVLKIEEVSDTSSLQPQASLKQDVCQSYSEKMPIEIEQKPAQFATTVLPPIPANSFQLESDFRQLKSSPDMLYQYLKQIEPSLYPKLFQKNLDPDVFNQIVKILHDFYIEKEKPLLIFEILQRLSELKRFDMAVMFMSETEKKIARALFNHIDKSGLKDSSVEELKKRYGG");
    List<Isoform> knownIsoforms = Arrays.asList(mockedIsoform);

    // PepX match for the isoform, created with the value 154.
    PepXIsoformMatch match = new PepXIsoformMatch(isoformAccession, 154);

    // The only available variant (mocked with "L", "Z", 158) cannot turn GANAL into GANAP.
    List<Annotation> variantAnnotations = Arrays.asList(getMockedAnnotation("L", "Z", 158, isoformAccession, true));

    List<Annotation> virtualAnnotations = PepXServiceImpl.buildEntryWithVirtualAnnotations(queriedPeptide, isoleucineMode, Arrays.asList(match), variantAnnotations, knownIsoforms);

    // No variant explains the peptide, so an empty list is expected (not an exception).
    assertTrue(virtualAnnotations.isEmpty());
}
use of org.nextprot.api.core.domain.annotation.Annotation in project nextprot-api by calipho-sib.
the class PepXServiceTest method shouldGiveAnExceptionIfTheOriginalIsNotPresentOnTheSequence.
/*
* Specification has changed look at:
* shouldReturnAnEmptryListIfTheVariantIsNotConaintedInThePeptide
*
* @Test(expected=NextProtException.class) public void
* shouldGiveAnExceptionIfTheVariantIsNotConaintedInThePeptide() throws
* Exception { try {
*
* //Taking example NX_Q9H6T3 String peptide = "GANAP"; boolean
* modeIsoleucine = true; String isoName = "NX_Q9H6T3-3";
*
* Isoform isoform = mock(Isoform.class);
* when(isoform.getIsoformAccession()).thenReturn(isoName);
* //https://cdn.rawgit.com/calipho-sib/sequence-viewer/master/examples/
* simple.html (check that page to format the sequence) //GANAL is present
* instead of GANAP when(isoform.getSequence()).thenReturn(
* "MDADPYNPVLPTNRASAYFRLKKFAVAESDCNLAVALNRSYTKAYSRRGAARFALQKLEEAKKDYERVLELEPNNFEATNELRKISQALASKENSYPKEADIVIKSTEGERKQIEAQQNKQQAISEKDRGNGFFKEGKYERAIECYTRGIAADGANALLPANRAMAYLKIQKYEEAEKDCTQAILLDGSYSKAFARRGTARTFLGKLNEAKQDFETVLLLEPGNKQAVTELSKIKKELIEKGHWDDVFLDSTQRQNVVKPIDNPPHPGSTKPLKKVIIEETGNLIQTIDVPDSTTAAAPENNPINLANVIAATGTTSKKNSSQDDLFPTSDTPRAKVLKIEEVSDTSSLQPQASLKQDVCQSYSEKMPIEIEQKPAQFATTVLPPIPANSFQLESDFRQLKSSPDMLYQYLKQIEPSLYPKLFQKNLDPDVFNQIVKILHDFYIEKEKPLLIFEILQRLSELKRFDMAVMFMSETEKKIARALFNHIDKSGLKDSSVEELKKRYGG"
* );
*
* List<Pair<String, Integer>> isosAndPositions = Arrays.asList(new
* Pair<String, Integer>(isoName, 154)); //Position of the begin of peptide
* List<Annotation> annots = Arrays.asList(getMockedAnnotation("L", "Z",
* 158, isoName, true)); List<Isoform> isoforms = Arrays.asList(isoform);
*
* PepXServiceImpl.buildEntryWithVirtualAnnotations(peptide, modeIsoleucine,
* isosAndPositions, annots, isoforms); //empty or null annotations
* }catch(NextProtException e){ if(e.getMessage().contains(
* "No valid variants found for isoform ")){ throw e; //success tests }else
* fail(); }
*
* }
*/
// Ignored because neXtProt contains variants whose original amino acid differs from the
// isoform amino acid at the variant position (data inconsistency).
@Ignore
@Test(expected = NextProtException.class)
public void shouldGiveAnExceptionIfTheOriginalIsNotPresentOnTheSequence() throws Exception {
    try {
        // Scenario built on entry NX_Q9H6T3, isoform 3.
        final String isoformAccession = "NX_Q9H6T3-3";
        final String queriedPeptide = "GANAP";
        final boolean isoleucineMode = true;

        // The isoform sequence below contains GANAL, not GANAP. To visualise the sequence see
        // https://cdn.rawgit.com/calipho-sib/sequence-viewer/master/examples/simple.html
        Isoform mockedIsoform = mock(Isoform.class);
        when(mockedIsoform.getIsoformAccession()).thenReturn(isoformAccession);
        when(mockedIsoform.getSequence()).thenReturn("MDADPYNPVLPTNRASAYFRLKKFAVAESDCNLAVALNRSYTKAYSRRGAARFALQKLEEAKKDYERVLELEPNNFEATNELRKISQALASKENSYPKEADIVIKSTEGERKQIEAQQNKQQAISEKDRGNGFFKEGKYERAIECYTRGIAADGANALLPANRAMAYLKIQKYEEAEKDCTQAILLDGSYSKAFARRGTARTFLGKLNEAKQDFETVLLLEPGNKQAVTELSKIKKELIEKGHWDDVFLDSTQRQNVVKPIDNPPHPGSTKPLKKVIIEETGNLIQTIDVPDSTTAAAPENNPINLANVIAATGTTSKKNSSQDDLFPTSDTPRAKVLKIEEVSDTSSLQPQASLKQDVCQSYSEKMPIEIEQKPAQFATTVLPPIPANSFQLESDFRQLKSSPDMLYQYLKQIEPSLYPKLFQKNLDPDVFNQIVKILHDFYIEKEKPLLIFEILQRLSELKRFDMAVMFMSETEKKIARALFNHIDKSGLKDSSVEELKKRYGG");
        List<Isoform> knownIsoforms = Arrays.asList(mockedIsoform);

        PepXIsoformMatch match = new PepXIsoformMatch(isoformAccession, 154);

        // The mocked original "O" is not what the sequence holds: it should be an L -> P
        // variant (GANAL).
        List<Annotation> variantAnnotations = Arrays.asList(getMockedAnnotation("O", "P", 158, isoformAccession, true));

        PepXServiceImpl.buildEntryWithVirtualAnnotations(queriedPeptide, isoleucineMode, Arrays.asList(match), variantAnnotations, knownIsoforms);
    } catch (NextProtException e) {
        // Only the "amino acid" complaint counts as success; any other message fails the test.
        if (!e.getMessage().contains("The amino acid")) {
            fail();
        }
        throw e;
    }
}
use of org.nextprot.api.core.domain.annotation.Annotation in project nextprot-api by calipho-sib.
the class AnnotationFieldBuilder method init.
/**
 * Indexes the function descriptions, the annotations and the families of an entry.
 * <p>
 * Values are pushed through {@code addField} into {@code Fields.FUNCTION_DESC} and
 * {@code Fields.ANNOTATIONS}. Three passes are made, in this order: the strings returned by
 * {@code EntryUtils.getFunctionInfoWithCanonicalFirst}, every annotation returned by
 * {@code entry.getAnnotations()}, and every family of {@code entry.getOverview()}.
 *
 * @param entry the entry whose function info, annotations and families are indexed
 */
@Override
protected void init(Entry entry) {
// Function with canonical first
List<String> function_canonical = EntryUtils.getFunctionInfoWithCanonicalFirst(entry);
for (String finfo : function_canonical) {
addField(Fields.FUNCTION_DESC, finfo);
addField(Fields.ANNOTATIONS, finfo);
}
List<Annotation> annots = entry.getAnnotations();
for (Annotation currannot : annots) {
String category = currannot.getCategory();
AnnotationCategory apiCategory = currannot.getAPICategory();
String quality = currannot.getQualityQualifier();
// Author's note: function and tissue specificity values are indexed under other fields,
// so FUNCTION_INFO and EXPRESSION_PROFILE annotations are skipped entirely.
if (apiCategory.equals(AnnotationCategory.FUNCTION_INFO) || apiCategory.equals(AnnotationCategory.EXPRESSION_PROFILE))
continue;
// TODO (author's note): we should also exclude the uninformative category 'sequence conflict'.
String desc = currannot.getDescription();
if (apiCategory.equals(AnnotationCategory.GLYCOSYLATION_SITE)) {
String xref = currannot.getSynonym();
// It is actually not a synonym but the carbohydrate id from glycosuitedb !
if (xref != null)
addField(Fields.ANNOTATIONS, xref);
} else if (apiCategory.equals(AnnotationCategory.DNA_BINDING_REGION))
addField(Fields.ANNOTATIONS, category);
else if (apiCategory.equals(AnnotationCategory.VARIANT))
// We need to index them somehow for the GOLD/SILVER tests, or do we ?
// It creates a lot of useless 'variant null' tokens: string concatenation makes desc
// non-null here even when the annotation has no description.
desc = "Variant " + desc;
if (desc != null) {
if (apiCategory.equals(AnnotationCategory.SEQUENCE_CAUTION)) {
int stringpos = 0;
// Example description:
// "The sequence AAH70170 differs from that shown. Reason: miscellaneous discrepancy"
// Keep the segment between the first and the second ':' and drop its first character.
// NOTE(review): split(":")[1] throws if the description contains no ':' - confirm this cannot happen.
desc = desc.split(":")[1].substring(1);
String[] desclevels = desc.split("\\.");
String mainreason = desclevels[0];
if ((stringpos = mainreason.indexOf(" at position")) != -1) {
// truncate the position
mainreason = mainreason.substring(0, stringpos);
}
addField(Fields.ANNOTATIONS, mainreason);
// When the description has more than one '.'-separated level, the remainder is indexed too.
if (desclevels.length > 1) {
if (// mainreason truncated
stringpos > 0)
desc = desc.substring(desc.indexOf(".") + 2);
else {
stringpos = desc.indexOf(mainreason) + mainreason.length();
desc = desc.substring(stringpos + 2);
}
addField(Fields.ANNOTATIONS, desc);
}
}
// Index the description itself, except for categories starting with "go" and for
// descriptions of at most one character.
if (!category.startsWith("go") && desc.length() > 1) {
// When this builder is in gold mode only GOLD quality annotations are indexed.
// NOTE(review): quality.equals(...) throws if quality is null while isGold() is true - confirm quality is never null.
if (!this.isGold() || quality.equals("GOLD")) {
if (apiCategory.equals(AnnotationCategory.PHENOTYPIC_VARIATION)) {
// Get BED data (also get the notes ? )
Map<String, AnnotationIsoformSpecificity> annotSpecs = currannot.getTargetingIsoformsMap();
for (Map.Entry<String, AnnotationIsoformSpecificity> mapentry : annotSpecs.entrySet()) {
String subjectName = mapentry.getValue().getName();
// update description with the subject for each target isoform
addField(Fields.ANNOTATIONS, subjectName + " " + desc);
}
} else
addField(Fields.ANNOTATIONS, desc);
}
}
// in pathway and disease new annotations may appear due to
// transformation of specific xrefs (orphanet...) into
// annotations in the api
}
// Term-related indexing is delegated to handleAnnotationTerm (defined outside this method).
handleAnnotationTerm(currannot, entry);
if (apiCategory.equals(AnnotationCategory.MATURE_PROTEIN) || apiCategory.equals(AnnotationCategory.MATURATION_PEPTIDE)) {
String chainid = currannot.getSynonym();
if (chainid != null) {
if (chainid.contains("-"))
// Author's note: Uniprot FT id, like PRO_0000019235, shouldn't be called a synonym
addField(Fields.ANNOTATIONS, chainid);
else
{
// Otherwise index the sorted, pipe-separated combination of either the description
// and the single synonym, or of all the synonyms when the list size is not 1.
List<String> chainsynonyms = currannot.getSynonyms();
if (chainsynonyms.size() == 1)
addField(Fields.ANNOTATIONS, StringUtils.getSortedValueFromPipeSeparatedField(desc + " | " + chainid));
else {
chainid = "";
for (String syno : chainsynonyms) {
chainid += syno + " | ";
}
addField(Fields.ANNOTATIONS, StringUtils.getSortedValueFromPipeSeparatedField(chainid));
}
}
}
// Author's note: chainid is null for the main chain, this is wrong
}
// variant xrefs and identifiers
if (apiCategory.equals(AnnotationCategory.VARIANT)) {
// Pipe-separated list of the identifiers collected from the xref evidences.
String evidxrefaccs = "";
List<AnnotationEvidence> evidences = currannot.getEvidences();
if (evidences != null)
for (AnnotationEvidence ev : evidences) {
if (ev.isResourceAXref()) {
String db = ev.getResourceDb();
if (db == null)
System.err.println("db is null for evidence in variant annot: " + desc);
else {
if (!evidxrefaccs.isEmpty())
evidxrefaccs += " | ";
// Cosmic accessions are prefixed with the lower-cased database name.
if (db.equals("Cosmic"))
evidxrefaccs += db.toLowerCase() + ":" + ev.getResourceAccession();
else if (// Just to allow comparison with incoherent current solr implementation
db.equals("dbSNP"))
evidxrefaccs += ev.getResourceAccession();
else
// Uniprot FT id, like VAR_056577
evidxrefaccs += currannot.getSynonym();
}
}
}
// Same gold-mode filter as for descriptions above.
if (!this.isGold() || quality.equals("GOLD")) {
if (!evidxrefaccs.isEmpty())
addField(Fields.ANNOTATIONS, StringUtils.getSortedValueFromPipeSeparatedField(evidxrefaccs));
Collection<AnnotationProperty> props = currannot.getProperties();
for (AnnotationProperty prop : props) if (prop.getName().equals("mutation AA"))
// eg: p.D1685E, it is unclear why this property
// exists only in cosmic variants
addField(Fields.ANNOTATIONS, prop.getValue());
}
}
}
// Families (why not part of Annotations ?), always GOLD
for (Family family : entry.getOverview().getFamilies()) {
String ac = family.getAccession();
int stringpos = 0;
addField(Fields.ANNOTATIONS, ac);
String famdesc = family.getDescription();
// There is no get_synonyms() method for families -> can't access
// PERVR for FA-04785
addField(Fields.ANNOTATIONS, famdesc);
// Skip the 'Belongs to' and what may come before (eg: NX_P19021).
// The offset 14 is the 10 characters of "elongs to " plus 4 more (presumably "the " - TODO confirm).
// NOTE(review): when "elongs to " is absent indexOf returns -1, so stringpos becomes 13.
stringpos = famdesc.indexOf("elongs to ") + 14;
famdesc = famdesc.substring(stringpos);
// remove final dot
famdesc = famdesc.substring(0, famdesc.length() - 1);
addField(Fields.ANNOTATIONS, famdesc);
// are there subfamilies ?
String[] families = famdesc.split("\\. ");
if (families.length > 1) {
// Always GOLD
for (int i = 0; i < families.length; i++) {
addField(Fields.ANNOTATIONS, families[i]);
if (families[i].contains(") superfamily")) {
// index one more time without parenthesis
famdesc = families[i].substring(0, families[i].indexOf("(")) + "superfamily";
addField(Fields.ANNOTATIONS, famdesc);
}
}
}
// Sometimes these synonyms are wrong eg: NX_Q6NUT3 -> Major
// facilitator (TC 2.A.1) superfamily
List<String> famsynonyms = this.terminologyservice.findCvTermByAccession(ac).getSynonyms();
if (famsynonyms != null)
for (String famsynonym : famsynonyms) addField(Fields.ANNOTATIONS, famsynonym.trim());
}
}
use of org.nextprot.api.core.domain.annotation.Annotation in project nextprot-api by calipho-sib.
the class XrefFieldBuilder method init.
/**
 * Indexes the cross-references of an entry into the {@code ANTIBODY}, {@code ENSEMBL} and
 * {@code XREFS} fields.
 * <p>
 * Sources, in order: the entry's database xrefs, its pathway / disease / small molecule
 * interaction annotations, its isoform accessions and the xrefs of its publications.
 * Most {@code XREFS} values use the form {@code "<prefix>:<value>, <value>"}.
 *
 * @param entry the entry whose cross-references are indexed
 */
@Override
protected void init(Entry entry) {
    // Xref property names that may carry an additional name worth indexing.
    final String[] extraNameCategories = { "entry name", "family name", "allergen name", "reaction ID", "toxin name" };

    // --- Database cross-references ---
    List<DbXref> entryXrefs = entry.getXrefs();
    for (DbXref entryXref : entryXrefs) {
        final String accession = entryXref.getAccession();
        final String database = entryXref.getDatabaseName();

        if (database.equals(NEXTPROT_SUBMISSION.getName())) {
            continue;
        }

        // HPA with ENSG are for expression, the other HPA accessions are antibodies.
        if (database.equals(HPA.getName()) && !accession.contains(ENSG.getName())) {
            addField(Fields.ANTIBODY, accession);
        }
        if (database.equals(ENSEMBL.getName())) {
            addField(Fields.ENSEMBL, accession);
        }

        // PeptideAtlas and SRMAtlas xrefs never reach the XREFS field.
        if (database.equals(PEPTIDE_ATLAS.getName()) || database.equals(SRM_ATLAS.getName())) {
            continue;
        }

        if (database.equals(EMBL.getName())) {
            // For mRNAs (e.g. BC040557 -> protein sequence ID=AAH40557.1) the protein id is
            // only a property of the xref, so it is indexed next to the accession.
            final String proteinSequenceId = entryXref.getPropertyValue("protein sequence ID");
            if (proteinSequenceId != null) {
                addField(Fields.XREFS, "EMBL:" + proteinSequenceId + ", " + proteinSequenceId);
                addField(Fields.XREFS, "EMBL:" + accession + ", " + accession);
            } else {
                final String genomicSequenceId = entryXref.getPropertyValue("genomic sequence ID");
                if (genomicSequenceId != null || !accession.contains(".")) {
                    addField(Fields.XREFS, "EMBL:" + accession + ", " + accession);
                }
            }
            continue;
        }

        addField(Fields.XREFS, database + ":" + accession + ", " + accession);
        // Only the first extra name found is indexed.
        // Can be found for dbs: "InterPro", "Pfam", "PROSITE", "TIGRFAMs", "SMART", "PRINTS", "HAMAP",
        // "PeroxiBase", "PIRSF", "PIR", "TCDB", "CAZy", "ESTHER", UniPathway
        for (String nameCategory : extraNameCategories) {
            final String extraName = entryXref.getPropertyValue(nameCategory);
            if (extraName != null) {
                addField(Fields.XREFS, database + ":" + extraName + ", " + extraName);
                break;
            }
        }
    }

    // --- Xref-like values that are only reachable through annotations ---
    List<Annotation> entryAnnotations = entry.getAnnotations();
    for (Annotation annotation : entryAnnotations) {
        final String category = annotation.getCategory();
        if ("pathway".equals(category)) {
            final String pathwayName = annotation.getDescription();
            addField(Fields.XREFS, "Pathway:" + pathwayName + ", " + pathwayName);
        } else if ("disease".equals(category)) {
            // Only diseases whose parent xref comes from Orphanet are indexed.
            final DbXref parentXref = annotation.getParentXref();
            if (parentXref != null && parentXref.getDatabaseName().equals(ORPHANET.getName())) {
                final String diseaseName = parentXref.getPropertyValue("disease");
                addField(Fields.XREFS, "Disease:" + diseaseName + ", " + diseaseName);
            }
        } else if ("SmallMoleculeInteraction".equals(category)) {
            final String genericName = annotation.getDescription();
            addField(Fields.XREFS, "generic name:" + genericName + ", " + genericName);
        }
    }

    // --- Isoform ids: the accession without its first three characters ---
    List<Isoform> entryIsoforms = entry.getIsoforms();
    for (Isoform isoform : entryIsoforms) {
        final String isoformId = isoform.getIsoformAccession().substring(3);
        addField(Fields.XREFS, "isoform ID:" + isoformId + ", " + isoformId);
    }

    // --- Xrefs to publications (PubMed, DOIs) ---
    for (Publication publication : entry.getPublications()) {
        List<PublicationDbXref> publicationXrefs = publication.getDbXrefs();
        for (DbXref publicationXref : publicationXrefs) {
            // Accessions may carry a trailing \t (like 10.1080/13547500802063240 in NX_P14635)
            final String accession = publicationXref.getAccession().trim();
            final String database = publicationXref.getDatabaseName();
            addField(Fields.XREFS, database + ":" + accession + ", " + accession);
        }
    }
}
use of org.nextprot.api.core.domain.annotation.Annotation in project nextprot-api by calipho-sib.
the class IsoformSequencePositionMapperIntegrationTest method getErrorsDuringPropagationOnVariantsOfSingleEntry.
/**
 * Checks, for the variants of one entry, that a variant position propagated from one isoform
 * to the others lands where expected.
 * <p>
 * For every VARIANT annotation, the expected position on each isoform is obtained from
 * {@code getExpectedPosForEachIsoform}. Each isoform with an expected position is then used as
 * a starting point: its position is mapped to codon positions on the gene master and projected
 * onto every other isoform, and the projected position is compared with the expected one.
 * <p>
 * The scan stops at the first variant showing an error, so the returned value is 0 or 1.
 * Diagnostic output is printed only when the {@code sout} flag is set.
 *
 * @param entry_ac accession of the entry to check
 * @return 1 if a variant with a propagation error was found, 0 otherwise
 * @throws Exception if building the entry or mapping positions fails
 */
public int getErrorsDuringPropagationOnVariantsOfSingleEntry(String entry_ac) throws Exception {
    Entry entry = entryBuilderService.build(EntryConfig.newConfig(entry_ac).withTargetIsoforms().withAnnotations());
    int delCount = 0;
    int subCount = 0;
    int insCount = 0;
    int otherCount = 0;
    int errorCount = 0;
    for (Annotation a : entry.getAnnotations()) {
        if (!a.getAPICategory().equals(AnnotationCategory.VARIANT)) {
            continue;
        }
        // Classify the variant by the lengths of its original and mutated sequences.
        String ori = a.getVariant().getOriginal();
        String mut = a.getVariant().getVariant();
        if (ori.length() == 1 && mut.length() == 1) {
            subCount++;
        } else if (ori.length() == 1 && mut.length() == 0) {
            delCount++;
        } else if (ori.length() == 0 && mut.length() == 1) {
            insCount++;
        } else {
            // Count the variant whatever the verbosity; only the trace depends on sout.
            otherCount++;
            if (sout) {
                System.out.println("Other variant:" + a.getUniqueName());
            }
        }

        Map<String, Integer> isoExpectedPos = getExpectedPosForEachIsoform(entry, a);
        printExpectedPosForEachIsoform(isoExpectedPos, a);
        boolean errorOnVariant = false;
        for (String iso1name : isoExpectedPos.keySet()) {
            Integer iso1ExpectedPos = isoExpectedPos.get(iso1name);
            Isoform iso1 = IsoformUtils.getIsoformByName(entry, iso1name);
            if (iso1ExpectedPos == null) {
                // The variant is not expected on this isoform: nothing to propagate from it.
                continue;
            }
            GeneMasterCodonPosition nuPos = IsoformSequencePositionMapper.getCodonPositionsOnMaster(iso1ExpectedPos, iso1);
            if (!nuPos.isValid()) {
                errorOnVariant = true;
                if (sout) {
                    System.out.println("ERROR1: codon positions not found for " + iso1name + " for variant at position: " + iso1ExpectedPos);
                }
                continue;
            }
            printIsoLengthAndRangesNuCount(iso1.getUniqueName(), iso1.getSequence(), iso1.getMasterMapping());
            if (sout) {
                System.out.println("Starting variant propagation from isoform " + iso1name + " at position " + iso1ExpectedPos);
                System.out.println(getSequenceWithHighlighedPos(iso1.getSequence(), iso1ExpectedPos));
            }
            // Project the master codon onto every other isoform and compare with the expectation.
            for (Isoform iso2 : entry.getIsoforms()) {
                String iso2name = iso2.getUniqueName();
                if (iso2name.equals(iso1name)) {
                    continue;
                }
                CodonNucleotideIndices nuIdx = IsoformSequencePositionMapper.getCodonNucleotideIndices(nuPos, iso2);
                Integer iso2ActualPos = nuIdx.getAminoAcidPosition();
                Integer iso2ExpectedPos = isoExpectedPos.get(iso2name);
                if (sout) {
                    System.out.println("Variant " + a.getUniqueName() + " position on isoform " + iso2name + " is " + iso2ActualPos);
                }
                printIsoLengthAndRangesNuCount(iso2.getUniqueName(), iso2.getSequence(), iso2.getMasterMapping());
                if (iso2ExpectedPos != null && sout) {
                    System.out.println("Expected:" + getSequenceWithHighlighedPos(iso2.getSequence(), iso2ExpectedPos));
                }
                if (iso2ActualPos != null && sout) {
                    System.out.println("Actual :" + getSequenceWithHighlighedPos(iso2.getSequence(), iso2ActualPos));
                }
                if (iso2ActualPos == null && iso2ExpectedPos == null) {
                    // OK: the variant is neither expected nor found on this isoform.
                } else if (iso2ActualPos == null || iso2ExpectedPos == null) {
                    errorOnVariant = true;
                    if (sout) {
                        System.out.println("ERROR2: variant position on isoform " + iso2name + " is " + iso2ActualPos + ", expected " + iso2ExpectedPos);
                    }
                } else if (!iso2ActualPos.equals(iso2ExpectedPos)) {
                    errorOnVariant = true;
                    if (sout) {
                        System.out.println("ERROR3: variant position on isoform " + iso2name + " is " + iso2ActualPos + ", expected " + iso2ExpectedPos);
                    }
                }
            }
        }
        // Stop at the first variant in error: the remaining annotations are not examined.
        if (errorOnVariant) {
            errorCount++;
            break;
        }
    }
    if (sout) {
        System.out.println("Summary " + entry.getUniqueName());
        System.out.println("insCount:" + insCount);
        System.out.println("delCount:" + delCount);
        System.out.println("subCount:" + subCount);
        System.out.println("otherCount:" + otherCount);
        System.out.println("errorCount:" + errorCount);
    }
    return errorCount;
}
Aggregations