use of org.nextprot.api.core.domain.CvTerm in project nextprot-api by calipho-sib.
the class AnnotationBuilderBastUnitTest method init.
/**
 * Test fixture: initialises the Mockito-annotated mocks of this test class and
 * stubs the terminology, publication and main-names services with canned answers.
 */
@Before
public void init() throws FileNotFoundException, DataSetException {
    MockitoAnnotations.initMocks(this);

    // Any accession lookup answers with the same stub term.
    CvTerm stubTerm = new CvTerm();
    stubTerm.setOntology("eco-ontology-cv");
    stubTerm.setName("eco-name-1");
    stubTerm.setDescription("some description");
    Mockito.when(terminologyService.findCvTermByAccession(Matchers.anyString()))
            .thenReturn(stubTerm);

    // PubMed "123" resolves to a publication with id 999, PubMed "000" resolves to nothing.
    Publication knownPublication = new Publication();
    knownPublication.setId(999);
    Mockito.when(publicationService.findPublicationByDatabaseAndAccession("PubMed", "123"))
            .thenReturn(knownPublication);
    Mockito.when(publicationService.findPublicationByDatabaseAndAccession("PubMed", "000"))
            .thenReturn(null);

    // unused in tests yet
    Mockito.when(mainNamesService.findIsoformOrEntryMainName()).thenReturn(null);
}
use of org.nextprot.api.core.domain.CvTerm in project nextprot-api by calipho-sib.
the class CVFieldBuilder method init.
/**
 * Indexes the controlled-vocabulary (CV) data of an entry.
 * <p>
 * Fields written: CV accessions and names of the annotations (tissue specificity
 * excluded) and of the families, the ancestors and synonyms of those terms, and the
 * names, synonyms and EC numbers of the entry's enzymes.
 *
 * @param entry the entry whose annotations, families and enzymes are indexed
 */
@Override
protected void init(Entry entry) {
    Set<String> cv_acs = new HashSet<String>();
    Set<String> cv_ancestors_acs = new HashSet<String>();
    Set<String> cv_synonyms = new HashSet<String>();
    // top level ancestors (Annotation, feature, and ROI)
    Set<String> top_acs = new HashSet<>(Arrays.asList("CVAN_0001", "CVAN_0002", "CVAN_0011"));

    // CV accessions
    for (Annotation currannot : entry.getAnnotations()) {
        String category = currannot.getCategory();
        // tissue-specific CVs are indexed under 'expression'
        if (category.equals("tissue specificity")) {
            continue;
        }
        String cvac = currannot.getCvTermAccessionCode();
        if (cvac == null) {
            continue;
        }
        if (cvac.isEmpty()) {
            logger.warn("CVterm accession empty in " + category + " for " + entry.getUniqueName());
            continue;
        }
        if (category.startsWith("go ")) {
            // We don't index negative annotations: a GO annotation is skipped when none of
            // its evidences is positive (this includes an annotation without any evidence)
            boolean allnegative = true;
            for (AnnotationEvidence ev : currannot.getEvidences()) {
                if (!ev.isNegativeEvidence()) {
                    allnegative = false;
                    break;
                }
            }
            if (allnegative) {
                continue;
            }
        }
        // Constant-first equals: a missing quality qualifier must not abort the indexing
        if (!this.isGold() || "GOLD".equals(currannot.getQualityQualifier())) {
            addField(Fields.CV_ACS, cvac);
            // No duplicates: this is a Set, will be used for synonyms and ancestors
            cv_acs.add(cvac);
            addField(Fields.CV_NAMES, currannot.getCvTermName());
        }
    }

    // Families (why not part of Annotations ?)
    for (Family family : entry.getOverview().getFamilies()) {
        addField(Fields.CV_ACS, family.getAccession());
        addField(Fields.CV_NAMES, family.getName() + " family");
        cv_acs.add(family.getAccession());
    }

    // Final CV acs, ancestors and synonyms
    for (String cvac : cv_acs) {
        CvTerm term = this.terminologyservice.findCvTermByAccession(cvac);
        if (null == term) {
            logger.error(entry.getUniqueName() + " - term with accession |" + cvac + "| not found with findCvTermByAccession()");
            continue;
        }
        List<String> ancestors = terminologyservice.getAllAncestorsAccession(term.getAccession());
        if (ancestors != null) {
            cv_ancestors_acs.addAll(ancestors);
        }
        List<String> synonyms = term.getSynonyms();
        if (synonyms != null) {
            // No duplicate: this is a Set
            for (String synonym : synonyms) {
                cv_synonyms.add(synonym.trim());
            }
        }
    }

    // Remove uninformative top level ancestors (Annotation, feature, and ROI)
    cv_ancestors_acs.removeAll(top_acs);

    // Index generated sets
    for (String ancestorac : cv_ancestors_acs) {
        addField(Fields.CV_ANCESTORS_ACS, ancestorac);
        CvTerm ancestorTerm = this.terminologyservice.findCvTermByAccession(ancestorac);
        if (null == ancestorTerm) {
            // The accession is still indexed; only the unresolvable name is left out
            logger.error(entry.getUniqueName() + " - ancestor term with accession |" + ancestorac + "| not found with findCvTermByAccession()");
            continue;
        }
        addField(Fields.CV_ANCESTORS, ancestorTerm.getName());
    }
    for (String synonym : cv_synonyms) {
        addField(Fields.CV_SYNONYMS, synonym);
    }

    // Enzymes: names, synonyms and a comma-separated list of "EC <accession>"
    StringBuilder ec_names = new StringBuilder();
    for (CvTerm currenzyme : entry.getEnzymes()) {
        // NOTE(review): cv_acs is not read after this point, so this add has no effect on
        // the index - confirm whether the enzyme accession was meant to go to Fields.CV_ACS
        cv_acs.add(currenzyme.getAccession());
        addField(Fields.CV_NAMES, currenzyme.getName());
        if (ec_names.length() > 0) {
            ec_names.append(", ");
        }
        ec_names.append("EC ").append(currenzyme.getAccession());
        List<String> synonyms = currenzyme.getSynonyms();
        if (synonyms != null) {
            for (String synonym : synonyms) {
                addField(Fields.CV_SYNONYMS, synonym.trim());
            }
        }
    }
    addField(Fields.EC_NAME, ec_names.toString());
}
use of org.nextprot.api.core.domain.CvTerm in project nextprot-api by calipho-sib.
the class ExpressionFieldBuilder method init.
/**
 * Indexes the expression field of an entry.
 * <p>
 * Collects the CV term accession and name of every "tissue specificity" annotation
 * that has at least one qualifying positive evidence, expands the accessions starting
 * with "TS-" with their ancestors (accession and name) and synonyms, and adds every
 * resulting value, trimmed and in sorted order, to {@code Fields.EXPRESSION}.
 *
 * @param entry the entry whose tissue specificity annotations are indexed
 */
@Override
protected void init(Entry entry) {
    // Extract the tissues where there is expression ....
    Set<String> cv_tissues = new HashSet<String>();
    for (Annotation currannot : entry.getAnnotations()) {
        if (!currannot.getCategory().equals("tissue specificity")) {
            continue;
        }
        // Check there is a detected expression
        boolean allnegative = true;
        for (AnnotationEvidence ev : currannot.getEvidences()) {
            // Only a GOLD positive evidence can invalidate allnegative in the GOLD index
            if (!ev.isNegativeEvidence() && (!this.isGold() || "GOLD".equals(ev.getQualityQualifier()))) {
                allnegative = false;
                break;
            }
        }
        if (allnegative) {
            continue;
        }
        if (!this.isGold() || "GOLD".equals(currannot.getQualityQualifier())) {
            // No duplicates this is a Set.
            // Null values are left out: the sorted set built below rejects them.
            String accession = currannot.getCvTermAccessionCode();
            if (accession != null) {
                cv_tissues.add(accession);
            }
            String name = currannot.getCvTermName();
            if (name != null) {
                cv_tissues.add(name);
            }
        }
    }

    // Expression (without stages and expression_levels)
    SortedSet<String> cv_tissues_final = new TreeSet<String>();
    for (String cv : cv_tissues) {
        cv_tissues_final.add(cv);
        if (!cv.startsWith("TS-")) {
            continue;
        }
        CvTerm term = terminologyservice.findCvTermByAccession(cv);
        if (null == term) {
            // there is nothing more we can add for this term (ancestors, synonyms), so move on to the next value
            logger.error(entry.getUniqueName() + " - term with accession |" + cv + "| not found with findCvTermByAccession()");
            continue;
        }
        List<String> ancestors = terminologyservice.getAllAncestorsAccession(term.getAccession());
        if (ancestors != null) {
            for (String ancestorac : ancestors) {
                cv_tissues_final.add(ancestorac);
                CvTerm ancestorTerm = terminologyservice.findCvTermByAccession(ancestorac);
                if (null == ancestorTerm) {
                    // The accession is still indexed; only the unresolvable name is left out
                    logger.error(entry.getUniqueName() + " - ancestor term with accession |" + ancestorac + "| not found with findCvTermByAccession()");
                    continue;
                }
                cv_tissues_final.add(ancestorTerm.getName());
            }
        }
        List<String> synonyms = term.getSynonyms();
        if (synonyms != null) {
            for (String synonym : synonyms) {
                cv_tissues_final.add(synonym);
            }
        }
    }

    for (String cv : cv_tissues_final) {
        addField(Fields.EXPRESSION, cv.trim());
    }
}
use of org.nextprot.api.core.domain.CvTerm in project nextprot-api by calipho-sib.
the class AnnotationFieldBuilder method handleAnnotationTerm.
/**
 * Indexes the CV term attached to an annotation into {@code Fields.ANNOTATIONS}:
 * its accession, its name, its synonyms and its ancestors (accession and name).
 * <p>
 * Nothing is indexed when the annotation has no CV term accession, when the accession
 * starts with "GO:" and no evidence is positive, or when this builder is in GOLD mode
 * and the annotation quality is not "GOLD".
 *
 * @param currannot the annotation whose CV term is indexed
 * @param entry the entry owning the annotation; only used in error messages
 */
protected void handleAnnotationTerm(Annotation currannot, Entry entry) {
    String quality = currannot.getQualityQualifier();
    String cvac = currannot.getCvTermAccessionCode();
    if (cvac == null || cvac.isEmpty()) {
        return;
    }
    if (cvac.startsWith("GO:")) {
        // We don't index negative annotations: skipped when no evidence is positive
        // (this includes an annotation without any evidence)
        boolean allnegative = true;
        for (AnnotationEvidence ev : currannot.getEvidences()) {
            if (!ev.isNegativeEvidence()) {
                allnegative = false;
                break;
            }
        }
        if (allnegative) {
            return;
        }
    }
    // Constant-first equals: a missing quality qualifier must not abort the indexing
    if (this.isGold() && !"GOLD".equals(quality)) {
        return;
    }

    addField(Fields.ANNOTATIONS, cvac);
    addField(Fields.ANNOTATIONS, currannot.getCvTermName());
    CvTerm term = this.terminologyservice.findCvTermByAccession(cvac);
    if (null == term) {
        // there is nothing more we can add to indexed fields (ancestors, synonyms), so let's return
        logger.error(entry.getUniqueName() + " - term with accession |" + cvac + "| not found with findCvTermByAccession()");
        return;
    }

    // Synonyms, joined as a pipe-separated value (the term fetched above is reused)
    List<String> synonyms = term.getSynonyms();
    if (synonyms != null) {
        StringBuilder allsynonyms = new StringBuilder();
        for (String synonym : synonyms) {
            if (allsynonyms.length() > 0) {
                allsynonyms.append(" | ");
            }
            allsynonyms.append(synonym.trim());
        }
        addField(Fields.ANNOTATIONS, StringUtils.getSortedValueFromPipeSeparatedField(allsynonyms.toString()));
    }

    // Ancestors, joined as "accession | name | accession | name ..."
    List<String> ancestors = terminologyservice.getAllAncestorsAccession(cvac);
    StringBuilder ancestorsBuilder = new StringBuilder();
    if (ancestors != null) {
        for (String ancestor : ancestors) {
            if (ancestorsBuilder.length() > 0) {
                ancestorsBuilder.append(" | ");
            }
            // adding Ac
            ancestorsBuilder.append(ancestor);
            CvTerm ancestorTerm = this.terminologyservice.findCvTermByAccession(ancestor);
            if (null == ancestorTerm) {
                // The accession is still indexed; only the unresolvable name is left out
                logger.error(entry.getUniqueName() + " - ancestor term with accession |" + ancestor + "| not found with findCvTermByAccession()");
                continue;
            }
            ancestorsBuilder.append(" | ").append(ancestorTerm.getName());
        }
    }
    String allancestors = ancestorsBuilder.toString();

    // When the last ancestor name is one of the generic top level terms below, the whole
    // ancestor list is replaced by that single label (don't index generic top level ancestors)
    if (allancestors.endsWith(" domain")) {
        allancestors = "domain";
    } else if (allancestors.endsWith("zinc finger region")) {
        allancestors = "zinc finger region";
    } else if (allancestors.endsWith("repeat")) {
        allancestors = "repeat";
    }

    if (allancestors.length() > 1) {
        addField(Fields.ANNOTATIONS, StringUtils.getSortedValueFromPipeSeparatedField(allancestors));
    }
}
use of org.nextprot.api.core.domain.CvTerm in project nextprot-api by calipho-sib.
the class StatementAnnotationBuilder method buildAnnotationEvidences.
/**
 * Converts statements into annotation evidences.
 * <p>
 * One evidence is built per statement and the results are collected into a Set, so
 * evidences that compare equal are kept only once. Every distinct evidence then receives
 * a generated id from {@code IdentifierOffset.EVIDENCE_ID_COUNTER_FOR_STATEMENTS}, and
 * evidences whose resource id is -2 are left out of the returned list.
 *
 * @param Statements the statements to convert
 * @return a new list holding the retained evidences
 * @throws NextProtException if a statement carries an evidence code that the terminology
 *         service cannot resolve
 */
protected List<AnnotationEvidence> buildAnnotationEvidences(List<Statement> Statements) {
    // Collecting into a Set ensures there is no repeated evidence
    Set<AnnotationEvidence> distinctEvidences = Statements.stream().map(statement -> {
        AnnotationEvidence built = new AnnotationEvidence();
        // TODO to be checked with Amos and Lydie
        built.setResourceType("database");
        built.setResourceAssociationType("evidence");
        built.setQualityQualifier(statement.getValue(StatementField.EVIDENCE_QUALITY));
        built.setResourceId(findPublicationId(statement));

        AnnotationEvidenceProperty intensity =
                addPropertyIfPresent(statement.getValue(StatementField.EVIDENCE_INTENSITY), "intensity");
        AnnotationEvidenceProperty subjectOrigin =
                addPropertyIfPresent(statement.getValue(StatementField.ANNOTATION_SUBJECT_SPECIES), "subject-protein-origin");
        AnnotationEvidenceProperty objectOrigin =
                addPropertyIfPresent(statement.getValue(StatementField.ANNOTATION_OBJECT_SPECIES), "object-protein-origin");

        // Keep only the properties which are not null
        List<AnnotationEvidenceProperty> presentProperties = new ArrayList<>();
        for (AnnotationEvidenceProperty candidate : Arrays.asList(intensity, subjectOrigin, objectOrigin)) {
            if (candidate != null) {
                presentProperties.add(candidate);
            }
        }
        built.setProperties(presentProperties);

        String evidenceCode = statement.getValue(StatementField.EVIDENCE_CODE);
        built.setEvidenceCodeAC(evidenceCode);
        built.setAssignedBy(statement.getValue(StatementField.ASSIGNED_BY));
        built.setAssignmentMethod(statement.getValue(StatementField.ASSIGMENT_METHOD));
        // Replaces the "database" value set at the top with the statement's own resource type
        built.setResourceType(statement.getValue(StatementField.RESOURCE_TYPE));
        built.setEvidenceCodeOntology("evidence-code-ontology-cv");
        built.setNegativeEvidence("true".equalsIgnoreCase(statement.getValue(StatementField.IS_NEGATIVE)));

        if (evidenceCode != null) {
            CvTerm codeTerm = terminologyService.findCvTermByAccession(evidenceCode);
            if (codeTerm == null) {
                throw new NextProtException("Not found " + evidenceCode + " in the database");
            }
            built.setEvidenceCodeName(codeTerm.getName());
        }

        built.setNote(statement.getValue(StatementField.EVIDENCE_NOTE));
        return built;
    }).collect(Collectors.toSet());

    // Every distinct evidence consumes a generated id, including the ones dropped below.
    // NOTE(review): -2 looks like the "no resource id" marker (see the log message) - confirm
    List<AnnotationEvidence> retained = new ArrayList<>();
    for (AnnotationEvidence candidate : distinctEvidences) {
        candidate.setEvidenceId(IdentifierOffset.EVIDENCE_ID_COUNTER_FOR_STATEMENTS.incrementAndGet());
        if (candidate.getResourceId() != -2) {
            retained.add(candidate);
        }
    }

    int total = distinctEvidences.size();
    if (retained.size() < total) {
        int removed = total - retained.size();
        LOGGER.debug("Removed " + removed + " evidence because no resource id from a total of " + total);
    }
    return retained;
}
Aggregations