Use of org.molgenis.data.meta.model.AttributeFactory in project molgenis by molgenis.
Class SortaCsvRepository, method getEntityType.
/**
 * Returns the entity type of this repository, creating and caching it on the first call.
 *
 * <p>The type is created via {@code EntityType.newInstance} from the wrapped CSV repository's
 * entity type (copy mode {@code DEEP_COPY_ATTRS}), then given this repository's id and label and
 * the "PostgreSQL" backend. A non-nillable {@code ALLOWED_IDENTIFIER} attribute is added with
 * {@code ROLE_ID}, and the attribute named {@code SortaServiceImpl.DEFAULT_MATCHING_NAME_FIELD},
 * when present, is flagged via {@code setLabelAttribute(true)}.
 *
 * @return the cached entity type
 */
public EntityType getEntityType() {
  // Already built on an earlier call: hand back the cached instance.
  if (entityType != null) {
    return entityType;
  }

  // FIXME do not use application context
  AttributeFactory attributeFactory = getApplicationContext().getBean(AttributeFactory.class);

  entityType = EntityType.newInstance(csvRepository.getEntityType(), DEEP_COPY_ATTRS, attributeFactory);
  entityType.setId(entityTypeId);
  entityType.setLabel(entityLabel);
  // FIXME do not hardcode backend name
  entityType.setBackend("PostgreSQL");

  Attribute identifierAttribute = attributeFactory.create().setName(ALLOWED_IDENTIFIER).setNillable(false);
  entityType.addAttribute(identifierAttribute, ROLE_ID);

  // The matching-name column is optional in the source type.
  Attribute matchingNameAttribute = entityType.getAttribute(SortaServiceImpl.DEFAULT_MATCHING_NAME_FIELD);
  if (matchingNameAttribute != null) {
    matchingNameAttribute.setLabelAttribute(true);
  }

  return entityType;
}
Use of org.molgenis.data.meta.model.AttributeFactory in project molgenis by molgenis.
Class HPOAnnotator, method init.
/**
 * Builds the HPO entity annotator and hands it to {@code annotator.init}.
 *
 * <p>The annotator is an anonymous {@code AbstractAnnotator} over {@code HPO_RESOURCE} whose
 * {@code createAnnotatorAttributes} returns {@code createHpoOutputAttributes()}.
 */
@Override
public void init() {
  List<Attribute> attributes = createHpoOutputAttributes();

  // User-facing description. The sentences are separated by explicit spaces so that the
  // concatenated text does not run together (previously "disease.Terms", "defect.The",
  // "diseases.Please").
  String description =
      "The Human Phenotype Ontology (HPO) aims to provide a standardized vocabulary of phenotypic abnormalities encountered in human disease. "
          + "Each term in the HPO describes a phenotypic abnormality, such as atrial septal defect. "
          + "The HPO is currently being developed using the medical literature, Orphanet, DECIPHER, and OMIM. "
          + "HPO currently contains approximately 11,000 terms and over 115,000 annotations to hereditary diseases. "
          + "Please note that if SnpEff was used to annotate in order to add the gene symbols to the variants, "
          + "then this annotator should be used on the result entity rather than the variant entity itself.";

  AnnotatorInfo info = AnnotatorInfo.create(AnnotatorInfo.Status.READY, AnnotatorInfo.Type.PHENOTYPE_ASSOCIATION, NAME, description, attributes);

  EntityAnnotator entityAnnotator = new AbstractAnnotator(HPO_RESOURCE, info, geneNameQueryCreator, new HpoResultFilter(entityTypeFactory, this), dataService, resources, new SingleFileLocationCmdLineAnnotatorSettingsConfigurer(HPO_LOCATION, HPOAnnotatorSettings)) {
    @Override
    public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory) {
      // The supplied factory is not used; the output attributes come from the enclosing annotator.
      return createHpoOutputAttributes();
    }
  };
  annotator.init(entityAnnotator);
}
Use of org.molgenis.data.meta.model.AttributeFactory in project molgenis by molgenis.
Class VcfRepositoryTest, method testGetEntityType.
// Regression test for https://github.com/molgenis/molgenis/issues/6528
// An IOException thrown by VcfReader.getVcfMeta() must surface from getEntityType() as a
// MolgenisDataException carrying the message asserted in the annotation below.
@Test(expectedExceptions = MolgenisDataException.class, expectedExceptionsMessageRegExp = "Failed to read VCF Metadata from file; nested exception is java.io.IOException: error processing source")
public void testGetEntityType() throws IOException {
  // Reader whose metadata lookup always fails.
  VcfReader failingReader = mock(VcfReader.class);
  doThrow(new IOException("error processing source")).when(failingReader).getVcfMeta();

  // Factory that hands out the failing reader.
  VcfReaderFactory readerFactory = mock(VcfReaderFactory.class);
  when(readerFactory.get()).thenReturn(failingReader);

  VcfRepository repository = new VcfRepository(readerFactory, "entityTypeId", mock(VcfAttributes.class), mock(EntityTypeFactory.class), mock(AttributeFactory.class));

  // Expected to throw; the annotation verifies the exception type and message.
  repository.getEntityType();
}
Use of org.molgenis.data.meta.model.AttributeFactory in project molgenis by molgenis.
Class CaddAnnotator, method init.
/**
 * Builds the CADD entity annotator and hands it to {@code annotator.init}.
 *
 * <p>The annotator is an anonymous {@code AbstractAnnotator} over {@code CADD_TABIX_RESOURCE}
 * whose {@code createAnnotatorAttributes} returns {@code createCaddAnnotatorAttributes()}.
 */
@Override
public void init() {
  List<Attribute> caddAttributes = createCaddAnnotatorAttributes();

  // User-facing description of the annotator.
  String description = "CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome.\n"
      + "While many variant annotation and scoring utils are around, most annotations tend to exploit a single information type (e.g. conservation) "
      + "and/or are restricted in scope (e.g. to missense changes). "
      + "Thus, a broadly applicable metric that objectively weights and integrates diverse information is needed. "
      + "Combined Annotation Dependent Depletion (CADD) is a framework that integrates multiple "
      + "annotations into one metric by contrasting variants that survived natural selection with simulated mutations.\n"
      + "C-scores strongly correlate with allelic diversity, pathogenicity of both coding and non-coding variants, and experimentally measured "
      + "regulatory effects, and also highly rank causal variants within "
      + "individual genome sequences. Finally, C-scores of complex trait-associated variants from genome-wide association studies (GWAS) are "
      + "significantly higher than matched controls and correlate with study sample size, likely reflecting the increased accuracy of larger GWAS.\n"
      + "CADD can quantitatively prioritize functional, deleterious, and disease causal variants across a wide range of functional categories, "
      + "effect sizes and genetic architectures and can be used prioritize "
      + "causal variation in both research and clinical settings. (source: http://cadd.gs.washington.edu/info)";

  AnnotatorInfo info = AnnotatorInfo.create(AnnotatorInfo.Status.READY, AnnotatorInfo.Type.PATHOGENICITY_ESTIMATE, NAME, description, caddAttributes);

  // Collaborators are created in the same order as the constructor arguments that consume them.
  LocusQueryCreator queryCreator = new LocusQueryCreator(vcfAttributes);
  MultiAllelicResultFilter resultFilter = new MultiAllelicResultFilter(caddAttributes, true, vcfAttributes);
  SingleFileLocationCmdLineAnnotatorSettingsConfigurer settingsConfigurer = new SingleFileLocationCmdLineAnnotatorSettingsConfigurer(CaddAnnotatorSettings.Meta.CADD_LOCATION, caddAnnotatorSettings);

  EntityAnnotator caddEntityAnnotator = new AbstractAnnotator(CADD_TABIX_RESOURCE, info, queryCreator, resultFilter, dataService, resources, settingsConfigurer) {
    @Override
    public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory) {
      // The supplied factory is not used; the attributes come from the enclosing annotator.
      return createCaddAnnotatorAttributes();
    }
  };
  annotator.init(caddEntityAnnotator);
}
Use of org.molgenis.data.meta.model.AttributeFactory in project molgenis by molgenis.
Class FitConAnnotator, method init.
/**
 * Builds the FitCon entity annotator and hands it to {@code annotator.init}.
 *
 * <p>The annotator is an anonymous {@code AbstractAnnotator} over {@code FITCON_TABIX_RESOURCE}
 * whose {@code createAnnotatorAttributes} returns {@code createFitconOutputAttributes()}.
 */
@Override
public void init() {
  List<Attribute> attributes = createFitconOutputAttributes();

  // User-facing description. The text had been pasted with line-break hyphenation
  // ("al- gorithm", "outper- form", ...), missing spaces ("19%relative", "SVMmethodology"),
  // a space inside the URL and a dangling "Contact:" label; those artifacts are repaired here.
  // NOTE(review): the wording reads like an adapted paper abstract - confirm the URL is valid.
  String description =
      "Summary: Annotating genetic variants, especially non-coding variants, "
          + "for the purpose of identifying pathogenic variants remains a challenge. "
          + "Combined annotation-dependent depletion (CADD) is an algorithm designed "
          + "to annotate both coding and non-coding variants, and has been shown to "
          + "outperform other annotation algorithms. CADD trains a linear kernel support"
          + " vector machine (SVM) to differentiate evolutionarily derived, likely benign,"
          + " alleles from simulated, likely deleterious, variants. However, SVMs cannot "
          + "capture non-linear relationships among the features, which can limit performance. "
          + "To address this issue, we have developed FITCON. FITCON uses the same feature set and "
          + "training data as CADD to train a deep neural network (DNN). DNNs can capture non-linear"
          + " relationships among features and are better suited than SVMs for problems with a "
          + "large number of samples and features. We exploit Compute Unified Device Architecture-compatible"
          + " graphics processing units and deep learning techniques such as dropout and momentum training to"
          + " accelerate the DNN training. FITCON achieves about a 19% relative reduction in the error rate and"
          + " about a 14% relative increase in the area under the curve (AUC) metric over CADD’s SVM methodology."
          + " All data and source code are available at https://cbcl.ics.uci.edu/public_data/FITCON/.";

  AnnotatorInfo fitconInfo = AnnotatorInfo.create(AnnotatorInfo.Status.READY, AnnotatorInfo.Type.EFFECT_PREDICTION, NAME, description, attributes);

  EntityAnnotator entityAnnotator = new AbstractAnnotator(FITCON_TABIX_RESOURCE, fitconInfo, new LocusQueryCreator(vcfAttributes), new MultiAllelicResultFilter(attributes, vcfAttributes), dataService, resources, new SingleFileLocationCmdLineAnnotatorSettingsConfigurer(FITCON_LOCATION, fitConAnnotatorSettings)) {
    @Override
    public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory) {
      // The supplied factory is not used; the output attributes come from the enclosing annotator.
      return createFitconOutputAttributes();
    }
  };
  annotator.init(entityAnnotator);
}
Aggregations