Use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
The class GoNLAnnotator, method init().
@Override
public void init() {
    List<Attribute> attributes = createGoNlOutputAttributes();
    AnnotatorInfo thousandGenomeInfo = AnnotatorInfo.create(AnnotatorInfo.Status.READY,
            AnnotatorInfo.Type.POPULATION_REFERENCE, NAME,
            "What genetic variation is to be found in the Dutch indigenous population? "
                    + "Detailed knowledge about this is not only interesting in itself, "
                    + "it also helps to extract useful biomedical information from Dutch biobanks. "
                    + "The Dutch biobank collaboration BBMRI-NL has initiated the extensive Rainbow Project “Genome of the Netherlands” (GoNL) "
                    + "because it offers unique opportunities for science and for the development of new treatments and diagnostic techniques. "
                    + "A close-up look at the DNA of 750 Dutch people-250 trio’s of two parents and an adult child-plus a "
                    + "global genetic profile of large numbers of Dutch will disclose a wealth of new information, new insights, "
                    + "and possible applications.", attributes);
    LocusQueryCreator locusQueryCreator = new LocusQueryCreator(vcfAttributes);
    EntityAnnotator entityAnnotator = new QueryAnnotatorImpl(GONL_MULTI_FILE_RESOURCE, thousandGenomeInfo,
            locusQueryCreator, dataService, resources, (annotationSourceFileName) -> {
        goNLAnnotatorSettings.set(ROOT_DIRECTORY, annotationSourceFileName);
        goNLAnnotatorSettings.set(FILEPATTERN, "gonl.chr%s.snps_indels.r5.vcf.gz");
        goNLAnnotatorSettings.set(OVERRIDE_CHROMOSOME_FILES, "X:gonl.chrX.release4.gtc.vcf.gz");
        goNLAnnotatorSettings.set(CHROMOSOMES, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X");
    }) {
        @Override
        public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory) {
            return createGoNlOutputAttributes();
        }

        @Override
        protected void processQueryResults(Entity entity, Iterable<Entity> annotationSourceEntities, boolean updateMode) {
            if (updateMode) {
                throw new MolgenisDataException("This annotator/filter does not support updating of values");
            }
            List<Entity> refMatches = determineRefMatches(entity, annotationSourceEntities);
            setGoNLFrequencies(entity, refMatches);
        }
    };
    annotator.init(entityAnnotator);
}
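The settings lambda above wires one source file per chromosome: FILEPATTERN contains a %s placeholder that is filled with each name from CHROMOSOMES, while OVERRIDE_CHROMOSOME_FILES substitutes a different file for chromosome X. A minimal sketch of how such a pattern can be resolved (resolveFileName is a hypothetical helper, not the MOLGENIS resource code):

import com.google.common.collect.ImmutableMap;
import java.util.Map;

// Hypothetical helper: expand a per-chromosome file pattern, honoring overrides.
static String resolveFileName(String pattern, Map<String, String> overrides, String chromosome) {
    return overrides.getOrDefault(chromosome, String.format(pattern, chromosome));
}

// resolveFileName("gonl.chr%s.snps_indels.r5.vcf.gz",
//         ImmutableMap.of("X", "gonl.chrX.release4.gtc.vcf.gz"), "7")
//     -> "gonl.chr7.snps_indels.r5.vcf.gz"
// ...and for "X" the override wins: "gonl.chrX.release4.gtc.vcf.gz"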
Use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
The class HpoResultFilter, method filterResults().
@Override
public Optional<Entity> filterResults(Iterable<Entity> results, Entity annotatedEntity, boolean updateMode) {
    if (updateMode) {
        throw new MolgenisDataException("This annotator/filter does not support updating of values");
    }
    StringBuilder ids = new StringBuilder();
    StringBuilder terms = new StringBuilder();
    for (Entity hpoEntity : results) {
        if (ids.length() > 0) {
            ids.append('/');
            terms.append('/');
        }
        String hpoId = hpoEntity.getString(HPO_ID_COL_NAME);
        String hpoTerm = hpoEntity.getString(HPO_TERM_COL_NAME);
        ids.append(hpoId);
        terms.append(hpoTerm);
    }
    EntityType emd = entityTypeFactory.create(HPOAnnotator.NAME);
    emd.addAttributes(Arrays.asList(hpoAnnotator.getIdsAttr(), hpoAnnotator.getTermsAttr()));
    Entity aggregated = new DynamicEntity(emd);
    aggregated.set(HPO_IDS, ids.toString());
    aggregated.set(HPO_TERMS, terms.toString());
    return ids.length() == 0 ? Optional.absent() : Optional.of(aggregated);
}
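The two StringBuilder loops above amount to joining one column of the result entities with a '/' delimiter. A hedged stream-based equivalent for reference (joinColumn is a hypothetical helper; the real column names come from HPO_ID_COL_NAME and HPO_TERM_COL_NAME):

import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.molgenis.data.Entity;

// Sketch: join one column of the HPO result entities with '/', as the loop above does.
static String joinColumn(Iterable<Entity> results, String columnName) {
    return StreamSupport.stream(results.spliterator(), false)
            .map(e -> e.getString(columnName))
            .collect(Collectors.joining("/"));
}

// e.g. joinColumn(results, HPO_ID_COL_NAME) might yield "HP:0000118/HP:0001507"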
Use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
The class MultiAllelicResultFilter, method merge().
/**
 * Combine ALT information per reference allele (in VCF there is only one REF per line, with ALT varying, but
 * that might not always be the case).
 * <p>
 * So we want to support this hypothetical example:
 * <pre>
 * 3 300 G  A  0.2|23.1
 * 3 300 G  T  -2.4|0.123
 * 3 300 G  X  -0.002|2.3
 * 3 300 G  C  0.5|14.5
 * 3 300 GC A  0.2|23.1
 * 3 300 GC T  -2.4|0.123
 * 3 300 C  GX -0.002|2.3
 * 3 300 C  GC 0.5|14.5
 * </pre>
 * and it should become:
 * <pre>
 * 3 300 G  A,T,X,C 0.2|23.1,-2.4|0.123,-0.002|2.3,0.5|14.5
 * 3 300 GC A,T     0.2|23.1,-2.4|0.123
 * 3 300 C  GX,GC   -0.002|2.3,0.5|14.5
 * </pre>
 * so that the multi-allelic filter can then find back the appropriate values as if it were a multi-allelic VCF
 * line.
 */
public Iterable<Entity> merge(Iterable<Entity> resourceEntities) {
    ArrayList<Entity> resourceEntitiesMerged = new ArrayList<>();
    PeekingIterator<Entity> resourceEntitiesIterator = Iterators.peekingIterator(resourceEntities.iterator());
    if (!resourceEntitiesIterator.hasNext()) {
        return resourceEntitiesMerged;
    }
    Location location = Location.create(resourceEntitiesIterator.peek());
    // collect entities to be merged, grouped by ref
    Multimap<String, Entity> refToMergedEntity = LinkedListMultimap.create();
    while (resourceEntitiesIterator.hasNext()) {
        Entity resourceEntity = resourceEntitiesIterator.next();
        // verify that all results have the same chrom & pos
        Location thisLoc = Location.create(resourceEntity);
        // at least chrom and pos have to be the same, ref may be different
        if (!location.equals(thisLoc)) {
            throw new MolgenisDataException("Mismatch in location! " + location + " vs " + thisLoc);
        }
        // add to map by ref, so we get [ref -> entities to be merged into one]
        refToMergedEntity.put(resourceEntity.getString(REF), resourceEntity);
    }
    // now iterate over the map of refs and merge entities per ref
    for (String refKey : refToMergedEntity.keySet()) {
        boolean first = true;
        Entity mergeWithMe = null;
        for (Entity entityToBeMerged : refToMergedEntity.get(refKey)) {
            if (first) {
                // merge all following entities into the first one
                mergeWithMe = entityToBeMerged;
                first = false;
            } else {
                // concatenate alleles
                mergeWithMe.set(ALT, mergeWithMe.get(ALT).toString() + "," + entityToBeMerged.get(ALT).toString());
                // concatenate allele-specific attributes
                for (Attribute alleleSpecificAttribute : attributes) {
                    String attrName = alleleSpecificAttribute.getName();
                    mergeWithMe.set(attrName,
                            mergeWithMe.get(attrName).toString() + "," + entityToBeMerged.get(attrName).toString());
                }
            }
        }
        resourceEntitiesMerged.add(mergeWithMe);
    }
    return resourceEntitiesMerged;
}
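At its core, merge is a group-by-REF followed by a comma-join per group. A self-contained sketch of that shape using plain strings and the hypothetical data from the Javadoc (no MOLGENIS entities involved):

import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;

// Group hypothetical ALT alleles by their REF allele, preserving insertion order.
Multimap<String, String> altsByRef = LinkedListMultimap.create();
altsByRef.put("G", "A");
altsByRef.put("G", "T");
altsByRef.put("GC", "A");
altsByRef.put("GC", "T");

// One merged line per REF, ALTs comma-separated as in a multi-allelic VCF line.
for (String ref : altsByRef.keySet()) {
    System.out.println(ref + "\t" + String.join(",", altsByRef.get(ref)));
}
// prints: G    A,T
//         GC   A,T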
Use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
The class GeneCsvRepository, method getIndex().
private Map<Object, Entity> getIndex() {
    if (index.isEmpty()) {
        forEach(e -> {
            Object key = e.get(sourceAttributeName);
            if (key == null) {
                throw new MolgenisDataException(
                        "Missing value for attribute [" + sourceAttributeName + "] in entity [" + e + "]");
            }
            index.put(key, e);
        });
    }
    return index;
}
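Note that index.put keeps the last entity when two source rows share the same key. If duplicates should be an error instead, the loop body could guard with putIfAbsent; a hypothetical variant, not the MOLGENIS implementation:

// Hypothetical variant: reject duplicate keys instead of silently overwriting.
Entity previous = index.putIfAbsent(key, e);
if (previous != null) {
    throw new MolgenisDataException(
            "Duplicate value [" + key + "] for attribute [" + sourceAttributeName + "]");
}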
Use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
The class AmazonBucketIngester, method ingest().
public FileMeta ingest(String jobExecutionID, String targetEntityTypeName, String bucket, String key,
        String extension, String accessKey, String secretKey, String region, boolean isExpression,
        Progress progress) {
    FileMeta fileMeta;
    try {
        progress.setProgressMax(3);
        progress.progress(0, "Connecting to Amazon Bucket with accessKey '" + accessKey + "'");
        AmazonS3 client = amazonBucketClient.getClient(accessKey, secretKey, region);
        progress.progress(1, "Downloading...");
        File file = amazonBucketClient.downloadFile(client, fileStore, jobExecutionID, bucket, key, extension,
                isExpression, targetEntityTypeName);
        if (targetEntityTypeName != null && ExcelUtils.isExcelFile(file.getName())) {
            if (ExcelUtils.getNumberOfSheets(file) == 1) {
                ExcelUtils.renameSheet(targetEntityTypeName, file, 0);
            } else {
                throw new MolgenisDataException(
                        "Amazon Bucket imports to a specified entityType are only possible with CSV files or Excel files with one sheet");
            }
        }
        progress.progress(2, "Importing...");
        ImportService importService = importServiceFactory.getImportService(file.getName());
        File renamed = new File(
                String.format("%s%s%s.%s", file.getParent(), File.separatorChar, targetEntityTypeName, extension));
        Files.copy(file.toPath(), renamed.toPath(), StandardCopyOption.REPLACE_EXISTING);
        RepositoryCollection repositoryCollection =
                fileRepositoryCollectionFactory.createFileRepositoryCollection(renamed);
        EntityImportReport report = importService.doImport(repositoryCollection, DatabaseAction.ADD_UPDATE_EXISTING, "base");
        progress.status("Download and import from Amazon Bucket done.");
        progress.progress(3, "Successfully imported " + report.getNrImportedEntitiesMap().keySet().toString() + " entities.");
        fileMeta = createFileMeta(jobExecutionID, file);
    } catch (Exception e) {
        throw new MolgenisDataException(e);
    }
    return fileMeta;
}
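The catch block funnels every checked and unchecked failure into a single MolgenisDataException, so callers of ingest handle one exception type and still see the original cause. A minimal sketch of the same wrap-and-rethrow pattern (doDownload and doImport are hypothetical placeholders):

import org.molgenis.data.MolgenisDataException;

// Sketch: normalize all failures from a multi-step job into one unchecked exception type.
FileMeta ingestSafely() {
    try {
        File file = doDownload();  // hypothetical download step, may throw IOException
        return doImport(file);     // hypothetical import step, may throw a runtime exception
    } catch (Exception e) {
        throw new MolgenisDataException(e);  // original failure preserved as the cause
    }
}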