Use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.
Class EffectStructureConverterTest, method testCreateVariantEffectStructure.
/**
 * Verifies that createVariantEffectStructure yields three effect entities for the mocked
 * repository and that every effect attribute, plus the VARIANT reference, has the expected value.
 */
@Test
public void testCreateVariantEffectStructure() {
    VcfRepository vcfRepository = mock(VcfRepository.class);
    when(vcfRepository.getEntityType()).thenReturn(vcfInputEntityType);
    when(vcfRepository.spliterator()).thenReturn(entities.spliterator());

    List<Entity> resultEntities = effectStructureConverter
            .createVariantEffectStructure(EFFECT, Collections.emptyList(), vcfRepository)
            .collect(Collectors.toList());
    assertEquals(resultEntities.size(), 3);

    // Only the gene and the referenced variant differ between the three expected effects.
    String[] expectedGenes = { "GEN1", "GEN1", "GEN2" };
    Object[] expectedVariants = { variant1, variant2, variant2 };

    for (int index = 0; index < expectedGenes.length; index++) {
        Entity effect = resultEntities.get(index);
        String gene = expectedGenes[index];

        // Attribute name -> expected value, checked in this exact order.
        String[][] expectedAttributes = {
                { "Alt_Allele", "A" },
                { "Gene_Name", gene },
                { "Annotation", "missense_variant" },
                { "Putative_impact", "MODERATE" },
                { "Gene_ID", gene },
                { "Feature_type", "transcript" },
                { "Feature_ID", "NM_123456.7" },
                { "Transcript_biotype", "Coding" },
                { "Rank_total", "4/4" },
                { "HGVS_c", "c.1234C>T" },
                { "HGVS_p", "p.Thr123Met" },
                { "cDNA_position", "1234/5678" },
                { "CDS_position", "2345/6789" },
                { "Protein_position", "111/222" },
                { "Distance_to_feature", "" },
                { "Errors", "" }
        };
        for (String[] attribute : expectedAttributes) {
            assertEquals(effect.get(attribute[0]), attribute[1]);
        }

        // The variant reference is compared through its string representation.
        assertEquals(effect.get("VARIANT").toString(), expectedVariants[index].toString());
    }
}
Use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.
Class SnpEffRunner, method getSnpEffects.
/**
 * Runs SnpEff on the given VCF file and returns an iterator over the resulting effect entities.
 * <p>
 * The SnpEff jar is executed up front through {@code jarRunner}; its output VCF is then read
 * lazily while the returned iterator is consumed. For every source entity the iterator yields
 * either the effects derived from the SnpEff result for that entity's chromosome and position,
 * or a single empty effects entity when the chromosome or position is missing or no SnpEff
 * result was found.
 *
 * @param source   the entities to look up effects for; if it has no elements an empty iterator
 *                 is returned and SnpEff is not run
 * @param inputVcf the VCF file that is passed to SnpEff as input
 * @return an iterator over the effect entities for all source entities
 * @throws UncheckedIOException  if running SnpEff or opening its output fails with an IOException
 * @throws MolgenisDataException if the thread is interrupted while SnpEff is running; the
 *                               thread's interrupt status is restored before this is thrown
 */
// The output repository is deliberately not closed in this method: the returned iterator
// reads from it lazily. NOTE(review): nothing visible here closes it later - confirm.
@SuppressWarnings("resource")
public Iterator<Entity> getSnpEffects(Iterator<Entity> source, final File inputVcf) {
    try {
        if (!source.hasNext()) {
            return Collections.<Entity>emptyList().iterator();
        }
        // get meta data by peeking at the first entity (work-around for issue #4701)
        PeekingIterator<Entity> peekingSourceIterator = Iterators.peekingIterator(source);
        EntityType sourceEMD = peekingSourceIterator.peek().getEntityType();
        List<String> params = Arrays.asList("-Xmx2g", getSnpEffPath(), "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5");
        File outputVcf = jarRunner.runJar(NAME, params, inputVcf);
        VcfRepository repo = new VcfRepository(outputVcf, "SNPEFF_OUTPUT_VCF_" + inputVcf.getName(), vcfAttributes, entityTypeFactory, attributeFactory);
        PeekingIterator<Entity> snpEffResultIterator = peekingIterator(repo.iterator());
        return new Iterator<Entity>() {
            // Effects produced for the current source entity that have not been handed out yet.
            final LinkedList<Entity> effects = Lists.newLinkedList();

            @Override
            public boolean hasNext() {
                return (peekingSourceIterator.hasNext() || !effects.isEmpty());
            }

            @Override
            public Entity next() {
                if (effects.isEmpty()) {
                    // go to next source entity and get effects
                    Entity sourceEntity = peekingSourceIterator.next();
                    String chromosome = sourceEntity.getString(VcfAttributes.CHROM);
                    Integer position = sourceEntity.getInt(VcfAttributes.POS);
                    if (chromosome != null && position != null) {
                        // presumably looks up the SnpEff record matching chromosome/position - see getSnpEffEntity
                        Entity snpEffEntity = getSnpEffEntity(snpEffResultIterator, chromosome, position);
                        if (snpEffEntity != null) {
                            effects.addAll(getSnpEffectsFromSnpEffEntity(sourceEntity, snpEffEntity, getTargetEntityType(sourceEMD)));
                        } else {
                            effects.add(getEmptyEffectsEntity(sourceEntity, getTargetEntityType(sourceEMD)));
                        }
                    } else {
                        effects.add(getEmptyEffectsEntity(sourceEntity, getTargetEntityType(sourceEMD)));
                    }
                }
                return effects.removeFirst();
            }
        };
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new MolgenisDataException("Exception running SnpEff", e);
    }
}
Use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.
Class CmdLineAnnotatorUtils, method annotate.
/**
 * Annotates the records of a VCF file with the given annotator and writes the result to a new VCF file.
 * <p>
 * The input file is opened as a {@link VcfRepository}. The VCF header is written first, then the
 * annotator metadata is added to the repository, the records are annotated and finally written
 * to the output file. The writer and the repository are closed when the method returns.
 *
 * @param annotator                the annotator to run
 * @param vcfAttributes            utility class for VCF metadata
 * @param entityTypeFactory        factory for MOLGENIS entity types
 * @param attributeFactory         factory for MOLGENIS attributes
 * @param effectStructureConverter utility class for converting a VCF repository from and to the MOLGENIS entity
 *                                 structure for "effects" annotations
 * @param inputVcfFile             the VCF file to be annotated
 * @param outputVCFFile            the resulting, annotated VCF file
 * @param attributesToInclude      the attributes of the annotator that should be written to the result
 * @param update                   passed on to the annotation step; per the original documentation it indicates
 *                                 whether values already present for the annotator attributes are updated
 *                                 ({@code true}) or overwritten ({@code false})
 * @return the absolute path of the result VCF file
 * @throws IOException                    declared by this method; presumably raised when reading the input or
 *                                        writing the output fails
 * @throws MolgenisInvalidFormatException declared by this method; raised by one of the called helpers
 */
public static String annotate(RepositoryAnnotator annotator, VcfAttributes vcfAttributes, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory, EffectStructureConverter effectStructureConverter, File inputVcfFile, File outputVCFFile, List<String> attributesToInclude, boolean update) throws IOException, MolgenisInvalidFormatException {
    try (BufferedWriter vcfWriter = createBufferedWriter(outputVCFFile);
            VcfRepository inputRepository = new VcfRepository(inputVcfFile, inputVcfFile.getName(), vcfAttributes, entityTypeFactory, attributeFactory)) {
        // Determine the annotator's output attributes before the repository metadata is extended below.
        List<Attribute> annotatorOutputAttributes = getOutputAttributeMetadatasForAnnotator(annotator, entityTypeFactory, attributeFactory, attributesToInclude, inputRepository);

        VcfWriterUtils.writeVcfHeader(inputVcfFile, vcfWriter, VcfUtils.getAtomicAttributesFromList(annotatorOutputAttributes), attributesToInclude);

        Iterable<Entity> preparedEntities = addAnnotatorMetaDataToRepository(annotator, attributeFactory, effectStructureConverter, inputRepository);

        writeAnnotationResultToVcfFile(attributesToInclude, vcfWriter, annotatorOutputAttributes,
                annotateRepo(annotator, effectStructureConverter, update, preparedEntities));
    }
    return outputVCFFile.getAbsolutePath();
}
Aggregations