Search in sources :

Example 1 with VcfRepository

use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.

the class EffectStructureConverterTest method testCreateVariantEffectStructure.

/**
 * Verifies that the converter turns the prepared VCF entities into three effect entities
 * with the expected attribute values and variant references.
 */
@Test
public void testCreateVariantEffectStructure() {
    // Stub the repository so that it serves the prepared entity type and test entities.
    VcfRepository vcfRepository = mock(VcfRepository.class);
    when(vcfRepository.getEntityType()).thenReturn(vcfInputEntityType);
    when(vcfRepository.spliterator()).thenReturn(entities.spliterator());

    List<Entity> resultEntities = effectStructureConverter
            .createVariantEffectStructure(EFFECT, Collections.emptyList(), vcfRepository)
            .collect(Collectors.toList());
    assertEquals(resultEntities.size(), 3);

    for (int index = 0; index < 3; index++) {
        Entity effect = resultEntities.get(index);
        // The first two effects belong to GEN1, the last one to GEN2.
        String expectedGene = index < 2 ? "GEN1" : "GEN2";
        // Only the first effect refers to variant1; the other two refer to variant2.
        Object expectedVariant = index == 0 ? variant1 : variant2;

        assertEquals(effect.get("Alt_Allele"), "A");
        assertEquals(effect.get("Gene_Name"), expectedGene);
        assertEquals(effect.get("Annotation"), "missense_variant");
        assertEquals(effect.get("Putative_impact"), "MODERATE");
        assertEquals(effect.get("Gene_ID"), expectedGene);
        assertEquals(effect.get("Feature_type"), "transcript");
        assertEquals(effect.get("Feature_ID"), "NM_123456.7");
        assertEquals(effect.get("Transcript_biotype"), "Coding");
        assertEquals(effect.get("Rank_total"), "4/4");
        assertEquals(effect.get("HGVS_c"), "c.1234C>T");
        assertEquals(effect.get("HGVS_p"), "p.Thr123Met");
        assertEquals(effect.get("cDNA_position"), "1234/5678");
        assertEquals(effect.get("CDS_position"), "2345/6789");
        assertEquals(effect.get("Protein_position"), "111/222");
        assertEquals(effect.get("Distance_to_feature"), "");
        assertEquals(effect.get("Errors"), "");
        assertEquals(effect.get("VARIANT").toString(), expectedVariant.toString());
    }
}
Also used : DynamicEntity(org.molgenis.data.support.DynamicEntity) Entity(org.molgenis.data.Entity) VcfRepository(org.molgenis.data.vcf.VcfRepository) Test(org.testng.annotations.Test) AbstractMolgenisSpringTest(org.molgenis.data.AbstractMolgenisSpringTest)

Example 2 with VcfRepository

use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.

the class SnpEffRunner method getSnpEffects.

/**
 * Runs SnpEff on the given VCF file and returns the effects for the given source entities.
 *
 * <p>The entity type is obtained by peeking at the first source entity. SnpEff is then run
 * through {@code jarRunner} with a fixed parameter list, and its output file is opened as a
 * {@link VcfRepository}. The returned iterator is lazy: each time its internal buffer is empty
 * it takes the next source entity, looks up a SnpEff result entity by chromosome and position
 * via {@code getSnpEffEntity}, and buffers the resulting effect entities. If chromosome or
 * position is missing, or no SnpEff entity is found, a single empty-effects entity is buffered
 * instead.
 *
 * <p>The repository over the SnpEff output is not closed by this method because the returned
 * iterator keeps reading from it (hence the {@code resource} warning suppression).
 *
 * @param source   the entities to get effects for; if it has no elements an empty iterator is returned
 * @param inputVcf the VCF file handed to SnpEff as input
 * @return an iterator over the effect entities for all source entities
 * @throws UncheckedIOException  if an {@link IOException} occurs while running SnpEff or opening its output
 * @throws MolgenisDataException if the thread is interrupted while running SnpEff; the thread's
 *                               interrupt status is restored before this is thrown
 */
@SuppressWarnings("resource")
public Iterator<Entity> getSnpEffects(Iterator<Entity> source, final File inputVcf) {
    try {
        if (!source.hasNext()) {
            return Collections.<Entity>emptyList().iterator();
        }
        // get meta data by peeking at the first entity (work-around for issue #4701)
        PeekingIterator<Entity> peekingSourceIterator = Iterators.peekingIterator(source);
        EntityType sourceEMD = peekingSourceIterator.peek().getEntityType();
        List<String> params = Arrays.asList("-Xmx2g", getSnpEffPath(), "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5");
        File outputVcf = jarRunner.runJar(NAME, params, inputVcf);
        VcfRepository repo = new VcfRepository(outputVcf, "SNPEFF_OUTPUT_VCF_" + inputVcf.getName(), vcfAttributes, entityTypeFactory, attributeFactory);
        PeekingIterator<Entity> snpEffResultIterator = peekingIterator(repo.iterator());
        return new Iterator<Entity>() {

            // Effects of the current source entity that have not been handed out yet.
            final LinkedList<Entity> effects = Lists.newLinkedList();

            @Override
            public boolean hasNext() {
                return (peekingSourceIterator.hasNext() || !effects.isEmpty());
            }

            @Override
            public Entity next() {
                if (effects.isEmpty()) {
                    // go to next source entity and get effects
                    Entity sourceEntity = peekingSourceIterator.next();
                    String chromosome = sourceEntity.getString(VcfAttributes.CHROM);
                    Integer position = sourceEntity.getInt(VcfAttributes.POS);
                    // Only look up a SnpEff result when both chromosome and position are known.
                    Entity snpEffEntity = null;
                    if (chromosome != null && position != null) {
                        snpEffEntity = getSnpEffEntity(snpEffResultIterator, chromosome, position);
                    }
                    if (snpEffEntity != null) {
                        effects.addAll(getSnpEffectsFromSnpEffEntity(sourceEntity, snpEffEntity, getTargetEntityType(sourceEMD)));
                    } else {
                        // No usable SnpEff result: emit a single placeholder effect for this source entity.
                        effects.add(getEmptyEffectsEntity(sourceEntity, getTargetEntityType(sourceEMD)));
                    }
                }
                return effects.removeFirst();
            }
        };
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt status so that callers further up the stack can still observe
        // the interruption after it has been wrapped in an unchecked exception.
        Thread.currentThread().interrupt();
        throw new MolgenisDataException("Exception running SnpEff", e);
    }
}
Also used : DynamicEntity(org.molgenis.data.support.DynamicEntity) Entity(org.molgenis.data.Entity) VcfRepository(org.molgenis.data.vcf.VcfRepository) EntityType(org.molgenis.data.meta.model.EntityType) MolgenisDataException(org.molgenis.data.MolgenisDataException) PeekingIterator(com.google.common.collect.PeekingIterator) Iterators.peekingIterator(com.google.common.collect.Iterators.peekingIterator) File.createTempFile(java.io.File.createTempFile)

Example 3 with VcfRepository

use of org.molgenis.data.vcf.VcfRepository in project molgenis by molgenis.

the class CmdLineAnnotatorUtils method annotate.

/**
 * Annotates the records of a VCF file with the given annotator and writes the result,
 * including a VCF header, to the given output file.
 *
 * @param annotator                the annotator to run
 * @param vcfAttributes            utility class for VCF metadata, used to read the input file
 * @param entityTypeFactory        factory for molgenis entity types
 * @param attributeFactory         factory for molgenis attributes
 * @param effectStructureConverter utility class for converting a VCF repository from and to the molgenis entity structure for "effects" annotations
 * @param inputVcfFile             the VCF file to be annotated
 * @param outputVCFFile            the file the annotated VCF is written to
 * @param attributesToInclude      the attributes of the annotator that should be written to the result
 * @param update                   flag passed on to the annotation step; it concerns how values already
 *                                 present for the annotator attributes are treated (TODO confirm the
 *                                 exact meaning of true/false against annotateRepo)
 * @return the absolute path of {@code outputVCFFile}
 * @throws IOException                    declared by this method; thrown by the file handling it performs
 * @throws MolgenisInvalidFormatException declared by this method; raised by one of the invoked helpers
 */
public static String annotate(RepositoryAnnotator annotator, VcfAttributes vcfAttributes, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory, EffectStructureConverter effectStructureConverter, File inputVcfFile, File outputVCFFile, List<String> attributesToInclude, boolean update) throws IOException, MolgenisInvalidFormatException {
    // The writer is opened first and closed last; the input repository is closed before it.
    try (BufferedWriter writer = createBufferedWriter(outputVCFFile)) {
        try (VcfRepository inputRepository = new VcfRepository(inputVcfFile, inputVcfFile.getName(), vcfAttributes, entityTypeFactory, attributeFactory)) {
            // Determine the output attributes and write the header before any record is written.
            List<Attribute> resultAttributes = getOutputAttributeMetadatasForAnnotator(annotator, entityTypeFactory, attributeFactory, attributesToInclude, inputRepository);
            VcfWriterUtils.writeVcfHeader(inputVcfFile, writer, VcfUtils.getAtomicAttributesFromList(resultAttributes), attributesToInclude);

            // Annotate the input entities and write the annotated records to the output file.
            Iterable<Entity> inputEntities = addAnnotatorMetaDataToRepository(annotator, attributeFactory, effectStructureConverter, inputRepository);
            Iterator<Entity> annotatedEntities = annotateRepo(annotator, effectStructureConverter, update, inputEntities);
            writeAnnotationResultToVcfFile(attributesToInclude, writer, resultAttributes, annotatedEntities);
        }
    }
    return outputVCFFile.getAbsolutePath();
}
Also used : Entity(org.molgenis.data.Entity) Attribute(org.molgenis.data.meta.model.Attribute) VcfRepository(org.molgenis.data.vcf.VcfRepository)

Aggregations

Entity (org.molgenis.data.Entity)3 VcfRepository (org.molgenis.data.vcf.VcfRepository)3 DynamicEntity (org.molgenis.data.support.DynamicEntity)2 Iterators.peekingIterator (com.google.common.collect.Iterators.peekingIterator)1 PeekingIterator (com.google.common.collect.PeekingIterator)1 File.createTempFile (java.io.File.createTempFile)1 AbstractMolgenisSpringTest (org.molgenis.data.AbstractMolgenisSpringTest)1 MolgenisDataException (org.molgenis.data.MolgenisDataException)1 Attribute (org.molgenis.data.meta.model.Attribute)1 EntityType (org.molgenis.data.meta.model.EntityType)1 Test (org.testng.annotations.Test)1