Search in sources :

Example 1 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testMixedCasesInExample.

// make sure some bases are lower case and some are upper case
@Test
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
    final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
    final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
    final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
    int nMixedCase = 0;
    for (SAMSequenceRecord contig : original.getSequenceDictionary().getSequences()) {
        nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
        final int step = 100;
        for (int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step) {
            testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
        }
    }
    Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file.  Unexpected test state");
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 2 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.

the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderTwoStage.

// Tests grabbing sequences around a middle cached value.
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
    final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
    final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
    SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
    int middleStart = (contig.getSequenceLength() - querySize) / 2;
    int middleStop = middleStart + querySize;
    logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
    for (int i = 0; i < contig.getSequenceLength(); i += 10) {
        int start = i;
        int stop = start + querySize;
        if (stop <= contig.getSequenceLength()) {
            ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart, middleStop);
            ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
            ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
            Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
            Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
            Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
        }
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project hmftools by hartwigmedical.

the class LoadSomaticVariants method main.

public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final String vcfFileLocation = cmd.getOptionValue(VCF_FILE);
    final String highConfidenceBed = cmd.getOptionValue(HIGH_CONFIDENCE_BED);
    final String fastaFileLocation = cmd.getOptionValue(REF_GENOME);
    final String sample = cmd.getOptionValue(SAMPLE);
    final DatabaseAccess dbAccess = databaseAccess(cmd);
    final CompoundFilter filter = new CompoundFilter(true);
    if (cmd.hasOption(PASS_FILTER)) {
        filter.add(new PassingVariantFilter());
    }
    if (cmd.hasOption(SOMATIC_FILTER)) {
        filter.add(new SomaticFilter());
    }
    LOGGER.info("Reading somatic VCF File");
    final List<SomaticVariant> variants = SomaticVariantFactory.filteredInstance(filter).fromVCFFile(sample, vcfFileLocation);
    LOGGER.info("Reading high confidence bed file");
    final Multimap<String, GenomeRegion> highConfidenceRegions = BEDFileLoader.fromBedFile(highConfidenceBed);
    LOGGER.info("Loading indexed fasta reference file");
    IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(fastaFileLocation));
    LOGGER.info("Querying purple database");
    final PurityContext purityContext = dbAccess.readPurityContext(sample);
    if (purityContext == null) {
        LOGGER.warn("Unable to retrieve purple data. Enrichment may be incomplete.");
    }
    final PurityAdjuster purityAdjuster = purityContext == null ? new PurityAdjuster(Gender.FEMALE, 1, 1) : new PurityAdjuster(purityContext.gender(), purityContext.bestFit().purity(), purityContext.bestFit().normFactor());
    final Multimap<String, PurpleCopyNumber> copyNumbers = Multimaps.index(dbAccess.readCopynumbers(sample), PurpleCopyNumber::chromosome);
    final Multimap<String, FittedRegion> copyNumberRegions = Multimaps.index(dbAccess.readCopyNumberRegions(sample), FittedRegion::chromosome);
    LOGGER.info("Incorporating purple purity");
    final PurityAdjustedSomaticVariantFactory purityAdjustmentFactory = new PurityAdjustedSomaticVariantFactory(purityAdjuster, copyNumbers, copyNumberRegions);
    final List<PurityAdjustedSomaticVariant> purityAdjustedVariants = purityAdjustmentFactory.create(variants);
    final double clonalPloidy = ClonalityCutoffKernel.clonalCutoff(purityAdjustedVariants);
    LOGGER.info("Enriching variants");
    final EnrichedSomaticVariantFactory enrichedSomaticVariantFactory = new EnrichedSomaticVariantFactory(highConfidenceRegions, indexedFastaSequenceFile, new ClonalityFactory(purityAdjuster, clonalPloidy), CanonicalTranscriptFactory.create(HmfGenePanelSupplier.allGeneList()));
    final List<EnrichedSomaticVariant> enrichedVariants = enrichedSomaticVariantFactory.enrich(purityAdjustedVariants);
    LOGGER.info("Persisting variants to database");
    dbAccess.writeSomaticVariants(sample, enrichedVariants);
    LOGGER.info("Complete");
}
Also used : Options(org.apache.commons.cli.Options) EnrichedSomaticVariant(com.hartwig.hmftools.common.variant.EnrichedSomaticVariant) PassingVariantFilter(htsjdk.variant.variantcontext.filter.PassingVariantFilter) FittedRegion(com.hartwig.hmftools.common.purple.region.FittedRegion) PurpleCopyNumber(com.hartwig.hmftools.common.purple.copynumber.PurpleCopyNumber) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) SomaticFilter(com.hartwig.hmftools.common.variant.filter.SomaticFilter) PurityAdjuster(com.hartwig.hmftools.common.purple.PurityAdjuster) PurityAdjustedSomaticVariant(com.hartwig.hmftools.common.variant.PurityAdjustedSomaticVariant) SomaticVariant(com.hartwig.hmftools.common.variant.SomaticVariant) EnrichedSomaticVariant(com.hartwig.hmftools.common.variant.EnrichedSomaticVariant) PurityAdjustedSomaticVariantFactory(com.hartwig.hmftools.common.variant.PurityAdjustedSomaticVariantFactory) CompoundFilter(htsjdk.variant.variantcontext.filter.CompoundFilter) PurityAdjustedSomaticVariant(com.hartwig.hmftools.common.variant.PurityAdjustedSomaticVariant) CommandLine(org.apache.commons.cli.CommandLine) GenomeRegion(com.hartwig.hmftools.common.region.GenomeRegion) PurityContext(com.hartwig.hmftools.common.purple.purity.PurityContext) EnrichedSomaticVariantFactory(com.hartwig.hmftools.common.variant.EnrichedSomaticVariantFactory) ClonalityFactory(com.hartwig.hmftools.common.variant.ClonalityFactory) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 4 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class GenomicCoords method setSamSeqDictFromFasta.

private boolean setSamSeqDictFromFasta(String fasta) throws IOException {
    try {
        if (new File(fasta + ".fai").exists() && !new File(fasta + ".fai").isDirectory()) {
        // 
        } else {
            throw new FileNotFoundException();
        }
    // fa= new IndexedFastaSequenceFile(new File(fasta));
    } catch (FileNotFoundException e) {
        try {
            new Faidx(new File(fasta));
            (new File(fasta + ".fai")).deleteOnExit();
        } catch (Exception e1) {
        // 
        }
    }
    BufferedReader br = new BufferedReader(new FileReader(new File(fasta + ".fai")));
    // null;
    SAMSequenceDictionary seqDict = new SAMSequenceDictionary();
    while (true) {
        String line = br.readLine();
        if (line == null) {
            break;
        }
        SAMSequenceRecord ssqRec = new SAMSequenceRecord(line.split("\t")[0], Integer.parseInt(line.split("\t")[1]));
        seqDict.addSequence(ssqRec);
    }
    br.close();
    // fa.close();
    this.setSamSeqDictSource(new File(fasta).getAbsolutePath());
    this.setSamSeqDict(seqDict);
    return true;
// }
// fa.close();
// return false;
}
Also used : FileNotFoundException(java.io.FileNotFoundException) BufferedReader(java.io.BufferedReader) Faidx(faidx.Faidx) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) InvalidCommandLineException(exceptions.InvalidCommandLineException) InvalidColourException(exceptions.InvalidColourException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException)

Example 5 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.

the class GenomicCoords method getSequenceFromFasta.

public byte[] getSequenceFromFasta() throws IOException {
    IndexedFastaSequenceFile faSeqFile = null;
    try {
        faSeqFile = new IndexedFastaSequenceFile(new File(this.fastaFile));
        try {
            byte[] seq = faSeqFile.getSubsequenceAt(this.chrom, this.from, this.to).getBases();
            faSeqFile.close();
            return seq;
        } catch (NullPointerException e) {
            System.err.println("Cannot fetch sequence " + this.chrom + ":" + this.from + "-" + this.to + " for fasta file " + this.fastaFile);
            e.printStackTrace();
        }
        faSeqFile.close();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    return null;
}
Also used : FileNotFoundException(java.io.FileNotFoundException) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7