use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testMixedCasesInExample.
// make sure some bases are lower case and some are upper case
@Test
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
int nMixedCase = 0;
for (SAMSequenceRecord contig : original.getSequenceDictionary().getSequences()) {
nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
final int step = 100;
for (int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step) {
testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
}
}
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFileUnitTest method testCachingIndexedFastaReaderTwoStage.
// Tests grabbing sequences around a middle cached value.
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
int middleStart = (contig.getSequenceLength() - querySize) / 2;
int middleStop = middleStart + querySize;
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query", contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
for (int i = 0; i < contig.getSequenceLength(); i += 10) {
int start = i;
int stop = start + querySize;
if (stop <= contig.getSequenceLength()) {
ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart, middleStop);
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project hmftools by hartwigmedical.
the class LoadSomaticVariants method main.
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
final Options options = createBasicOptions();
final CommandLine cmd = createCommandLine(args, options);
final String vcfFileLocation = cmd.getOptionValue(VCF_FILE);
final String highConfidenceBed = cmd.getOptionValue(HIGH_CONFIDENCE_BED);
final String fastaFileLocation = cmd.getOptionValue(REF_GENOME);
final String sample = cmd.getOptionValue(SAMPLE);
final DatabaseAccess dbAccess = databaseAccess(cmd);
final CompoundFilter filter = new CompoundFilter(true);
if (cmd.hasOption(PASS_FILTER)) {
filter.add(new PassingVariantFilter());
}
if (cmd.hasOption(SOMATIC_FILTER)) {
filter.add(new SomaticFilter());
}
LOGGER.info("Reading somatic VCF File");
final List<SomaticVariant> variants = SomaticVariantFactory.filteredInstance(filter).fromVCFFile(sample, vcfFileLocation);
LOGGER.info("Reading high confidence bed file");
final Multimap<String, GenomeRegion> highConfidenceRegions = BEDFileLoader.fromBedFile(highConfidenceBed);
LOGGER.info("Loading indexed fasta reference file");
IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(fastaFileLocation));
LOGGER.info("Querying purple database");
final PurityContext purityContext = dbAccess.readPurityContext(sample);
if (purityContext == null) {
LOGGER.warn("Unable to retrieve purple data. Enrichment may be incomplete.");
}
final PurityAdjuster purityAdjuster = purityContext == null ? new PurityAdjuster(Gender.FEMALE, 1, 1) : new PurityAdjuster(purityContext.gender(), purityContext.bestFit().purity(), purityContext.bestFit().normFactor());
final Multimap<String, PurpleCopyNumber> copyNumbers = Multimaps.index(dbAccess.readCopynumbers(sample), PurpleCopyNumber::chromosome);
final Multimap<String, FittedRegion> copyNumberRegions = Multimaps.index(dbAccess.readCopyNumberRegions(sample), FittedRegion::chromosome);
LOGGER.info("Incorporating purple purity");
final PurityAdjustedSomaticVariantFactory purityAdjustmentFactory = new PurityAdjustedSomaticVariantFactory(purityAdjuster, copyNumbers, copyNumberRegions);
final List<PurityAdjustedSomaticVariant> purityAdjustedVariants = purityAdjustmentFactory.create(variants);
final double clonalPloidy = ClonalityCutoffKernel.clonalCutoff(purityAdjustedVariants);
LOGGER.info("Enriching variants");
final EnrichedSomaticVariantFactory enrichedSomaticVariantFactory = new EnrichedSomaticVariantFactory(highConfidenceRegions, indexedFastaSequenceFile, new ClonalityFactory(purityAdjuster, clonalPloidy), CanonicalTranscriptFactory.create(HmfGenePanelSupplier.allGeneList()));
final List<EnrichedSomaticVariant> enrichedVariants = enrichedSomaticVariantFactory.enrich(purityAdjustedVariants);
LOGGER.info("Persisting variants to database");
dbAccess.writeSomaticVariants(sample, enrichedVariants);
LOGGER.info("Complete");
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class GenomicCoords method setSamSeqDictFromFasta.
private boolean setSamSeqDictFromFasta(String fasta) throws IOException {
try {
if (new File(fasta + ".fai").exists() && !new File(fasta + ".fai").isDirectory()) {
//
} else {
throw new FileNotFoundException();
}
// fa= new IndexedFastaSequenceFile(new File(fasta));
} catch (FileNotFoundException e) {
try {
new Faidx(new File(fasta));
(new File(fasta + ".fai")).deleteOnExit();
} catch (Exception e1) {
//
}
}
BufferedReader br = new BufferedReader(new FileReader(new File(fasta + ".fai")));
// null;
SAMSequenceDictionary seqDict = new SAMSequenceDictionary();
while (true) {
String line = br.readLine();
if (line == null) {
break;
}
SAMSequenceRecord ssqRec = new SAMSequenceRecord(line.split("\t")[0], Integer.parseInt(line.split("\t")[1]));
seqDict.addSequence(ssqRec);
}
br.close();
// fa.close();
this.setSamSeqDictSource(new File(fasta).getAbsolutePath());
this.setSamSeqDict(seqDict);
return true;
// }
// fa.close();
// return false;
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project ASCIIGenome by dariober.
the class GenomicCoords method getSequenceFromFasta.
public byte[] getSequenceFromFasta() throws IOException {
IndexedFastaSequenceFile faSeqFile = null;
try {
faSeqFile = new IndexedFastaSequenceFile(new File(this.fastaFile));
try {
byte[] seq = faSeqFile.getSubsequenceAt(this.chrom, this.from, this.to).getBases();
faSeqFile.close();
return seq;
} catch (NullPointerException e) {
System.err.println("Cannot fetch sequence " + this.chrom + ":" + this.from + "-" + this.to + " for fasta file " + this.fastaFile);
e.printStackTrace();
}
faSeqFile.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return null;
}
Aggregations