Search in sources :

Example 51 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class ExternalProcessFastqAligner method align.

@Override
public void align(final File fastq, final File output, final File reference, final int threads) throws IOException {
    List<String> commandline = template.stream().map(s -> String.format(s, fastq.getAbsolutePath(), reference.getAbsolutePath(), threads)).collect(Collectors.toList());
    String commandlinestr = commandline.stream().collect(Collectors.joining(" "));
    log.info("Invoking external aligner");
    log.info(commandlinestr);
    Process aligner = new ProcessBuilder(commandline).redirectError(Redirect.INHERIT).directory(output.getParentFile()).start();
    final SamReader reader = readerFactory.open(SamInputResource.of(aligner.getInputStream()));
    final SAMFileHeader header = reader.getFileHeader();
    try (final SAMFileWriter writer = writerFactory.makeWriter(header, false, output, reference)) {
        final SAMRecordIterator it = reader.iterator();
        while (it.hasNext()) {
            writer.addAlignment(it.next());
        }
    }
    ExternalProcessHelper.shutdownAligner(aligner, commandlinestr, reference);
}
Also used : SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Redirect(java.lang.ProcessBuilder.Redirect) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) IOException(java.io.IOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) Log(htsjdk.samtools.util.Log) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 52 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class MultipleSamFileCommandLineProgram method ensureSequenceDictionary.

public static void ensureSequenceDictionary(File fa) throws IOException {
    File output = new File(fa.getAbsolutePath() + ".dict");
    try (ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(fa)) {
        SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
        if (dictionary == null) {
            log.info("Attempting to generate missing sequence dictionary for ", fa);
            try {
                final SAMSequenceDictionary sequences = new CreateSequenceDictionary().makeSequenceDictionary(fa);
                final SAMFileHeader samHeader = new SAMFileHeader();
                samHeader.setSequenceDictionary(sequences);
                try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(samHeader, false, output)) {
                }
            } catch (Exception e) {
                log.error("Missing sequence dictionary for ", fa, " and creation of sequencing dictionary failed.", "Please run the Picard tools CreateSequenceDictionary utility to create", output);
                throw e;
            }
            log.info("Created sequence dictionary ", output);
        } else {
            log.debug("Found sequence dictionary for ", fa);
        }
    }
}
Also used : CreateSequenceDictionary(picard.sam.CreateSequenceDictionary) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) File(java.io.File) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ConfigurationException(org.apache.commons.configuration.ConfigurationException)

Example 53 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class SubsetToMissing method doWork.

@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    Iterator<SAMRecord> intputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    LongSet hashtable;
    if (PREALLOCATE != null) {
        log.info("Preallocating hash table");
        hashtable = new LongOpenHashBigSet(PREALLOCATE);
    } else {
        hashtable = new LongOpenHashBigSet();
    }
    for (File file : LOOKUP) {
        log.info("Loading lookup hashes for " + file.getAbsolutePath());
        SamReader lookup = factory.open(file);
        AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
        File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
        if (cache.exists()) {
            log.info("Loading lookup hashes from cache");
            long n = stop;
            DataInputStream dis = null;
            try {
                long loadCount = 0;
                dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)));
                while (n-- > 0) {
                    hashtable.add(dis.readLong());
                    if (loadCount % 10000000 == 0) {
                        log.info(String.format("Loaded %d from cache", loadCount));
                    }
                }
            } catch (EOFException e) {
                try {
                    if (dis != null)
                        dis.close();
                } catch (IOException e1) {
                    log.error(e1);
                }
            } catch (IOException e) {
                log.error(e);
            }
        } else {
            long n = stop;
            ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
            try {
                DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)));
                while (loggedit.hasNext() && n-- > 0) {
                    long recordhash = hash(loggedit.next());
                    hashtable.add(recordhash);
                    dos.writeLong(recordhash);
                }
                dos.close();
            } catch (Exception e) {
                log.error(e, "Failed to load lookup. Running with partial results");
            }
            loggedit.close();
        }
        it.close();
    }
    long filtered = 0;
    log.info("Processing input");
    intputit = new ProgressLoggingSAMRecordIterator(intputit, new ProgressLogger(log));
    long n = stop;
    while (intputit.hasNext() && n-- > 0) {
        SAMRecord r = intputit.next();
        if (!hashtable.contains(hash(r))) {
            out.addAlignment(r);
        } else {
            filtered++;
            if (filtered % 1000000 == 0) {
                log.info(String.format("Filtered %d reads", filtered));
            }
        }
    }
    log.info("Closing output");
    out.close();
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) DataOutputStream(java.io.DataOutputStream) LongSet(it.unimi.dsi.fastutil.longs.LongSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) SamReader(htsjdk.samtools.SamReader) LongOpenHashBigSet(it.unimi.dsi.fastutil.longs.LongOpenHashBigSet) BufferedInputStream(java.io.BufferedInputStream) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) EOFException(java.io.EOFException) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 54 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.

the class SAMFileUtil method merge.

/**
 * Merges a set of SAM files into a single file.
 * The SAM header is taken from the first input file.
 * @param input input files. All input files must have the same sort order.
 * @param output output file.
 * @param readerFactory
 * @param writerFactory
 * @throws IOException
 */
public static void merge(Collection<File> input, File output, SamReaderFactory readerFactory, SAMFileWriterFactory writerFactory) throws IOException {
    if (input == null)
        throw new IllegalArgumentException("input is null");
    File tmpFile = gridss.Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(output, "gridss.tmp.merging.SAMFileUtil.") : output;
    Map<SamReader, AsyncBufferedIterator<SAMRecord>> map = new HashMap<>(input.size());
    SAMFileHeader header = null;
    try {
        for (File in : input) {
            SamReader r = readerFactory.open(in);
            SAMFileHeader currentHeader = r.getFileHeader();
            if (header == null) {
                header = currentHeader;
            }
            if (header.getSortOrder() != null && currentHeader.getSortOrder() != null && header.getSortOrder() != currentHeader.getSortOrder()) {
                throw new IllegalArgumentException(String.format("Sort order %s of %s does not match %s of %s", currentHeader.getSortOrder(), input, header.getSortOrder(), input.iterator().next()));
            }
            map.put(r, new AsyncBufferedIterator<>(r.iterator(), in.getName()));
        }
        try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, tmpFile)) {
            Queue<PeekingIterator<SAMRecord>> queue = createMergeQueue(header.getSortOrder());
            for (PeekingIterator<SAMRecord> it : map.values()) {
                if (it.hasNext()) {
                    queue.add(it);
                }
            }
            while (!queue.isEmpty()) {
                PeekingIterator<SAMRecord> it = queue.poll();
                SAMRecord r = it.next();
                writer.addAlignment(r);
                if (it.hasNext()) {
                    queue.add(it);
                }
            }
        }
        for (Entry<SamReader, AsyncBufferedIterator<SAMRecord>> entry : map.entrySet()) {
            CloserUtil.close(entry.getValue());
            CloserUtil.close(entry.getKey());
        }
        if (tmpFile != output) {
            FileHelper.move(tmpFile, output, true);
        }
    } finally {
        for (Entry<SamReader, AsyncBufferedIterator<SAMRecord>> entry : map.entrySet()) {
            CloserUtil.close(entry.getValue());
            CloserUtil.close(entry.getKey());
        }
        if (tmpFile != output & tmpFile.exists()) {
            FileHelper.delete(tmpFile, true);
        }
    }
}
Also used : HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) PeekingIterator(com.google.common.collect.PeekingIterator) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 55 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class PcrClipReads method run.

private int run(final SamReader reader) {
    final SAMFileHeader header1 = reader.getFileHeader();
    final SAMFileHeader header2 = header1.clone();
    Optional<SAMProgramRecord> samProgramRecord = Optional.empty();
    if (this.programId) {
        final SAMProgramRecord spr = header2.createProgramRecord();
        samProgramRecord = Optional.of(spr);
        spr.setProgramName(PcrClipReads.class.getSimpleName());
        spr.setProgramVersion(this.getGitHash());
        spr.setCommandLine(getProgramCommandLine().replace('\t', ' '));
    }
    header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
    header2.setSortOrder(SortOrder.unsorted);
    SAMFileWriter sw = null;
    SAMRecordIterator iter = null;
    try {
        sw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, false);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1).logger(LOG);
        iter = reader.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            if (this.onlyFlag != -1 && (rec.getFlags() & this.onlyFlag) != 0) {
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                sw.addAlignment(rec);
                continue;
            }
            final Interval fragment = findInterval(rec);
            if (fragment == null) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '-' and overap in 5' of PCR fragment
            if (rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentStart() && rec.getAlignmentStart() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '+' and overap in 3' of PCR fragment
            if (!rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentEnd() && rec.getAlignmentEnd() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // contained int PCR fragment
            if (rec.getAlignmentStart() >= fragment.getStart() && rec.getAlignmentEnd() <= fragment.getEnd()) {
                sw.addAlignment(rec);
                continue;
            }
            final ReadClipper readClipper = new ReadClipper();
            if (samProgramRecord.isPresent()) {
                readClipper.setProgramGroup(samProgramRecord.get().getId());
            }
            rec = readClipper.clip(rec, fragment);
            sw.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8