Search in sources :

Example 86 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class ConvertBamChromosomes method doWork.

@Override
public int doWork(List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    final Set<String> unmappedChromosomes = new HashSet<>();
    try {
        final ContigNameConverter customMapping = ContigNameConverter.fromFile(mappingFile);
        customMapping.setOnNotFound(this.onNotFound);
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        SAMFileHeader header2 = header1.clone();
        // create new sequence dict
        final SAMSequenceDictionary dict1 = header1.getSequenceDictionary();
        if (dict1 == null) {
            LOG.error("Sequence dict missing");
            return -1;
        }
        final List<SAMSequenceRecord> ssrs = new ArrayList<SAMSequenceRecord>(dict1.size());
        for (int i = 0; i < dict1.size(); ++i) {
            SAMSequenceRecord ssr = dict1.getSequence(i);
            String newName = customMapping.apply(ssr.getSequenceName());
            if (newName == null) {
                // skip unknown chromosomes
                continue;
            }
            ssr = new SAMSequenceRecord(newName, ssr.getSequenceLength());
            ssrs.add(ssr);
        }
        header2.setSequenceDictionary(new SAMSequenceDictionary(ssrs));
        SAMSequenceDictionary dict2 = new SAMSequenceDictionary(ssrs);
        header2.setSequenceDictionary(dict2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict1);
        sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
        long num_ignored = 0L;
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            SAMRecord rec1 = iter.next();
            progress.watch(rec1);
            String newName1 = null;
            String newName2 = null;
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                newName1 = customMapping.apply(rec1.getReferenceName());
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                newName2 = customMapping.apply(rec1.getMateReferenceName());
            }
            rec1.setHeader(header2);
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                if (newName1 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setReferenceName(newName1);
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                if (newName2 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setMateReferenceName(newName2);
            }
            sfw.addAlignment(rec1);
        }
        if (!unmappedChromosomes.isEmpty()) {
            LOG.warning("Unmapped chromosomes: " + unmappedChromosomes);
        }
        LOG.warning("num ignored read:" + num_ignored);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 87 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class CoverageNormalizer method run.

private int run(SamReader sfr) throws IOException {
    SAMSequenceDictionary dictionary = sfr.getFileHeader().getSequenceDictionary();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dictionary);
    File tmpFile1 = File.createTempFile("cov_", ".dat.gz", this.writingSortingCollection.getTmpDirectories().get(0));
    tmpFile1.deleteOnExit();
    LOG.info("Opening tmp File " + tmpFile1);
    GZIPOutputStream gos = null;
    DataInputStream dis = null;
    SortingCollection<Float> median = null;
    try {
        gos = new GZIPOutputStream(new FileOutputStream(tmpFile1));
        DataOutputStream daos = new DataOutputStream(gos);
        SAMRecordIterator iter = sfr.iterator();
        int curr_tid = -1;
        short[] array = null;
        float minCov = Float.MAX_VALUE;
        float maxCov = 0;
        int[] num_written = new int[dictionary.size()];
        Arrays.fill(num_written, 0);
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                progress.watch(rec);
                if (rec.isSecondaryOrSupplementary())
                    continue;
            }
            if (rec == null || curr_tid != rec.getReferenceIndex()) {
                if (curr_tid != -1) {
                    LOG.info("Writing data for chromosome " + dictionary.getSequence(curr_tid).getSequenceName());
                    for (int i = 0; i + window_size <= array.length; i += window_shift) {
                        float sum = 0;
                        for (int j = 0; j < window_size; ++j) {
                            sum += array[i + j];
                        }
                        float v = sum / window_size;
                        daos.writeFloat(v);
                        minCov = (float) Math.min(minCov, v);
                        maxCov = (float) Math.max(maxCov, v);
                        num_written[curr_tid]++;
                    }
                    LOG.info("End writing data N=" + num_written[curr_tid]);
                }
                if (rec == null)
                    break;
                curr_tid = rec.getReferenceIndex();
                SAMSequenceRecord ssr = dictionary.getSequence(curr_tid);
                LOG.info("allocate " + ssr.getSequenceLength() + " for " + ssr.getSequenceName());
                array = null;
                System.gc();
                array = new short[ssr.getSequenceLength()];
                LOG.info("done: allocate.");
                Arrays.fill(array, (short) 0);
            }
            for (int i = rec.getAlignmentStart(); i < rec.getAlignmentEnd() && i < array.length; ++i) {
                array[i] = (short) Math.min((int) Short.MAX_VALUE, 1 + (int) array[i]);
            }
        }
        array = null;
        LOG.info("Closing BAM");
        CloserUtil.close(sfr);
        LOG.info("Closing " + tmpFile1);
        daos.flush();
        gos.finish();
        gos.flush();
        gos.close();
        // start normalizing min/max find median value
        long nWritten = 0L;
        median = SortingCollection.newInstance(Float.class, new FloatCodec(), new FloatCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        median.setDestructiveIteration(true);
        dis = new DataInputStream(new GZIPInputStream(new FileInputStream(tmpFile1)));
        for (int n_items : num_written) {
            for (int i = 0; i < n_items; ++i) {
                float v = dis.readFloat();
                if (v < min_coverage)
                    continue;
                v = (float) ((v - minCov) / (double) (maxCov - minCov));
                median.add(v);
                ++nWritten;
            }
        }
        median.doneAdding();
        CloserUtil.close(dis);
        // get median
        float median_value = 0f;
        long half = nWritten / 2L;
        CloseableIterator<Float> iterFloat = median.iterator();
        while (iterFloat.hasNext() && half > 0) {
            median_value = iterFloat.next();
            --half;
            if (half <= 0) {
                LOG.info("median = " + median_value);
                break;
            }
        }
        CloserUtil.close(iterFloat);
        median.cleanup();
        median = null;
        progress = new SAMSequenceDictionaryProgress(dictionary);
        // dump data
        dis = new DataInputStream(new GZIPInputStream(new FileInputStream(tmpFile1)));
        for (int chrom_id = 0; chrom_id < num_written.length; ++chrom_id) {
            int n_items = num_written[chrom_id];
            int i = 0;
            Float value_start = null;
            while (i < n_items) {
                if (value_start == null) {
                    value_start = dis.readFloat();
                }
                int j = i + 1;
                Float value_end = null;
                while (j < n_items) {
                    value_end = dis.readFloat();
                    if (value_start.intValue() == value_end.intValue()) {
                        ++j;
                    } else {
                        break;
                    }
                }
                progress.watch(chrom_id, i * window_shift);
                if (value_start >= min_coverage) {
                    System.out.print(dictionary.getSequence(chrom_id).getSequenceName());
                    System.out.print('\t');
                    System.out.print(i * window_shift);
                    System.out.print('\t');
                    System.out.print((j - 1) * window_shift + window_size);
                    System.out.print('\t');
                    System.out.print((float) ((value_start - minCov) / (double) (maxCov - minCov)));
                    System.out.print('\t');
                    System.out.print(median_value);
                    System.out.println();
                    if (System.out.checkError())
                        break;
                }
                i = j;
                if (value_end == null)
                    break;
                value_start = value_end;
            }
        }
        CloserUtil.close(dis);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(gos);
        CloserUtil.close(dis);
        if (tmpFile1 != null)
            tmpFile1.delete();
        if (median != null)
            median.cleanup();
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) DataOutputStream(java.io.DataOutputStream) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) DataInputStream(java.io.DataInputStream) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) GZIPInputStream(java.util.zip.GZIPInputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) FileOutputStream(java.io.FileOutputStream) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 88 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class ReferenceToVCF method doWork.

@Override
public int doWork(List<String> args) {
    if (this.bedFile != null) {
        if (limitBed == null)
            limitBed = new IntervalTreeMap<Boolean>();
        try {
            Pattern tab = Pattern.compile("[\t]");
            BufferedReader r = IOUtils.openFileForBufferedReading(this.bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                if (BedLine.isBedHeader(line))
                    continue;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                String[] tokens = tab.split(line, 4);
                limitBed.put(new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), 1 + Integer.parseInt(tokens[2])), true);
            }
            CloserUtil.close(r);
        } catch (Exception err) {
            LOG.error(err);
            return -1;
        }
    }
    Random random = new Random(0L);
    VariantContextWriter out = null;
    try {
        final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(oneAndOnlyOneFile(args)));
        SAMSequenceDictionary dict = fasta.getSequenceDictionary();
        out = super.openVariantContextWriter(this.outputFile);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        out.writeHeader(header);
        final List<List<Allele>> combination = new ArrayList<List<Allele>>(4);
        // always keep REF as first allele please
        combination.add(Arrays.asList(Allele.create("A", true), Allele.create("C", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("C", true), Allele.create("A", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("G", true), Allele.create("A", false), Allele.create("C", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("T", true), Allele.create("A", false), Allele.create("C", false), Allele.create("G", false)));
        for (SAMSequenceRecord ssr : dict.getSequences()) {
            GenomicSequence genome = new GenomicSequence(fasta, ssr.getSequenceName());
            if (this.limitBed != null) {
                Interval interval = new Interval(ssr.getSequenceName(), 1, genome.length());
                if (!this.limitBed.containsOverlapping(interval))
                    continue;
            }
            for (int n = 0; n < genome.length(); ++n) {
                progress.watch(ssr.getSequenceIndex(), n);
                List<Allele> alleles = null;
                byte ref = (byte) genome.charAt(n);
                switch(ref) {
                    case 'a':
                    case 'A':
                        alleles = combination.get(0);
                        break;
                    case 'c':
                    case 'C':
                        alleles = combination.get(1);
                        break;
                    case 'g':
                    case 'G':
                        alleles = combination.get(2);
                        break;
                    case 't':
                    case 'T':
                        alleles = combination.get(3);
                        break;
                    default:
                        break;
                }
                if (alleles == null)
                    continue;
                if (this.limitBed != null) {
                    Interval interval = new Interval(ssr.getSequenceName(), n + 1, n + 1);
                    if (!this.limitBed.containsOverlapping(interval))
                        continue;
                }
                if (!disjoint_alts) {
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.stop(n + 1);
                    vcb.alleles(alleles);
                    out.add(vcb.make());
                } else {
                    for (// index 0 is always REF
                    int a = 1; // index 0 is always REF
                    a < 4; // index 0 is always REF
                    ++a) {
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(ssr.getSequenceName());
                        vcb.start(n + 1);
                        vcb.stop(n + 1);
                        // index 0 is always REF
                        vcb.alleles(Arrays.asList(alleles.get(0), alleles.get(a)));
                        out.add(vcb.make());
                    }
                }
                if (insertion_size > 0 && n + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REFERENCE
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(n + 1), true));
                    StringBuilder sb = new StringBuilder(insertion_size + 2);
                    sb.append(genome.charAt(n));
                    for (int n2 = 0; n2 < insertion_size; ++n2) {
                        switch(random.nextInt(4)) {
                            case 0:
                                sb.append('A');
                                break;
                            case 1:
                                sb.append('C');
                                break;
                            case 2:
                                sb.append('G');
                                break;
                            case 3:
                                sb.append('T');
                                break;
                        }
                    }
                    sb.append(genome.charAt(n + 1));
                    alleles.add(Allele.create(sb.toString(), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (deletion_size > 0 && n + deletion_size + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REF
                    StringBuilder sb = new StringBuilder(deletion_size + 2);
                    sb.append(genome.charAt(n));
                    int lastpos = n + 1;
                    for (int n2 = 0; n2 < deletion_size; ++n2, lastpos++) {
                        sb.append(genome.charAt(lastpos));
                    }
                    sb.append(genome.charAt(lastpos));
                    alleles.add(Allele.create(sb.toString(), true));
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(lastpos), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (out.checkError())
                    break;
            }
            if (out.checkError())
                break;
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Example 89 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class SamMaskAlignedBases method doWork.

@Override
public int doWork(List<String> args) {
    final byte RESET_CHAR = (byte) 'N';
    final byte RESET_QUAL = (byte) SAMUtils.fastqToPhred('#');
    long nRecords = 0L;
    long nRecordMasked = 0L;
    long nBasesMasked = 0L;
    long nBases = 0L;
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            ++nRecords;
            nBases += rec.getReadLength();
            if (rec.getReadUnmappedFlag()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            if (rec.isSecondaryOrSupplementary()) {
                continue;
            }
            final Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            byte[] quals = rec.getBaseQualities();
            if (cigar == null || cigar.isEmpty()) {
                SAMUtils.makeReadUnmapped(rec);
                sfw.addAlignment(rec);
                continue;
            }
            int readpos = 0;
            for (final CigarElement ce : cigar) {
                final CigarOperator op = ce.getOperator();
                if (op.consumesReadBases()) {
                    if (op.consumesReferenceBases()) {
                        for (int x = 0; x < ce.getLength(); ++x) {
                            if (bases != null)
                                bases[readpos + x] = RESET_CHAR;
                            if (quals != null)
                                quals[readpos + x] = RESET_QUAL;
                            ++nBasesMasked;
                        }
                    }
                    readpos += ce.getLength();
                }
            }
            ++nRecordMasked;
            SAMUtils.makeReadUnmapped(rec);
            sfw.addAlignment(rec);
        }
        iter.close();
        sfw.close();
        progress.finish();
        LOG.info("done : reads masked " + nRecordMasked + "/" + nRecords + " Bases masked:" + nBasesMasked + "/" + nBases);
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 90 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class SamRetrieveSeqAndQual method doWork.

@Override
public int doWork(List<String> args) {
    FastqReader[] fastqReaders = null;
    SamReader samReader = null;
    SAMFileWriter samWriter = null;
    SAMRecordIterator iter = null;
    try {
        if (fastqFin == null) {
            LOG.error("undefined fastq file");
            return -1;
        } else {
            LOG.info("opening " + fastqFin);
            FastqReader r1 = new FastqReader(fastqFin);
            if (fastqRin == null) {
                fastqReaders = new FastqReader[] { r1 };
            } else {
                LOG.info("opening " + fastqRin);
                FastqReader r2 = new FastqReader(fastqRin);
                fastqReaders = new FastqReader[] { r1, r2 };
            }
        }
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
        if (sortOrder == null) {
            LOG.warning("undefined sort order read are in the sam order");
        } else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
            LOG.error("Bad Sort Order. Sort this input on read name");
            return -1;
        }
        SAMFileHeader header = samReader.getFileHeader().clone();
        SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getVersion());
        samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        FastqRecord[] currFastq = new FastqRecord[] { null, null };
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            String readName = rec.getReadName();
            int fastq_index = 0;
            if (rec.getReadPairedFlag()) {
                if (fastqReaders.length != 2) {
                    LOG.error("Not paired but number of fastq!=2");
                    return -1;
                }
                fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
            } else {
                if (fastqReaders.length != 1) {
                    LOG.error("Not paired but number of fastq!=1");
                    return -1;
                }
                fastq_index = 0;
            }
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
                    if (!fastqReaders[fastq_index].hasNext()) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                    currFastq[fastq_index] = fastqReaders[fastq_index].next();
                    if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                }
            } else {
                if (!fastqReaders[fastq_index].hasNext()) {
                    LOG.error("Read Missing for " + readName);
                    return -1;
                }
                currFastq[fastq_index] = fastqReaders[fastq_index].next();
            }
            if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
                LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
                return -1;
            }
            String fastqBases = currFastq[fastq_index].getReadString();
            String fastqQuals = currFastq[fastq_index].getBaseQualityString();
            /* handle orientation */
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                fastqBases = AcidNucleics.reverseComplement(fastqBases);
                StringBuilder sb = new StringBuilder(fastqQuals.length());
                for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
                fastqQuals = sb.toString();
            }
            /* remove hard clip */
            Cigar cigar = rec.getCigar();
            if (cigar != null) {
                List<CigarElement> ceList = cigar.getCigarElements();
                if (!ceList.isEmpty()) {
                    CigarElement ce = ceList.get(ceList.size() - 1);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
                        fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
                    }
                    ce = ceList.get(0);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(ce.getLength());
                        fastqQuals = fastqQuals.substring(ce.getLength());
                    }
                }
            }
            rec.setBaseQualityString(fastqQuals);
            rec.setReadString(fastqBases);
            samWriter.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqReaders != null)
            for (FastqReader r : fastqReaders) CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FastqRecord(htsjdk.samtools.fastq.FastqRecord) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) FastqReader(htsjdk.samtools.fastq.FastqReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27