Search in sources :

Example 1 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class ParseBarcodeRead method setupBarcodeFiles.

/**
 * Reads a tab-delimited Illumina key file and builds the barcode lookup structures
 * ({@code theBarcodes}, {@code quickBarcodeList} and {@code quickMap}) for one flowcell lane.
 *
 * <p>Only rows whose first column equals {@code flowcell} and whose second column equals
 * {@code lane} are used. Each such row yields one {@link Barcode} named
 * {@code col3:col0:col1:<id>} (zero-based columns), where {@code <id>} is the integer in
 * column 7 when that column is present and parses as an integer, and
 * {@code col4:col5+col6} otherwise (column 6 is left-padded with {@code '0'} to two
 * characters). Rows with fewer than two columns (e.g. blank lines) are ignored; matching rows
 * with fewer than seven columns are reported and skipped instead of aborting the whole setup.
 *
 * <p>After reading, {@code theBarcodes} is sorted, every value returned by
 * {@link Barcode#getBarOverLong()} is copied into the sorted {@code quickBarcodeList}, and
 * {@code quickMap} maps each of those values to the index of its barcode in
 * {@code theBarcodes}. The number of values per barcode is taken from the first barcode.
 *
 * <p>Any exception is reported on standard output and otherwise swallowed, as before.
 *
 * @param keyFile Illumina key file.
 * @param flowcell Only barcodes from this flowcell will be added to the array.
 * @param lane Only barcodes from this lane will be added to the array.
 * @return Number of barcodes in the array, or 0 if the array was never built.
 */
private int setupBarcodeFiles(File keyFile, String flowcell, String lane) {
    try {
        ArrayList<Barcode> theBarcodesArrayList = new ArrayList<Barcode>();
        // try-with-resources: the reader is closed even when a line fails to parse
        try (BufferedReader br = new BufferedReader(new FileReader(keyFile), 65536)) {
            String temp;
            int k = 0;
            while ((temp = br.readLine()) != null) {
                // key file columns are tab separated
                String[] s = temp.split("\\t");
                if (s.length < 2 || !s[0].equals(flowcell) || !s[1].equals(lane)) {
                    // blank/short line, or a row that belongs to another flowcell lane
                    continue;
                }
                if (s.length < 7) {
                    System.out.println("Error with setupBarcodeFiles: skipping key file line with fewer than 7 columns: " + temp);
                    continue;
                }
                String well = (s[6].length() < 2) ? (s[5] + '0' + s[6]) : s[5] + s[6];
                String namePrefix = s[3] + ":" + s[0] + ":" + s[1] + ":";
                // default: use the plate and well
                String taxaName = namePrefix + s[4] + ":" + well;
                if (s.length >= 8 && !s[7].equals("")) {
                    // use the "libraryPlateWellID" or whatever is in column H of the key file, IF it is an integer
                    try {
                        taxaName = namePrefix + Integer.parseInt(s[7]);
                    } catch (NumberFormatException nfe) {
                        // not an integer: keep the plate/well based name
                    }
                }
                Barcode theBC = new Barcode(s[2], initialCutSiteRemnant, taxaName, flowcell, lane, k++);
                theBarcodesArrayList.add(theBC);
                System.out.println(theBC.getBarcodeString() + " " + theBC.getTaxaName());
            }
        }
        theBarcodes = theBarcodesArrayList.toArray(new Barcode[theBarcodesArrayList.size()]);
        Arrays.sort(theBarcodes);
        // with no barcodes there is nothing to index; leave the lookup structures empty
        int nBL = (theBarcodes.length == 0) ? 0 : theBarcodes[0].getBarOverLong().length;
        quickBarcodeList = new long[theBarcodes.length * nBL];
        quickMap = new HashMap<Long, Integer>();
        for (int i = 0; i < theBarcodes.length; i++) {
            long[] barOverLong = theBarcodes[i].getBarOverLong();
            for (int j = 0; j < nBL; j++) {
                quickBarcodeList[i * nBL + j] = barOverLong[j];
                quickMap.put(barOverLong[j], i);
            }
        }
        Arrays.sort(quickBarcodeList);
    } catch (Exception e) {
        System.out.println("Error with setupBarcodeFiles: " + e);
    }
    // theBarcodes is still null if the setup failed before the array was assigned
    return (theBarcodes == null) ? 0 : theBarcodes.length;
}
Also used : ArrayList(java.util.ArrayList) Barcode(cz1.gbs.core.Barcode) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 2 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class ParseBarcodeRead method parseReadIntoTagAndTaxa.

/**
 * Turns one raw read into a tag and the taxon it belongs to.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>When {@code minQual} is non-zero, every base whose Phred+33 quality
 * ({@code qualS.charAt(i) - 33}) is below {@code minQual} is replaced by {@code 'N'}.
 * The masking works on a copy of the sequence; the caller's {@code String} is never
 * modified.</li>
 * <li>The best matching barcode is looked up with {@code findBestBarcode}; the read is
 * rejected when none is found.</li>
 * <li>The barcode is cut off the front and leading/trailing {@code 'N'} characters are
 * stripped.</li>
 * <li>The sequence is truncated at the first occurrence of any entry of
 * {@code likelyReadEnd}, and {@code 'N'} characters are stripped again.</li>
 * </ol>
 * The read is rejected whenever fewer than 32 bases remain after step 3 or step 4.
 *
 * @param seqS The raw read sequence; {@code null} yields {@code null}.
 * @param qualS The quality string of the read; only consulted when {@code minQual != 0},
 * in which case it must be at least as long as {@code seqS}.
 * @param minQual Minimum base quality; {@code 0} disables quality masking.
 * @return A ReadBarcodeResult holding the encoded tag and the taxon id of the matched
 * barcode, or {@code null} when the read is rejected.
 * @throws IndexOutOfBoundsException if masking is enabled and {@code qualS} is shorter
 * than {@code seqS}.
 */
public ReadBarcodeResult parseReadIntoTagAndTaxa(String seqS, String qualS, int minQual) {
    if (seqS == null)
        return null;
    // shortest tag (in bases) that is still reported
    final int minTagLength = 32;
    if (minQual != 0) {
        // Mask on a private copy. Reaching into the String's backing array via reflection
        // mutates an immutable object and breaks on JVMs where that array is not a char[].
        final char[] seqC = seqS.toCharArray();
        for (int i = 0; i < seqC.length; i++) {
            if (qualS.charAt(i) - 33 < minQual)
                seqC[i] = 'N';
        }
        seqS = String.valueOf(seqC);
    }
    Barcode bestBarcode = findBestBarcode(seqS, maximumMismatchInBarcodeAndOverhang);
    if (bestBarcode == null) {
        // overhang missing so skip
        return null;
    }
    seqS = StringUtils.strip(seqS.substring(bestBarcode.getBarLength()), "N");
    if (seqS.length() < minTagLength)
        return null;
    // earliest position at which any likely read end starts; defaults to "no cut"
    int cutSitePosition = seqS.length();
    for (String end : likelyReadEnd) {
        int w = seqS.indexOf(end);
        if (w >= 0 && w < cutSitePosition)
            cutSitePosition = w;
    }
    String processedSeqS = StringUtils.strip(seqS.substring(0, cutSitePosition), "N");
    if (processedSeqS.length() >= minTagLength)
        return new ReadBarcodeResult(BaseEncoder.getBitSetFromSeq(processedSeqS), bestBarcode.getTaxaId());
    return null;
}
Also used : Barcode(cz1.gbs.core.Barcode) ReadBarcodeResult(cz1.gbs.core.ReadBarcodeResult)

Example 3 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class FastqToTagSequence method run.

/**
 * Processes every FASTQ file found in {@code myInputDirName}, one file ("lane") at a time.
 *
 * <p>For each file that has no existing output, the flowcell and lane are taken from the
 * underscore-delimited file name, one {@link ParseBarcodeRead} is created per enzyme, and the
 * reads are consumed in blocks of 10000 records. Each block is handed to the executor; the
 * worker parses every read with each leading-trim offset and each enzyme parser until one
 * yields a result, counts tags per taxon locally, and then merges its counts into the shared
 * {@code tagCounts} under {@code lock}. When memory usage exceeds {@code load}, and once more
 * at the end of the file, the pending tasks are awaited and {@code writeHardDisk()} is called.
 *
 * <p>Files whose name does not split into 3, 4 or 5 underscore-delimited fields, and lanes
 * without any barcode, are skipped.
 */
@Override
public void run() {
    File inputDirectory = new File(this.myInputDirName);
    File[] fastqFiles = inputDirectory.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            // (?i) denotes case insensitive; \\. is an escaped literal dot
            return name.matches("(?i).*\\.fq$|.*\\.fq\\.gz$|.*\\.fastq$|.*_fastq\\.txt$|.*_fastq\\.gz$|.*_fastq\\.txt\\.gz$|.*_sequence\\.txt$|.*_sequence\\.txt\\.gz$");
        }
    });
    // listFiles() returns null when the path is not a readable directory, so the null test
    // has to come before the length test
    if (fastqFiles == null || fastqFiles.length == 0) {
        myLogger.warn("Couldn't find any files that end with \".fq\", \".fq.gz\", \".fastq\", \"_fastq.txt\", \"_fastq.gz\", \"_fastq.txt.gz\", \"_sequence.txt\", or \"_sequence.txt.gz\" in the supplied directory.");
        return;
    }
    myLogger.info("Using the following FASTQ files:");
    String[] countFileNames = new String[fastqFiles.length];
    for (int i = 0; i < fastqFiles.length; i++) {
        // output base name = file name without its FASTQ suffix
        countFileNames[i] = fastqFiles[i].getName().replaceAll("(?i)\\.fq$|\\.fq\\.gz$|\\.fastq$|_fastq\\.txt$|_fastq\\.gz$|_fastq\\.txt\\.gz$|_sequence\\.txt$|_sequence\\.txt\\.gz$", "");
        myLogger.info(fastqFiles[i].getAbsolutePath());
    }
    for (int laneNum = 0; laneNum < fastqFiles.length; laneNum++) {
        if (new File(myOutputDir + System.getProperty("file.separator") + countFileNames[laneNum] + ".cnt.gz").exists()) {
            myLogger.info("Fastq file " + fastqFiles[laneNum] + " skipped.");
            continue;
        }
        File outputFile = new File(this.myOutputDir + File.separator + countFileNames[laneNum]);
        if (outputFile.isFile()) {
            myLogger.warn("An output file " + countFileNames[laneNum] + "\n" + " already exists in the output directory for file " + fastqFiles[laneNum] + ".  Skipping.");
            continue;
        }
        myLogger.info("Reading FASTQ file: " + fastqFiles[laneNum]);
        // Work out flowcell and lane once per file, BEFORE creating the parsers, so that an
        // unparsable name skips the whole lane instead of leaving thePBR filled with nulls.
        String[] filenameField = fastqFiles[laneNum].getName().split("_");
        final String flowcell;
        final String lane;
        if (filenameField.length == 3) {
            flowcell = filenameField[0];
            lane = filenameField[1];
        } else if (filenameField.length == 4) {
            flowcell = filenameField[0];
            lane = filenameField[2];
        } else if (filenameField.length == 5) {
            // B08AAABXX_s_1_sequence.txt.gz
            flowcell = filenameField[1];
            lane = filenameField[3];
        } else {
            myLogger.error("Error in parsing file name: " + fastqFiles[laneNum]);
            myLogger.error("   The filename does not contain either 3, 4, or 5 underscore-delimited values.");
            myLogger.error("   Expect: flowcell_lane_fastq.txt.gz OR flowcell_s_lane_fastq.txt.gz OR code_flowcell_s_lane_fastq.txt.gz");
            continue;
        }
        thePBR = new ParseBarcodeRead[this.myEnzyme.length];
        for (int i = 0; i < this.myEnzyme.length; i++) {
            thePBR[i] = new ParseBarcodeRead(this.myKeyfile, this.myEnzyme[i], flowcell, lane);
        }
        taxa = thePBR[0].getSortedTaxaNames();
        n = taxa.length;
        os = countFileNames[laneNum];
        volume = 0;
        myLogger.info("Total barcodes found in lane:" + thePBR[0].getBarCodeCount());
        if (thePBR[0].getBarCodeCount() == 0) {
            myLogger.warn("No barcodes found.  Skipping this flowcell lane.");
            continue;
        }
        long start = System.currentTimeMillis();
        this.initial_thread_pool();
        // try-with-resources: the FASTQ reader is closed even if reading fails part-way
        try (BufferedReader br = Utils.getBufferedReader(fastqFiles[laneNum], 65536)) {
            int block = 10000;
            // Qs[r][0] = sequence line, Qs[r][1] = quality line of the r-th record in the block
            String[][] Qs = new String[block][2];
            int k = 0;
            allReads = 0;
            goodBarcodedReads = 0;
            tags = 0;
            // first line of a record (header); null means end of file
            String temp = br.readLine();
            while (temp != null) {
                try {
                    Qs[k][0] = br.readLine();
                    // separator line between sequence and quality
                    br.readLine();
                    Qs[k][1] = br.readLine();
                } catch (NullPointerException e) {
                    myLogger.error("Unable to correctly parse the sequence from fastq file.  " + "Your fastq file may have been corrupted.");
                    System.exit(1);
                }
                k++;
                temp = br.readLine();
                if (k == block || temp == null) {
                    if (usedMemory() / maxMemory() > load) {
                        // flush what has been counted so far, then start a fresh pool
                        this.waitFor();
                        writeHardDisk();
                        this.initial_thread_pool();
                    }
                    executor.submit(new Runnable() {

                        private String[][] fastq;

                        @Override
                        public void run() {
                            try {
                                // counts gathered for this block only; merged under the lock below
                                final Map<BitSet, short[]> block_tagCounts = new HashMap<BitSet, short[]>();
                                int block_allReads = 0, block_goodBarcodedReads = 0;
                                for (int i = 0; i < fastq.length; i++) {
                                    // a null sequence marks the unused tail of a partly filled block
                                    if (fastq[i][0] == null)
                                        break;
                                    block_allReads++;
                                    // reset per read so a result of an earlier read is never reused
                                    ReadBarcodeResult rr = null;
                                    // first combination of trim offset and enzyme parser that matches wins
                                    outerloop: for (int j = 0; j < myLeadingTrim.length; j++) {
                                        for (int p = 0; p < thePBR.length; p++) {
                                            rr = thePBR[p].parseReadIntoTagAndTaxa(fastq[i][0].substring(myLeadingTrim[j]), fastq[i][1].substring(myLeadingTrim[j]), myMinQualS);
                                            if (rr != null)
                                                break outerloop;
                                        }
                                    }
                                    if (rr != null) {
                                        BitSet key = rr.read;
                                        block_goodBarcodedReads++;
                                        short[] counts = block_tagCounts.get(key);
                                        if (counts == null) {
                                            counts = new short[n];
                                            block_tagCounts.put(key, counts);
                                        }
                                        counts[rr.taxonId]++;
                                    }
                                }
                                synchronized (lock) {
                                    allReads += block_allReads;
                                    goodBarcodedReads += block_goodBarcodedReads;
                                    for (Map.Entry<BitSet, short[]> entry : block_tagCounts.entrySet()) {
                                        BitSet bs = entry.getKey();
                                        short[] block_copy = entry.getValue();
                                        if (!tagCounts.containsKey(bs)) {
                                            tags++;
                                            tagCounts.put(bs, block_copy);
                                        } else {
                                            short[] copy = tagCounts.get(bs);
                                            for (int i = 0; i < n; i++) copy[i] += block_copy[i];
                                        }
                                    }
                                }
                            } catch (Exception e) {
                                Thread t = Thread.currentThread();
                                t.getUncaughtExceptionHandler().uncaughtException(t, e);
                                e.printStackTrace();
                                executor.shutdown();
                                System.exit(1);
                            }
                        }

                        public Runnable init(String[][] fastq) {
                            this.fastq = fastq;
                            return (this);
                        }
                    }.init(Qs));
                    k = 0;
                    Qs = new String[block][2];
                }
            }
            // Wait for the submitted blocks BEFORE looking at tagCounts: a worker that is still
            // running has not merged its counts yet, so testing isEmpty() first could drop them.
            // NOTE(review): assumes waitFor() blocks until all submitted tasks have finished,
            // as its use ahead of writeHardDisk() in the memory-pressure branch implies.
            this.waitFor();
            if (!tagCounts.isEmpty()) {
                writeHardDisk();
            }
            myLogger.info("Total number of reads in lane=" + allReads);
            myLogger.info("Total number of good barcoded reads=" + goodBarcodedReads);
            myLogger.info("Total number of tags=" + tags);
            myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
        } catch (Exception e) {
            e.printStackTrace();
        }
        myLogger.info("Finished reading " + (laneNum + 1) + " of " + fastqFiles.length + " sequence files.");
        tagCounts.clear();
    }
}
Also used : HashMap(java.util.HashMap) BitSet(java.util.BitSet) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) FilenameFilter(java.io.FilenameFilter) ParseBarcodeRead(cz1.gbs.model.ParseBarcodeRead) BufferedReader(java.io.BufferedReader) ReadBarcodeResult(cz1.gbs.core.ReadBarcodeResult) File(java.io.File)

Example 4 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class GBSpileup method setParameters.

/**
 * Parses the command line options of this tool and stores them in the instance fields.
 *
 * <p>{@code -i} (input FASTQ location), {@code -k} (key file) and {@code -f} (reference) are
 * mandatory. Unless {@code -z} is given, {@code freebayes} is required. When any of the
 * reference's {@code .amb/.ann/.bwt/.pac/.sa} files is missing, {@code bwa index} is run on
 * the reference. When {@code -e} is absent, the enzyme is read from the key file: the column
 * titled "enzyme" (case-insensitive) in the first line selects the field of the second line,
 * which is split on {@code '-'}. {@code -q}, {@code -p}, {@code -t}, {@code -x}, {@code -T}
 * and {@code -o} are optional. A positive {@code -T} value {@code t} expands to the trim
 * offsets {@code t, t-1, t+1, t-2, t+2, t-3, t+3} (negative offsets omitted).
 *
 * @param args the command line arguments.
 * @throws IllegalArgumentException if no arguments are given, a mandatory option is missing,
 * or the enzyme can neither be taken from {@code -e} nor be read from the key file.
 * @throws NumberFormatException if a numeric option value is not a valid number.
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-i", "--input-fastq", true);
        myArgsEngine.add("-k", "--key-file", true);
        myArgsEngine.add("-e", "--enzyme", true);
        myArgsEngine.add("-q", "--min-qualS", true);
        myArgsEngine.add("-p", "--ploidy", true);
        myArgsEngine.add("-t", "--threads", true);
        myArgsEngine.add("-T", "--trim-leading", true);
        myArgsEngine.add("-b", "--unassgined-reads", true);
        myArgsEngine.add("-f", "--reference", true);
        myArgsEngine.add("-z", "--skip-freebayes", false);
        myArgsEngine.add("-x", "--max-coverage", true);
        myArgsEngine.add("-o", "--prefix", true);
        myArgsEngine.parse(args);
    }
    if (myArgsEngine.getBoolean("-i")) {
        myInputDirName = myArgsEngine.getString("-i");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the location of your FASTQ files.");
    }
    if (myArgsEngine.getBoolean("-k")) {
        myKeyfile = myArgsEngine.getString("-k");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify a barcode key file.");
    }
    if (myArgsEngine.getBoolean("-z")) {
        this.skipFB = true;
    } else {
        this.require("freebayes");
    }
    if (myArgsEngine.getBoolean("-f")) {
        myReference = myArgsEngine.getString("-f");
        // build the BWA index when any of its files is missing
        if (!new File(myReference + ".amb").exists() || !new File(myReference + ".ann").exists() || !new File(myReference + ".bwt").exists() || !new File(myReference + ".pac").exists() || !new File(myReference + ".sa").exists()) {
            // NOTE(review): the reference path is pasted unquoted into a shell command; paths
            // containing spaces or shell metacharacters will not work as intended - confirm
            // how bash() executes the string before relying on arbitrary paths here.
            String index = "bwa index -p " + myReference + " -a bwtsw " + myReference;
            this.consume(this.bash(index));
        }
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the reference.");
    }
    if (myArgsEngine.getBoolean("-e")) {
        myEnzyme = myArgsEngine.getString("-e").split("-");
    } else {
        myLogger.warn("No enzyme specified.  Using enzyme listed in key file.");
        // try-with-resources: the key file reader is closed on every path
        try (BufferedReader br = Utils.getBufferedReader(myKeyfile)) {
            String header = br.readLine();
            if (header == null)
                throw new IllegalArgumentException("No enzyme found in the key file. " + "Please specify the enzyme with -e option.\n\n");
            String[] s = header.split("\\s+");
            // index of the "enzyme" column; the last matching column wins
            int k = -1;
            for (int i = 0; i < s.length; i++) if (s[i].equalsIgnoreCase("enzyme"))
                k = i;
            if (k < 0)
                throw new IllegalArgumentException("No enzyme found in the key file. " + "Please specify the enzyme with -e option.\n\n");
            String firstRecord = br.readLine();
            s = (firstRecord == null) ? new String[0] : firstRecord.split("\\s+");
            if (s.length <= k)
                throw new IllegalArgumentException("The key file " + myKeyfile + " has no enzyme entry in its first data line. " + "Please specify the enzyme with -e option.\n\n");
            myEnzyme = s[k].split("-");
        } catch (IOException e) {
            // without an enzyme nothing downstream can work, so fail here with the cause
            throw new IllegalArgumentException("Could not read the key file " + myKeyfile + ". " + "Please specify the enzyme with -e option.\n\n", e);
        }
    }
    if (myArgsEngine.getBoolean("-q")) {
        myMinQualS = Integer.parseInt(myArgsEngine.getString("-q"));
    }
    if (myArgsEngine.getBoolean("-p")) {
        myPloidy = Integer.parseInt(myArgsEngine.getString("-p"));
    }
    if (myArgsEngine.getBoolean("-t")) {
        THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
    }
    if (myArgsEngine.getBoolean("-x")) {
        maxCov = Long.parseLong(myArgsEngine.getString("-x"));
    }
    if (myArgsEngine.getBoolean("-T")) {
        int leading = Integer.parseInt(myArgsEngine.getString("-T"));
        if (leading > 0) {
            // try the requested offset first, then offsets up to 3 bases either side of it
            List<Integer> leadings = new ArrayList<Integer>();
            leadings.add(leading);
            for (int i = 1; i < 4; i++) {
                if (leading - i >= 0)
                    leadings.add(leading - i);
                leadings.add(leading + i);
            }
            myLeadingTrim = new int[leadings.size()];
            for (int i = 0; i < myLeadingTrim.length; i++) myLeadingTrim[i] = leadings.get(i);
        }
    }
    if (myArgsEngine.getBoolean("-o")) {
        myOutputDir = myArgsEngine.getString("-o");
    }
    File out = new File(myOutputDir);
    if (out.exists() && out.isDirectory()) {
        myLogger.warn("Output directory " + myOutputDir + " exsits. " + "We strongly recommend a new location.");
    }
}
Also used : BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException) File(java.io.File) ArgsEngine(cz1.util.ArgsEngine)

Example 5 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class TenXSamtools method setParameters.

/**
 * Parses the command line of this tool. The first argument selects the task (only
 * {@code sort}, matched case-insensitively, is accepted); the remaining arguments are the
 * options of that task.
 *
 * <p>For {@code sort}: {@code -i} (input BAM) and {@code -o} (output BAM) are mandatory,
 * {@code -b} (order by the {@code BX} attribute) and {@code -n} (order by read name) are
 * mutually exclusive, and {@code -s} (batch size) and {@code -t} (threads) are optional
 * integers. Both orderings put {@code null} records last and fall back to
 * {@code compareSAMRecord} on ties; the barcode ordering also puts records without a
 * {@code BX} attribute after those that have one.
 *
 * @param args the task name followed by its options.
 * @throws IllegalArgumentException if no arguments are given, the task is unknown, a
 * mandatory option is missing, or both {@code -b} and {@code -n} are given.
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    final String taskName = args[0].toUpperCase();
    if (!"SORT".equals(taskName)) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    this.task = Task.sort;
    // everything after the task name belongs to the task's own option parser
    final String[] taskArgs = new String[args.length - 1];
    System.arraycopy(args, 1, taskArgs, 0, taskArgs.length);
    switch(this.task) {
        case sort:
            if (myArgsEngine == null) {
                myArgsEngine = new ArgsEngine();
                myArgsEngine.add("-i", "--in-bam", true);
                myArgsEngine.add("-b", "--barcode", false);
                myArgsEngine.add("-n", "--name", false);
                myArgsEngine.add("-s", "--batch-size", true);
                myArgsEngine.add("-t", "--threads", true);
                myArgsEngine.add("-o", "--out-bam", true);
                myArgsEngine.parse(taskArgs);
            }
            if (!myArgsEngine.getBoolean("-i")) {
                printUsage();
                throw new IllegalArgumentException("Please specify the input BAM file.");
            }
            this.bam_in = myArgsEngine.getString("-i");
            final boolean byBarcode = myArgsEngine.getBoolean("-b");
            final boolean byName = myArgsEngine.getBoolean("-n");
            if (byBarcode && byName) {
                printUsage();
                throw new IllegalArgumentException("Options -b and -n are exculsive!!!");
            }
            if (byBarcode) {
                this.sort_order = Order.barcode;
                comprator = new Comparator<SAMRecord>() {

                    @Override
                    public int compare(SAMRecord record1, SAMRecord record2) {
                        // null records sort after everything else
                        if (record1 == null)
                            return record2 == null ? 0 : 1;
                        if (record2 == null)
                            return -1;
                        final String bx1 = record1.getStringAttribute("BX");
                        final String bx2 = record2.getStringAttribute("BX");
                        // records without a barcode go to the end
                        if (bx1 == null)
                            return bx2 == null ? compareSAMRecord(record1, record2) : 1;
                        if (bx2 == null)
                            return -1;
                        final int diff = bx1.compareTo(bx2);
                        return diff != 0 ? diff : compareSAMRecord(record1, record2);
                    }
                };
            } else if (byName) {
                this.sort_order = Order.queryname;
                comprator = new Comparator<SAMRecord>() {

                    @Override
                    public int compare(SAMRecord record1, SAMRecord record2) {
                        // null records sort after everything else
                        if (record1 == null)
                            return record2 == null ? 0 : 1;
                        if (record2 == null)
                            return -1;
                        final int diff = record1.getReadName().compareTo(record2.getReadName());
                        return diff != 0 ? diff : compareSAMRecord(record1, record2);
                    }
                };
            }
            if (myArgsEngine.getBoolean("-s")) {
                this.batch_size = Integer.parseInt(myArgsEngine.getString("-s"));
            }
            if (myArgsEngine.getBoolean("-t")) {
                this.THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
            }
            if (!myArgsEngine.getBoolean("-o")) {
                printUsage();
                throw new IllegalArgumentException("Please specify the output BAM file.");
            }
            this.bam_out = myArgsEngine.getString("-o");
            break;
        default:
            throw new RuntimeException("!!!");
    }
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord) ArgsEngine(cz1.util.ArgsEngine)

Aggregations

ArgsEngine (cz1.util.ArgsEngine)5 BufferedReader (java.io.BufferedReader)4 Barcode (cz1.gbs.core.Barcode)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 ReadBarcodeResult (cz1.gbs.core.ReadBarcodeResult)2 File (java.io.File)2 ParseBarcodeRead (cz1.gbs.model.ParseBarcodeRead)1 GBS (cz1.simulation.model.GBS)1 SAMRecord (htsjdk.samtools.SAMRecord)1 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 FilenameFilter (java.io.FilenameFilter)1 BitSet (java.util.BitSet)1 HashMap (java.util.HashMap)1