Search in sources :

Example 6 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class ParseBarcodeRead method findBestBarcode.

/**
 * Finds the barcode that best matches the leading chunk of a read.
 *
 * <p>The first {@code chunkSize} bases of {@code queryS} are encoded as a long and
 * located in the sorted {@code quickBarcodeList}. When the entry sorting immediately
 * before the query compares with a result of 0 it is returned straight away; otherwise,
 * if {@code maxDivergence} is non-zero, every known barcode is scanned for the closest one.
 *
 * @param queryS query sequence to be tested against all barcodes
 * @param maxDivergence maximum divergence to permit
 * @return best barcode match, or {@code null} if there is no good match or the best
 *         candidates tie
 */
Barcode findBestBarcode(String queryS, int maxDivergence) {
    final long encodedQuery = BaseEncoder.getLongFromSeq(queryS.substring(0, chunkSize));
    final int searchResult = Arrays.binarySearch(quickBarcodeList, encodedQuery);

    // Per the original author's note: barcode+overhang entries are padded to 32 bases
    // with polyA, so they are expected to sort ahead of the query. A result of -1 or
    // above (exact hit, or insertion point 0) is therefore treated as "no match".
    if (searchResult >= -1) {
        return null;
    }

    // -(searchResult + 2) is the slot just before the insertion point.
    final int precedingSlot = -(searchResult + 2);
    final int barcodeIndex = quickMap.get(quickBarcodeList[precedingSlot]);
    if (theBarcodes[barcodeIndex].compareSequence(encodedQuery, 1) == 0) {
        return theBarcodes[barcodeIndex];
    }
    if (maxDivergence == 0) {
        // only a perfect match is acceptable
        return null;
    }

    // Exhaustive scan: keep the candidate with the lowest divergence, preferring the
    // longer barcode+overhang at equal divergence.
    final int divergenceCap = maxDivergence + 1;
    int bestDivergence = divergenceCap;
    int bestLength = 0;
    Barcode winner = null;
    for (Barcode candidate : theBarcodes) {
        final int divergence = candidate.compareSequence(encodedQuery, divergenceCap);
        if (divergence > bestDivergence) {
            continue;
        }
        if (divergence < bestDivergence || candidate.getBarOverLength() > bestLength) {
            bestDivergence = divergence;
            bestLength = candidate.getBarOverLength();
            winner = candidate;
        } else {
            // same divergence and not longer: treated as an unresolvable tie
            winner = null;
        }
    }
    return winner;
}
Also used : Barcode(cz1.gbs.core.Barcode)

Example 7 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class FastqToTagSequence method setParameters.

/**
 * Parses the command-line options of this tool and stores them in the instance fields.
 *
 * <p>Required options are {@code -i} (input FASTQ location) and {@code -k} (key file).
 * When {@code -e} is absent the enzyme is taken from the column titled "enzyme"
 * (case-insensitive) in the first data row of the key file. Optional settings are
 * {@code -q}, {@code -t}, {@code -T} and {@code -o}. The output directory is created
 * at the end via {@code makeOutputDir()}.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given, a required option is
 *         missing, or the enzyme cannot be obtained from the key file
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-i", "--input-fastq", true);
        myArgsEngine.add("-k", "--key-file", true);
        myArgsEngine.add("-e", "--enzyme", true);
        myArgsEngine.add("-q", "--min-qualS", true);
        myArgsEngine.add("-t", "--threads", true);
        myArgsEngine.add("-T", "--trim-leading", true);
        // NOTE(review): -b is registered here but never read in this method.
        myArgsEngine.add("-b", "--unassgined-reads", true);
        myArgsEngine.add("-o", "--prefix", true);
        myArgsEngine.parse(args);
    }
    if (myArgsEngine.getBoolean("-i")) {
        myInputDirName = myArgsEngine.getString("-i");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the location of your FASTQ files.");
    }
    if (myArgsEngine.getBoolean("-k")) {
        myKeyfile = myArgsEngine.getString("-k");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify a barcode key file.");
    }
    if (myArgsEngine.getBoolean("-e")) {
        myEnzyme = myArgsEngine.getString("-e").split("-");
    } else {
        myLogger.warn("No enzyme specified.  Using enzyme listed in key file.");
        try {
            BufferedReader br = Utils.getBufferedReader(myKeyfile);
            try {
                String header = br.readLine();
                if (header == null) {
                    throw new IllegalArgumentException("The key file " + myKeyfile + " is empty. "
                            + "Please specify the enzyme with -e option.\n\n");
                }
                String[] s = header.split("\\s+");
                // the last column titled "enzyme" wins, as before
                int k = -1;
                for (int i = 0; i < s.length; i++) {
                    if ("enzyme".equalsIgnoreCase(s[i])) {
                        k = i;
                    }
                }
                if (k < 0) {
                    throw new IllegalArgumentException("No enzyme found in the key file. " + "Please specify the enzyme with -e option.\n\n");
                }
                String firstRecord = br.readLine();
                if (firstRecord == null) {
                    throw new IllegalArgumentException("The key file " + myKeyfile + " has no data rows. "
                            + "Please specify the enzyme with -e option.\n\n");
                }
                s = firstRecord.split("\\s+");
                if (k >= s.length) {
                    throw new IllegalArgumentException("The first data row of the key file " + myKeyfile
                            + " has no enzyme column. Please specify the enzyme with -e option.\n\n");
                }
                myEnzyme = s[k].split("-");
            } finally {
                // always release the reader, even when parsing fails
                br.close();
            }
        } catch (IOException e) {
            // without the key file there is no enzyme; fail loudly instead of continuing unset
            throw new IllegalArgumentException("Could not read the key file " + myKeyfile
                    + ". Please check the file or specify the enzyme with -e option.", e);
        }
    }
    if (myArgsEngine.getBoolean("-q")) {
        myMinQualS = Integer.parseInt(myArgsEngine.getString("-q"));
    }
    if (myArgsEngine.getBoolean("-t")) {
        THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
    }
    if (myArgsEngine.getBoolean("-T")) {
        int leading = Integer.parseInt(myArgsEngine.getString("-T"));
        if (leading > 0) {
            // candidate trims: the requested length first, then length-i / length+i
            // for i = 1..3 (negative lengths are skipped)
            List<Integer> leadings = new ArrayList<Integer>();
            leadings.add(leading);
            for (int i = 1; i < 4; i++) {
                if (leading - i >= 0) {
                    leadings.add(leading - i);
                }
                leadings.add(leading + i);
            }
            myLeadingTrim = new int[leadings.size()];
            for (int i = 0; i < myLeadingTrim.length; i++) {
                myLeadingTrim[i] = leadings.get(i);
            }
        }
    }
    if (myArgsEngine.getBoolean("-o")) {
        myOutputDir = myArgsEngine.getString("-o");
    }
    this.makeOutputDir();
}
Also used : BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ArgsEngine(cz1.util.ArgsEngine)

Example 8 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class SamFileExtract method setParameters.

/**
 * Parses the command-line options of this tool and stores them in the instance fields.
 *
 * <p>{@code -i/--input-file} and {@code -b/--bed-dir} are required;
 * {@code -o/--output-file} is optional and only triggers a warning when absent.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given or a required option
 *         is missing
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-i", "--input-file", true);
        myArgsEngine.add("-b", "--bed-dir", true);
        myArgsEngine.add("-o", "--output-file", true);
        myArgsEngine.parse(args);
    }
    if (myArgsEngine.getBoolean("-i")) {
        bam_in = myArgsEngine.getString("-i");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the input file (-i/--input-file).");
    }
    if (myArgsEngine.getBoolean("-b")) {
        bed_in = myArgsEngine.getString("-b");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the BED directory (-b/--bed-dir).");
    }
    if (myArgsEngine.getBoolean("-o")) {
        bam_out = myArgsEngine.getString("-o");
    } else {
        // a missing output file is not fatal here; bam_out keeps its current value
        myLogger.warn("No output file specified (-o/--output-file).");
    }
}
Also used : ArgsEngine(cz1.util.ArgsEngine)

Example 9 with Barcode

use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.

the class GBSSimulator method setParameters.

/**
 * Parses the simulator's command-line options and builds the {@code GBS} model from them.
 *
 * <p>{@code -f} is mandatory. Values used when an option is absent: enzyme
 * {@code "PstI"}, {@code -m} 5, {@code -s} 5, output directory {@code "./"}, and a
 * seed taken from {@code System.nanoTime()}; {@code -l}, {@code -b} and {@code -q}
 * are passed on as {@code null}. {@code -t} overwrites {@code THREADS} when present.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given or {@code -f} is missing
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-f", "--fasta-file", true);
        myArgsEngine.add("-e", "--enzyme", true);
        myArgsEngine.add("-l", "--library", true);
        myArgsEngine.add("-t", "--threads", true);
        myArgsEngine.add("-b", "--barcode-file", true);
        myArgsEngine.add("-m", "--avg-depth", true);
        myArgsEngine.add("-s", "--sdev", true);
        myArgsEngine.add("-S", "--random-seed", true);
        myArgsEngine.add("-q", "--quality-file", true);
        myArgsEngine.add("-o", "--output-file", true);
        myArgsEngine.parse(args);
    }

    // seed used unless -S overrides it
    final long fallbackSeed = System.nanoTime();

    if (!myArgsEngine.getBoolean("-f")) {
        printUsage();
        throw new IllegalArgumentException("Please specify the FASTA files.");
    }
    final String fastaInput = myArgsEngine.getString("-f");

    final String enzyme = myArgsEngine.getBoolean("-e")
            ? myArgsEngine.getString("-e")
            : "PstI";
    final String libraryFile = myArgsEngine.getBoolean("-l")
            ? myArgsEngine.getString("-l")
            : null;
    final String barcodeFile = myArgsEngine.getBoolean("-b")
            ? myArgsEngine.getString("-b")
            : null;

    if (myArgsEngine.getBoolean("-t")) {
        THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
    }

    final double meanDepth = myArgsEngine.getBoolean("-m")
            ? Double.parseDouble(myArgsEngine.getString("-m"))
            : 5;
    final double depthSd = myArgsEngine.getBoolean("-s")
            ? Double.parseDouble(myArgsEngine.getString("-s"))
            : 5;
    final long seed = myArgsEngine.getBoolean("-S")
            ? Long.parseLong(myArgsEngine.getString("-S"))
            : fallbackSeed;
    final String qualityParams = myArgsEngine.getBoolean("-q")
            ? myArgsEngine.getString("-q")
            : null;
    final String outputLocation = myArgsEngine.getBoolean("-o")
            ? myArgsEngine.getString("-o")
            : "./";

    gbs = new GBS(fastaInput, enzyme, meanDepth, depthSd, qualityParams,
            libraryFile, barcodeFile, outputLocation, seed);
}
Also used : GBS(cz1.simulation.model.GBS) ArgsEngine(cz1.util.ArgsEngine)

Aggregations

ArgsEngine (cz1.util.ArgsEngine)5 BufferedReader (java.io.BufferedReader)4 Barcode (cz1.gbs.core.Barcode)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 ReadBarcodeResult (cz1.gbs.core.ReadBarcodeResult)2 File (java.io.File)2 ParseBarcodeRead (cz1.gbs.model.ParseBarcodeRead)1 GBS (cz1.simulation.model.GBS)1 SAMRecord (htsjdk.samtools.SAMRecord)1 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 FilenameFilter (java.io.FilenameFilter)1 BitSet (java.util.BitSet)1 HashMap (java.util.HashMap)1