use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.
the class ParseBarcodeRead method findBestBarcode.
/**
 * Returns the best barcode match for a given sequence.
 *
 * <p>The first {@code chunkSize} bases of the query are encoded as a long and
 * located in {@code quickBarcodeList} by binary search. The entry just before the
 * insertion point is tested for a perfect match first. Only when that fails and
 * {@code maxDivergence} is non-zero are all barcodes scanned: the lowest
 * divergence wins, a longer barcode+overhang wins at equal divergence, and a
 * candidate that ties without being longer makes the result unresolvable.
 *
 * @param queryS query sequence to be tested against all barcodes
 * @param maxDivergence maximum divergence to permit
 * @return best barcode match (null if no good match, including when the query
 *         is null or shorter than {@code chunkSize})
 */
Barcode findBestBarcode(String queryS, int maxDivergence) {
    // A read shorter than one chunk cannot be encoded, hence cannot match.
    if (queryS == null || queryS.length() < chunkSize) {
        return null;
    }
    long query = BaseEncoder.getLongFromSeq(queryS.substring(0, chunkSize));
    // note because the barcodes are polyA after the sequence, they should always
    // sort ahead of the hit, this is the reason for the -(closestHit+2)
    int closestHit = Arrays.binarySearch(quickBarcodeList, query);
    // Below is the old pipeline approach, which works (at least for maxDivergence of 0)
    if (closestHit < -1) {
        // should always be true, as the barcode+overhang is padded to 32 bases with polyA
        int index = quickMap.get(quickBarcodeList[-(closestHit + 2)]);
        if (theBarcodes[index].compareSequence(query, 1) == 0) {
            return theBarcodes[index];
        } else if (maxDivergence == 0) {
            // return null if not a perfect match
            return null;
        }
    } else {
        // should never go to this line
        return null;
    }
    int maxLength = 0, minDiv = maxDivergence + 1;
    Barcode bestBC = null;
    for (Barcode bc : theBarcodes) {
        int div = bc.compareSequence(query, maxDivergence + 1);
        // Never accept a candidate beyond the permitted divergence; without this
        // guard a barcode at maxDivergence + 1 could be picked on length alone.
        if (div > maxDivergence || div > minDiv) {
            continue;
        }
        if ((div < minDiv) || (bc.getBarOverLength() > maxLength)) {
            minDiv = div;
            maxLength = bc.getBarOverLength();
            bestBC = bc;
        } else {
            // it is a tie, so return that not resolvable
            bestBC = null;
        }
    }
    return bestBC;
}
use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.
the class FastqToTagSequence method setParameters.
/**
 * Parses the command-line arguments and stores the resulting settings.
 *
 * <p>{@code -i} (input FASTQ location) and {@code -k} (key file) are mandatory.
 * When {@code -e} is absent the enzyme is read from the key file: the column
 * whose header equals "enzyme" (case-insensitive) is located on the first line
 * and its value is taken from the second line. {@code -q}, {@code -t},
 * {@code -T} and {@code -o} are optional. The output directory is created via
 * {@code makeOutputDir()} at the end.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given, a mandatory
 *         option is missing, or the enzyme cannot be obtained from the key file
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-i", "--input-fastq", true);
        myArgsEngine.add("-k", "--key-file", true);
        myArgsEngine.add("-e", "--enzyme", true);
        myArgsEngine.add("-q", "--min-qualS", true);
        myArgsEngine.add("-t", "--threads", true);
        myArgsEngine.add("-T", "--trim-leading", true);
        // NOTE: "-b" is registered here but not read anywhere in this method.
        myArgsEngine.add("-b", "--unassgined-reads", true);
        myArgsEngine.add("-o", "--prefix", true);
        myArgsEngine.parse(args);
    }
    if (myArgsEngine.getBoolean("-i")) {
        myInputDirName = myArgsEngine.getString("-i");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify the location of your FASTQ files.");
    }
    if (myArgsEngine.getBoolean("-k")) {
        myKeyfile = myArgsEngine.getString("-k");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify a barcode key file.");
    }
    if (myArgsEngine.getBoolean("-e")) {
        myEnzyme = myArgsEngine.getString("-e").split("-");
    } else {
        myLogger.warn("No enzyme specified. Using enzyme listed in key file.");
        // try-with-resources closes the reader on every exit path, including
        // the validation failures thrown below.
        try (BufferedReader br = Utils.getBufferedReader(myKeyfile)) {
            String header = br.readLine();
            if (header == null) {
                throw new IllegalArgumentException("The key file " + myKeyfile + " is empty. "
                        + "Please specify the enzyme with -e option.\n\n");
            }
            String[] s = header.split("\\s+");
            int k = -1;
            // Keeps the last matching column, as the original loop did.
            for (int i = 0; i < s.length; i++) {
                if (s[i].equalsIgnoreCase("enzyme")) {
                    k = i;
                }
            }
            if (k < 0) {
                throw new IllegalArgumentException("No enzyme found in the key file. " + "Please specify the enzyme with -e option.\n\n");
            }
            String firstRecord = br.readLine();
            if (firstRecord == null) {
                throw new IllegalArgumentException("The key file " + myKeyfile + " has no record after the header. "
                        + "Please specify the enzyme with -e option.\n\n");
            }
            s = firstRecord.split("\\s+");
            if (k >= s.length) {
                throw new IllegalArgumentException("The first record of the key file " + myKeyfile
                        + " has no enzyme column. Please specify the enzyme with -e option.\n\n");
            }
            myEnzyme = s[k].split("-");
        } catch (IOException e) {
            // Do not continue with an unset enzyme; surface the failure with its cause.
            throw new IllegalArgumentException("Failed to read the key file " + myKeyfile
                    + ". Please specify the enzyme with -e option.\n\n", e);
        }
    }
    if (myArgsEngine.getBoolean("-q")) {
        myMinQualS = Integer.parseInt(myArgsEngine.getString("-q"));
    }
    if (myArgsEngine.getBoolean("-t")) {
        THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
    }
    if (myArgsEngine.getBoolean("-T")) {
        int leading = Integer.parseInt(myArgsEngine.getString("-T"));
        if (leading > 0) {
            // Candidate trim lengths: the requested value first, then values up
            // to 3 below (never negative) and up to 3 above it.
            List<Integer> leadings = new ArrayList<Integer>();
            leadings.add(leading);
            for (int i = 1; i < 4; i++) {
                if (leading - i >= 0) {
                    leadings.add(leading - i);
                }
                leadings.add(leading + i);
            }
            myLeadingTrim = new int[leadings.size()];
            for (int i = 0; i < myLeadingTrim.length; i++) {
                myLeadingTrim[i] = leadings.get(i);
            }
        }
    }
    if (myArgsEngine.getBoolean("-o")) {
        myOutputDir = myArgsEngine.getString("-o");
    }
    this.makeOutputDir();
}
use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.
the class SamFileExtract method setParameters.
/**
 * Parses the command-line arguments and stores the resulting settings.
 *
 * <p>{@code -i} (stored in {@code bam_in}) and {@code -b} (stored in
 * {@code bed_in}) are mandatory. {@code -o} (stored in {@code bam_out}) is
 * optional; a warning is logged when it is absent.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given or a mandatory
 *         option is missing
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }
    if (myArgsEngine == null) {
        myArgsEngine = new ArgsEngine();
        myArgsEngine.add("-i", "--input-file", true);
        myArgsEngine.add("-b", "--bed-dir", true);
        myArgsEngine.add("-o", "--output-file", true);
        myArgsEngine.parse(args);
    }
    if (myArgsEngine.getBoolean("-i")) {
        bam_in = myArgsEngine.getString("-i");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify your input file (-i/--input-file).");
    }
    if (myArgsEngine.getBoolean("-b")) {
        bed_in = myArgsEngine.getString("-b");
    } else {
        printUsage();
        throw new IllegalArgumentException("Please specify your BED input (-b/--bed-dir).");
    }
    if (myArgsEngine.getBoolean("-o")) {
        bam_out = myArgsEngine.getString("-o");
    } else {
        // Kept as a warning (not an error) to preserve the original control flow.
        myLogger.warn("No output file specified (-o/--output-file).");
    }
}
use of cz1.gbs.core.Barcode in project polyGembler by c-zhou.
the class GBSSimulator method setParameters.
/**
 * Reads the simulator options from the command line and constructs the
 * {@code GBS} instance held in {@code gbs}.
 *
 * <p>Only {@code -f} is mandatory. Defaults used when an option is absent:
 * enzyme "PstI", average depth 5, standard deviation 5, output "./", and a
 * random seed taken from {@code System.nanoTime()}; the library, barcode and
 * quality-file settings default to {@code null}.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if no arguments are given or {@code -f} is missing
 */
@Override
public void setParameters(String[] args) {
    if (args.length == 0) {
        printUsage();
        throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
    }

    if (myArgsEngine == null) {
        // Short/long option pairs, registered in this exact order.
        final String[][] optionTable = {
            {"-f", "--fasta-file"},
            {"-e", "--enzyme"},
            {"-l", "--library"},
            {"-t", "--threads"},
            {"-b", "--barcode-file"},
            {"-m", "--avg-depth"},
            {"-s", "--sdev"},
            {"-S", "--random-seed"},
            {"-q", "--quality-file"},
            {"-o", "--output-file"},
        };
        myArgsEngine = new ArgsEngine();
        for (String[] option : optionTable) {
            myArgsEngine.add(option[0], option[1], true);
        }
        myArgsEngine.parse(args);
    }

    // The FASTA input is the only required option.
    if (!myArgsEngine.getBoolean("-f")) {
        printUsage();
        throw new IllegalArgumentException("Please specify the FASTA files.");
    }
    final String fastaFileDir = myArgsEngine.getString("-f");

    final String enzymeName =
            myArgsEngine.getBoolean("-e") ? myArgsEngine.getString("-e") : "PstI";
    final String libPrepFilePath =
            myArgsEngine.getBoolean("-l") ? myArgsEngine.getString("-l") : null;
    final String barcodeFilePath =
            myArgsEngine.getBoolean("-b") ? myArgsEngine.getString("-b") : null;

    if (myArgsEngine.getBoolean("-t")) {
        THREADS = Integer.parseInt(myArgsEngine.getString("-t"));
    }

    final double avg =
            myArgsEngine.getBoolean("-m") ? Double.parseDouble(myArgsEngine.getString("-m")) : 5;
    final double sd =
            myArgsEngine.getBoolean("-s") ? Double.parseDouble(myArgsEngine.getString("-s")) : 5;
    final long randomSeed =
            myArgsEngine.getBoolean("-S") ? Long.parseLong(myArgsEngine.getString("-S")) : System.nanoTime();
    final String params =
            myArgsEngine.getBoolean("-q") ? myArgsEngine.getString("-q") : null;
    final String outputDir =
            myArgsEngine.getBoolean("-o") ? myArgsEngine.getString("-o") : "./";

    gbs = new GBS(fastaFileDir, enzymeName, avg, sd, params, libPrepFilePath, barcodeFilePath, outputDir, randomSeed);
}
Aggregations