Use of net.sf.samtools.SAMRecord in project SeqMonk by s-andrews.
The class BAMFileParser, method setOptionsFromFile.
private void setOptionsFromFile(File file) {

	// This reads up to the first 100,000 records from the file and
	// tries to set the preferences options to sensible defaults.

	SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
	SAMFileReader inputSam = new SAMFileReader(file);

	int lineCount = 0;

	// Now process the file
	boolean pairedEnd = false;
	boolean spliced = false;
	int maxQ = 0;

	for (SAMRecord samRecord : inputSam) {
		++lineCount;
		if (lineCount == 100000) break;

		if (samRecord.getMappingQuality() > maxQ) {
			maxQ = samRecord.getMappingQuality();
		}

		// An 'N' CIGAR operation is a skipped region, ie a spliced read
		if (samRecord.getCigarString().contains("N")) {
			spliced = true;
		}

		if (samRecord.getReadPairedFlag()) {
			pairedEnd = true;
		}
	}

	inputSam.close();

	prefs.setPairedEnd(pairedEnd);
	prefs.setSpliced(spliced);

	// Default the quality cutoff to 20, but never above the highest quality actually seen
	prefs.setMinMappingQuality(Math.min(maxQ, 20));
}
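Note that SAMFileReader and the net.sf.samtools package come from the legacy samtools Java library, which has since been superseded by htsjdk, where SAMFileReader is replaced by SamReaderFactory and SamReader. As a minimal sketch of the same sampling pass under htsjdk (the BamDefaultsSniffer class, the Defaults holder, and the sniffDefaults method are illustrative names, not part of SeqMonk), it might look like this:

import java.io.File;
import java.io.IOException;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;

public class BamDefaultsSniffer {

	// Holder for the guessed defaults (hypothetical helper, not part of SeqMonk)
	public static class Defaults {
		public boolean pairedEnd;
		public boolean spliced;
		public int minMappingQuality;
	}

	// Scan up to the first 100,000 records and guess sensible import
	// defaults, mirroring the logic of setOptionsFromFile above.
	public static Defaults sniffDefaults(File file) throws IOException {
		Defaults d = new Defaults();
		int maxQ = 0;
		int count = 0;

		try (SamReader reader = SamReaderFactory.makeDefault()
				.validationStringency(ValidationStringency.SILENT)
				.open(file)) {
			for (SAMRecord rec : reader) {
				if (++count > 100000) break;
				maxQ = Math.max(maxQ, rec.getMappingQuality());
				if (rec.getCigarString().contains("N")) d.spliced = true; // skipped region => spliced
				if (rec.getReadPairedFlag()) d.pairedEnd = true;
			}
		}

		// Default the quality cutoff to 20, but never above the best quality seen
		d.minMappingQuality = Math.min(maxQ, 20);
		return d;
	}
}

The try-with-resources block guarantees the reader is closed even if iteration fails, which the legacy code above has to do by hand with inputSam.close().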
Use of net.sf.samtools.SAMRecord in project SeqMonk by s-andrews.
The class BAMFileParser, method run.
/* (non-Javadoc)
 * @see java.lang.Runnable#run()
 */
public void run() {

	pairedEndImport = prefs.pairedEnd();
	pairedEndDistance = prefs.pairDistanceCutoff();
	separateSplicedReads = prefs.splitSplicedReads();
	importIntrons = prefs.importIntrons();
	extendBy = prefs.extendReads();
	minMappingQuality = prefs.minMappingQuality();
	primaryAlignmentsOnly = prefs.primaryAlignmentsOnly();

	File[] samFiles = getFiles();
	DataSet[] newData = new DataSet[samFiles.length];

	try {
		for (int f = 0; f < samFiles.length; f++) {

			SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
			SAMFileReader inputSam = new SAMFileReader(samFiles[f]);

			if (prefs.isHiC()) {
				newData[f] = new PairedDataSet(samFiles[f].getName(), samFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
			}
			else {
				newData[f] = new DataSet(samFiles[f].getName(), samFiles[f].getCanonicalPath(), prefs.removeDuplicates());
			}

			int lineCount = 0;

			// Now process the file

			// A flag we can set to skip the next record if we're getting
			// out of sync during single end HiC import.
			boolean skipNext = false;

			for (SAMRecord samRecord : inputSam) {

				if (skipNext) {
					skipNext = false;
					continue;
				}

				if (cancel) {
					inputSam.close();
					progressCancelled();
					return;
				}

				++lineCount;

				if (lineCount % 100000 == 0) {
					progressUpdated("Read " + lineCount + " lines from " + samFiles[f].getName(), f, samFiles.length);
				}
				if (pairedEndImport && !samRecord.getReadPairedFlag()) {
					progressWarningReceived(new SeqMonkException("Data was single ended during paired end import"));
					continue;
				}

				if (samRecord.getReadUnmappedFlag()) {
					// There was no match
					continue;
				}

				if (primaryAlignmentsOnly && samRecord.getNotPrimaryAlignmentFlag()) {
					// Skip secondary alignments
					continue;
				}

				if (pairedEndImport && !separateSplicedReads && samRecord.getMateUnmappedFlag()) {
					// No match on the reverse strand. Doesn't matter if we're doing spliced reads.
					continue;
				}

				if (minMappingQuality > 0 && samRecord.getMappingQuality() < minMappingQuality) {
					// The read fails the mapping quality filter
					if (prefs.isHiC()) {
						if (((PairedDataSet) newData[f]).importSequenceSkipped()) {
							// Skip the next line
							skipNext = true;
						}
					}
					continue;
				}

				if (pairedEndImport && !separateSplicedReads && !samRecord.getReadNegativeStrandFlag()) {
					// For paired end import we only process the negative strand read, so
					// each pair is imported once and the insert size is taken from the
					// correct field.
					continue;
				}

				if (pairedEndImport && !separateSplicedReads && !samRecord.getReferenceName().equals(samRecord.getMateReferenceName())) {
					// The two reads mapped to different chromosomes
					// progressWarningReceived(new SeqMonkException("Paired reads mapped to different chromosomes"));
					continue;
				}
				// TODO: Check what this actually stores - might be a real name rather than 0/=
				if (pairedEndImport && !separateSplicedReads && !prefs.isHiC() && samRecord.getMateReferenceName().equals("0")) {
					if (!samRecord.getMateReferenceName().equals("=")) {
						inputSam.close();
						throw new SeqMonkException("Unexpected mate reference name " + samRecord.getMateReferenceName());
					}
					// Matches were on different chromosomes
					continue;
				}
				try {
					if (pairedEndImport && !separateSplicedReads) {
						SequenceReadWithChromosome read = getPairedEndRead(samRecord);
						newData[f].addData(read.chromosome, read.read);
					}
					else if (separateSplicedReads) {
						SequenceReadWithChromosome[] reads = getSplitSingleEndRead(samRecord);
						for (int r = 0; r < reads.length; r++) {
							newData[f].addData(reads[r].chromosome, reads[r].read);
						}
					}
					else {
						SequenceReadWithChromosome read = getSingleEndRead(samRecord);
						newData[f].addData(read.chromosome, read.read);
					}
				}
				catch (SeqMonkException ex) {
					progressWarningReceived(ex);
					if (prefs.isHiC()) {
						if (((PairedDataSet) newData[f]).importSequenceSkipped()) {
							// Skip the next line
							skipNext = true;
						}
					}
				}
			}

			// We're finished with the file.
			inputSam.close();

			// Cache the data in the new dataset
			progressUpdated("Caching data from " + samFiles[f].getName(), f, samFiles.length);
			newData[f].finalise();
		}
	}
	catch (Exception ex) {
		progressExceptionReceived(ex);
		return;
	}

	processingFinished(newData);
}
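When separateSplicedReads is set, the work of turning one spliced alignment into several reads is delegated to getSplitSingleEndRead, whose body is not shown in this excerpt. As a rough sketch of the underlying technique, assuming only standard CIGAR semantics (the SpliceSplitter class and its int[] start/end block representation are illustrative, not SeqMonk's actual types), a mapped read can be split at each 'N' operator like this:

import java.util.ArrayList;
import java.util.List;

import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord;

public class SpliceSplitter {

	// Split a mapped alignment into its exonic blocks, one {start, end} pair
	// per block, in 1-based inclusive reference coordinates. Blocks are
	// separated wherever the CIGAR contains an 'N' (skipped region) operator.
	public static List<int[]> splitBySplices(SAMRecord rec) {
		List<int[]> blocks = new ArrayList<int[]>();
		int refPos = rec.getAlignmentStart();
		int blockStart = refPos;

		for (CigarElement el : rec.getCigar().getCigarElements()) {
			switch (el.getOperator()) {
				case M: case EQ: case X: case D:
					// These operators consume reference bases within the current block
					refPos += el.getLength();
					break;
				case N:
					// End the current block and start a new one after the gap
					blocks.add(new int[] { blockStart, refPos - 1 });
					refPos += el.getLength();
					blockStart = refPos;
					break;
				default:
					// I, S, H and P consume no reference bases
					break;
			}
		}
		blocks.add(new int[] { blockStart, refPos - 1 });
		return blocks;
	}
}

Whether SeqMonk's implementation also uses the importIntrons preference read at the top of run() to emit the gaps between blocks instead of the blocks themselves cannot be seen from this excerpt, but the same CIGAR walk would support either output.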