Search in sources :

Example 1 with SAMRecord

use of net.sf.samtools.SAMRecord in project SeqMonk by s-andrews.

the class BAMFileParser method setOptionsFromFile.

private void setOptionsFromFile(File file) {
    // This just reads the first few thousand lines from the first file and
    // tries to set the preferences options to the correct defaults.
    SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
    SAMFileReader inputSam = new SAMFileReader(file);
    int lineCount = 0;
    // Now process the file
    boolean pairedEnd = false;
    boolean spliced = false;
    int maxQ = 0;
    for (SAMRecord samRecord : inputSam) {
        ++lineCount;
        if (lineCount == 100000)
            break;
        if (samRecord.getMappingQuality() > maxQ) {
            maxQ = samRecord.getMappingQuality();
        }
        if (samRecord.getCigarString().contains("N")) {
            spliced = true;
        }
        if (samRecord.getReadPairedFlag()) {
            pairedEnd = true;
        }
    }
    inputSam.close();
    prefs.setPairedEnd(pairedEnd);
    prefs.setSpliced(spliced);
    prefs.setMinMappingQuality(Math.min(maxQ, 20));
}
Also used : SAMRecord(net.sf.samtools.SAMRecord) SAMFileReader(net.sf.samtools.SAMFileReader)

Example 2 with SAMRecord

use of net.sf.samtools.SAMRecord in project SeqMonk by s-andrews.

the class BAMFileParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    pairedEndImport = prefs.pairedEnd();
    pairedEndDistance = prefs.pairDistanceCutoff();
    separateSplicedReads = prefs.splitSplicedReads();
    importIntrons = prefs.importIntrons();
    extendBy = prefs.extendReads();
    minMappingQuality = prefs.minMappingQuality();
    primaryAlignmentsOnly = prefs.primaryAlignmentsOnly();
    File[] samFiles = getFiles();
    DataSet[] newData = new DataSet[samFiles.length];
    try {
        for (int f = 0; f < samFiles.length; f++) {
            SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
            SAMFileReader inputSam = new SAMFileReader(samFiles[f]);
            if (prefs.isHiC()) {
                newData[f] = new PairedDataSet(samFiles[f].getName(), samFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
            } else {
                newData[f] = new DataSet(samFiles[f].getName(), samFiles[f].getCanonicalPath(), prefs.removeDuplicates());
            }
            int lineCount = 0;
            // Now process the file
            // A flag we can set to skip the next record if we're getting
            // out of sync during single end HiC import.
            boolean skipNext = false;
            for (SAMRecord samRecord : inputSam) {
                if (skipNext) {
                    skipNext = false;
                    continue;
                }
                if (cancel) {
                    inputSam.close();
                    progressCancelled();
                    return;
                }
                ++lineCount;
                if (lineCount % 100000 == 0) {
                    progressUpdated("Read " + lineCount + " lines from " + samFiles[f].getName(), f, samFiles.length);
                }
                if (pairedEndImport && !samRecord.getReadPairedFlag()) {
                    progressWarningReceived(new SeqMonkException("Data was single ended during paired end import"));
                    continue;
                }
                if (samRecord.getReadUnmappedFlag()) {
                    // There was no match
                    continue;
                }
                if (primaryAlignmentsOnly && samRecord.getNotPrimaryAlignmentFlag()) {
                    // alignments
                    continue;
                }
                if (pairedEndImport && !separateSplicedReads && samRecord.getMateUnmappedFlag()) {
                    // No match on the reverse strand.  Doesn't matter if we're doing spliced reads.
                    continue;
                }
                if (minMappingQuality > 0 && samRecord.getMappingQuality() < minMappingQuality) {
                    if (prefs.isHiC()) {
                        if (((PairedDataSet) newData[f]).importSequenceSkipped()) {
                            // Skip the next line
                            skipNext = true;
                        }
                    }
                    continue;
                }
                if (pairedEndImport && !separateSplicedReads && !samRecord.getReadNegativeStrandFlag()) {
                    // field.
                    continue;
                }
                if (pairedEndImport && !separateSplicedReads && !samRecord.getReferenceName().equals(samRecord.getMateReferenceName())) {
                    // progressWarningReceived(new SeqMonkException("Paired reads mapped to different chromosomes"));
                    continue;
                }
                // TODO: Check what this actually stores - might be a real name rather than 0/=
                if (pairedEndImport && !separateSplicedReads && !prefs.isHiC() && samRecord.getMateReferenceName() == "0") {
                    if (samRecord.getMateReferenceName() != "=") {
                        inputSam.close();
                        throw new SeqMonkException("Unexpected mate referenece name " + samRecord.getMateReferenceName());
                    }
                    // Matches were on different chromosomes
                    continue;
                }
                try {
                    if (pairedEndImport && !separateSplicedReads) {
                        SequenceReadWithChromosome read = getPairedEndRead(samRecord);
                        newData[f].addData(read.chromosome, read.read);
                    } else if (separateSplicedReads) {
                        SequenceReadWithChromosome[] reads = getSplitSingleEndRead(samRecord);
                        for (int r = 0; r < reads.length; r++) {
                            newData[f].addData(reads[r].chromosome, reads[r].read);
                        }
                    } else {
                        SequenceReadWithChromosome read = getSingleEndRead(samRecord);
                        newData[f].addData(read.chromosome, read.read);
                    }
                } catch (SeqMonkException ex) {
                    progressWarningReceived(ex);
                    if (prefs.isHiC()) {
                        if (((PairedDataSet) newData[f]).importSequenceSkipped()) {
                            // Skip the next line
                            skipNext = true;
                        }
                    }
                }
            }
            // We're finished with the file.
            inputSam.close();
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + samFiles[f].getName(), f, samFiles.length);
            newData[f].finalise();
        }
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
    processingFinished(newData);
}
Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) SAMRecord(net.sf.samtools.SAMRecord) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) SAMFileReader(net.sf.samtools.SAMFileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SequenceReadWithChromosome(uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)

Aggregations

SAMFileReader (net.sf.samtools.SAMFileReader)2 SAMRecord (net.sf.samtools.SAMRecord)2 File (java.io.File)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)1 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)1