use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class FindMyVirus method doWork.
@Override
public int doWork(List<String> args) {
if (virusNames.isEmpty()) {
LOG.error("no virus name");
return -1;
}
SamReader sfr = null;
SAMFileWriter[] sfwArray = new SAMFileWriter[CAT.values().length];
try {
sfr = openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader();
for (CAT category : CAT.values()) {
LOG.info("Opening " + category);
SAMFileHeader header2 = header.clone();
header2.addComment("Category:" + category.name());
header2.addComment("Description:" + category.getDescription());
SAMProgramRecord rec = header2.createProgramRecord();
rec.setCommandLine(this.getProgramCommandLine());
rec.setProgramName(getProgramName());
rec.setProgramVersion(getVersion());
rec.setAttribute("CAT", category.name());
File outputFile = new File(this.outputFile.getParentFile(), this.outputFile.getName() + "." + category.name() + ".bam");
LOG.info("Opening " + outputFile);
File countFile = new File(outputFile.getParentFile(), outputFile.getName() + "." + category.name() + ".count.txt");
SAMFileWriter sfw = writingBamArgs.openSAMFileWriter(outputFile, header2, true);
sfw = new SAMFileWriterCount(sfw, countFile, category);
sfwArray[category.ordinal()] = sfw;
}
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
OtherCanonicalAlignFactory xpAlignFactory = new OtherCanonicalAlignFactory(header);
SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
progress.watch(rec);
CAT category = null;
List<OtherCanonicalAlign> xpList = Collections.emptyList();
if (category == null && !rec.getReadPairedFlag()) {
category = CAT.unpaired;
}
if (category == null && rec.isSecondaryOrSupplementary()) {
category = CAT.secondary;
}
if (category == null && rec.getReadFailsVendorQualityCheckFlag()) {
category = CAT.failsqual;
}
if (category == null && rec.getDuplicateReadFlag()) {
category = CAT.duplicate;
}
if (category == null && rec.getReadUnmappedFlag()) {
category = CAT.unmapped;
}
if (category == null) {
xpList = xpAlignFactory.getXPAligns(rec);
}
boolean xp_containsVirus = false;
boolean xp_containsChrom = false;
for (OtherCanonicalAlign xpa : xpList) {
if (virusNames.contains(xpa.getReferenceName())) {
xp_containsVirus = true;
} else {
xp_containsChrom = true;
}
}
/* both reads mapped on ref */
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) {
if (!xp_containsVirus) {
category = CAT.both_ref;
} else {
category = CAT.ref_and_virus_spliced;
}
}
/* pair(unmapped,mapped on reference) */
if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getMateReferenceName())))) {
if (!xp_containsVirus) {
category = CAT.ref_orphan;
} else {
category = CAT.ref_and_virus_spliced;
}
}
/* both reads mapped on virus */
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())) {
if (!xp_containsChrom) {
category = CAT.both_virus;
} else {
category = CAT.ref_and_virus_spliced;
}
}
if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getMateReferenceName())))) {
if (!xp_containsChrom) {
category = CAT.virus_orphan;
} else {
category = CAT.ref_and_virus_spliced;
}
}
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && ((virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) || (!virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())))) {
category = CAT.ref_and_virus;
}
/*dispatch */
if (category == null) {
LOG.warning("Not handled: " + rec);
category = CAT.undetermined;
}
sfwArray[category.ordinal()].addAlignment(rec);
}
for (SAMFileWriter sfw : sfwArray) {
LOG.info("Closing " + sfw);
sfw.close();
}
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
LOG.info("Closing");
CloserUtil.close(sfr);
CloserUtil.close(sfwArray);
}
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class BamStage method createProgramRecordPane.
private Tab createProgramRecordPane(final SAMFileHeader header) {
final TableView<SAMProgramRecord> table = new TableView<>(header == null ? FXCollections.observableArrayList() : FXCollections.observableArrayList(header.getProgramRecords()));
table.getColumns().add(makeColumn("ID", G -> G.getId()));
table.getColumns().add(makeColumn("PG-ID", G -> G.getProgramGroupId()));
table.getColumns().add(makeColumn("Prev-PG-ID", G -> G.getPreviousProgramGroupId()));
table.getColumns().add(makeColumn("Version", G -> G.getProgramVersion()));
table.getColumns().add(makeColumn("Command", G -> G.getCommandLine()));
final Tab tab = new Tab("PG", table);
tab.setClosable(false);
table.setPlaceholder(new Label("No Program-Group."));
return tab;
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class Biostar139647 method doWork.
@Override
public int doWork(final List<String> args) {
SAMFileWriter w = null;
LineIterator r = null;
try {
final String filename = super.oneFileOrNull(args);
if (filename == null) {
LOG.info("Reading from stdin");
r = IOUtils.openStreamForLineIterator(stdin());
} else {
LOG.info("Reading from " + filename);
r = IOUtils.openURIForLineIterator(filename);
}
Format format = Format.None;
while (r.hasNext() && format == Format.None) {
String line = r.peek();
if (line.trim().isEmpty()) {
r.next();
continue;
}
if (line.startsWith("CLUSTAL")) {
format = Format.Clustal;
// consume
r.next();
break;
} else if (line.startsWith(">")) {
format = Format.Fasta;
break;
} else {
LOG.error("MSA format not recognized in " + line);
return -1;
}
}
LOG.info("format : " + format);
if (Format.Fasta.equals(format)) {
// this.consensus=new FastaConsensus();
AlignSequence curr = null;
while (r.hasNext()) {
String line = r.next();
if (line.startsWith(">")) {
curr = new AlignSequence();
curr.name = line.substring(1).trim();
if (sample2sequence.containsKey(curr.name)) {
LOG.error("Sequence ID " + curr.name + " defined twice");
return -1;
}
sample2sequence.put(curr.name, curr);
} else if (curr != null) {
curr.seq.append(line.trim());
this.align_length = Math.max(this.align_length, curr.seq.length());
}
}
} else if (Format.Clustal.equals(format)) {
AlignSequence curr = null;
int columnStart = -1;
while (r.hasNext()) {
String line = r.next();
if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
columnStart = -1;
continue;
}
if (line.charAt(0) == ' ') {
if (columnStart == -1) {
LOG.error("illegal consensus line for " + line);
return -1;
}
} else {
if (columnStart == -1) {
columnStart = line.indexOf(' ');
if (columnStart == -1) {
LOG.error("no whithespace in " + line);
return -1;
}
while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
columnStart++;
}
}
String seqname = line.substring(0, columnStart).trim();
curr = this.sample2sequence.get(seqname);
if (curr == null) {
curr = new AlignSequence();
curr.name = seqname;
this.sample2sequence.put(curr.name, curr);
}
curr.seq.append(line.substring(columnStart));
this.align_length = Math.max(align_length, curr.seq.length());
}
}
} else {
LOG.error("Undefined input format");
return -1;
}
final SAMFileHeader header = new SAMFileHeader();
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
dict.addSequence(new SAMSequenceRecord(REF, this.align_length));
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(dict);
final SAMProgramRecord pgr = header.createProgramRecord();
pgr.setProgramName(getProgramName());
pgr.setProgramVersion(getVersion());
if (getProgramCommandLine().trim().isEmpty()) {
pgr.setCommandLine("(empty)");
} else {
pgr.setCommandLine(getProgramCommandLine());
}
w = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, false);
final DefaultSAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
for (final String seqName : this.sample2sequence.keySet()) {
final AlignSequence shortRead = this.sample2sequence.get(seqName);
final SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(seqName);
rec.setReadString(shortRead.seq.toString().replaceAll("[\\*\\- ]+", ""));
int start = 0;
while (start < shortRead.seq.length() && !Character.isLetter(shortRead.at(start))) {
start++;
}
rec.setAlignmentStart(start + 1);
int end = shortRead.seq.length() - 1;
while (end > 0 && !Character.isLetter(shortRead.at(end))) {
--end;
}
// rec.setAlignmentEnd(end+1); not supported
int n = start;
StringBuilder dna = new StringBuilder();
final List<CigarElement> cigars = new ArrayList<>();
while (n <= end) {
if (!Character.isLetter(shortRead.at(n))) {
cigars.add(new CigarElement(1, CigarOperator.D));
} else {
cigars.add(new CigarElement(1, CigarOperator.M));
dna.append(shortRead.at(n));
}
n++;
}
// simplify cigar string
n = 0;
while (n + 1 < cigars.size()) {
CigarElement c1 = cigars.get(n);
CigarElement c2 = cigars.get(n + 1);
if (c1.getOperator().equals(c2.getOperator())) {
cigars.set(n, new CigarElement(c1.getLength() + c2.getLength(), c1.getOperator()));
cigars.remove(n + 1);
} else {
++n;
}
}
rec.setReadPairedFlag(false);
rec.setMappingQuality(60);
rec.setReferenceName(REF);
rec.setReadString(dna.toString());
rec.setCigar(new Cigar(cigars));
rec.setAttribute("PG", pgr.getId());
w.addAlignment(rec);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
Aggregations