use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.
the class SamToTaxa method distributeSortedBam.
private void distributeSortedBam() {
// TODO Auto-generated method stub
long start = System.currentTimeMillis();
try {
File indexFile = new File(myIndexFileName);
myLogger.info("Using the following index file:");
myLogger.info(indexFile.getAbsolutePath());
File samFile = new File(mySamFileName);
myLogger.info("Using the following BAM file:");
myLogger.info(samFile.getAbsolutePath());
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(samFile);
header_sam = inputSam.getFileHeader();
header_sam.setSortOrder(SortOrder.unsorted);
indexReader = Utils.getBufferedReader(indexFile, 65536);
String line = indexReader.readLine();
String[] samples = line.split("\\s+");
for (int i = 1; i < samples.length; i++) {
String taxa = samples[i];
SAMFileHeader header = header_sam.clone();
SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
rg.setSample(taxa);
header.addReadGroup(rg);
bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
locks.put(taxa, new Object());
}
SAMRecordIterator iter = inputSam.iterator();
cache();
this.initial_thread_pool();
final int block = 10000;
int bS = 0;
SAMRecord[] Qs = new SAMRecord[block];
SAMRecord temp = iter.next();
int k = 0;
allReads = 0;
long tag;
while (temp != null) {
allReads++;
Qs[k] = temp;
temp = iter.hasNext() ? iter.next() : null;
k++;
tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
if (k == block || temp == null || tag >= cursor) {
executor.submit(new Runnable() {
private SAMRecord[] sam;
private int block_i;
@Override
public void run() {
// TODO Auto-generated method stub
long tag;
Tag tagObj;
String taxa;
for (int i = 0; i < sam.length; i++) {
try {
if (sam[i] == null)
break;
tag = Long.parseLong(sam[i].getReadName());
tagObj = indexMap.get(tag);
int l = tagObj.taxa.length;
long cov = 0;
for (int t = 0; t < l; t++) cov += tagObj.count[t];
if (cov > maxCov) {
myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
continue;
}
for (int t = 0; t < l; t++) {
taxa = tagObj.taxa[t];
sam[i].setAttribute("RG", taxa);
int p = tagObj.count[t];
synchronized (locks.get(taxa)) {
for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
}
}
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
if (block * (block_i + 1) % 1000000 == 0)
myLogger.info("Tag processed: " + block * (block_i + 1));
}
public Runnable init(SAMRecord[] sam, int bS) {
this.sam = sam;
this.block_i = bS;
return (this);
}
}.init(Qs, bS));
k = 0;
Qs = new SAMRecord[block];
bS++;
if (temp == null || tag >= cursor) {
this.waitFor();
if (temp != null) {
cache();
this.initial_thread_pool();
}
}
}
}
iter.close();
inputSam.close();
indexReader.close();
// executor.awaitTermination(365, TimeUnit.DAYS);
for (String key : bam_writers.keySet()) bam_writers.get(key).close();
myLogger.info("Total number of reads in lane=" + allReads);
myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
} catch (Exception e) {
e.printStackTrace();
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class BamStats02 method run.
private void run(String filename, SamReader r, PrintWriter out) {
final Counter<Category> counter = new Counter<>();
SAMRecordIterator iter = null;
try {
iter = r.iterator();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
final Category cat = new Category();
cat.set(STRING_PROPS.filename, filename);
cat.flag = record.getFlags();
cat.set(INT_PROPS.inTarget, -1);
final SAMReadGroupRecord g = record.getReadGroup();
if (g != null) {
cat.set(STRING_PROPS.samplename, g.getSample());
cat.set(STRING_PROPS.platform, g.getPlatform());
cat.set(STRING_PROPS.platformUnit, g.getPlatformUnit());
cat.set(STRING_PROPS.library, g.getLibrary());
}
final ShortReadName readName = ShortReadName.parse(record);
if (readName.isValid()) {
cat.set(STRING_PROPS.instrument, readName.getInstrumentName());
cat.set(STRING_PROPS.flowcell, readName.getFlowCellId());
cat.set(INT_PROPS.lane, readName.getFlowCellLane());
cat.set(INT_PROPS.run, readName.getRunId());
}
if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
cat.set(STRING_PROPS.mate_chromosome, record.getMateReferenceName());
}
if (!record.getReadUnmappedFlag()) {
cat.set(INT_PROPS.mapq, (int) (Math.ceil(record.getMappingQuality() / 10.0) * 10));
cat.set(STRING_PROPS.chromosome, record.getReferenceName());
if (this.intervals != null) {
if (this.intervals.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
cat.set(INT_PROPS.inTarget, 1);
} else {
cat.set(INT_PROPS.inTarget, 0);
}
}
}
counter.incr(cat);
}
progress.finish();
for (final Category cat : counter.keySetDecreasing()) {
cat.print(out);
out.print("\t");
out.print(counter.count(cat));
out.println();
}
out.flush();
} finally {
CloserUtil.close(iter);
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class BamStats05 method doWork.
protected int doWork(final PrintWriter pw, final Map<String, List<Interval>> gene2interval, final String filename, final SamReader IN) throws Exception {
try {
LOG.info("Scanning " + filename);
final SAMFileHeader header = IN.getFileHeader();
final List<SAMReadGroupRecord> rgs = header.getReadGroups();
if (rgs == null || rgs.isEmpty())
throw new IOException("No read groups in " + filename);
final Set<String> groupNames = this.groupBy.getPartitions(rgs);
for (final String partition : groupNames) {
if (partition.isEmpty())
throw new IOException("Empty " + groupBy.name());
for (final String gene : gene2interval.keySet()) {
int geneStart = Integer.MAX_VALUE;
int geneEnd = 0;
final List<Integer> counts = new ArrayList<>();
for (final Interval interval : gene2interval.get(gene)) {
geneStart = Math.min(geneStart, interval.getStart() - 1);
geneEnd = Math.max(geneEnd, interval.getEnd());
/* picard javadoc: - Sequence name - Start position (1-based) - End position (1-based, end inclusive) */
int[] interval_counts = new int[interval.getEnd() - interval.getStart() + 1];
if (interval_counts.length == 0)
continue;
Arrays.fill(interval_counts, 0);
if (IN.getFileHeader().getSequenceIndex(interval.getContig()) == -1) {
throw new IllegalArgumentException("NO DICT FOR \"" + interval.getContig() + "\"");
}
/**
* start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* end - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
*/
SAMRecordIterator r = IN.query(new QueryInterval[] { new QueryInterval(header.getSequenceIndex(interval.getContig()), interval.getStart(), interval.getEnd()) }, false);
while (r.hasNext()) {
final SAMRecord rec = r.next();
if (rec.getReadUnmappedFlag())
continue;
if (filter.filterOut(rec))
continue;
if (!rec.getReferenceName().equals(interval.getContig()))
continue;
final SAMReadGroupRecord rg = rec.getReadGroup();
if (rg == null || !partition.equals(this.groupBy.apply(rg)))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
int refpos1 = rec.getAlignmentStart();
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
if (!op.consumesReferenceBases())
continue;
if (op.consumesReadBases()) {
for (int i = 0; i < ce.getLength(); ++i) {
if (refpos1 + i >= interval.getStart() && refpos1 + i <= interval.getEnd()) {
interval_counts[refpos1 + i - interval.getStart()]++;
}
}
}
refpos1 += ce.getLength();
}
}
/* end while r */
r.close();
for (int d : interval_counts) {
counts.add(d);
}
}
/* end interval */
Collections.sort(counts);
int count_no_coverage = 0;
double mean = 0;
for (int cov : counts) {
if (cov <= MIN_COVERAGE)
++count_no_coverage;
mean += cov;
}
mean /= counts.size();
pw.println(gene2interval.get(gene).get(0).getContig() + "\t" + geneStart + "\t" + geneEnd + "\t" + gene + "\t" + partition + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1) + "\t" + mean + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
}
// end gene
}
// end sample
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(IN);
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class IgvReview method start.
@Override
public void start(final Stage stage) throws Exception {
stage.setTitle(getClass().getSimpleName());
Predicate<VariantContext> ctxFilter;
Map<String, String> params = super.getParameters().getNamed();
if (params.containsKey("--filter")) {
ctxFilter = JexlVariantPredicate.create(params.get("--filter"));
} else {
ctxFilter = V -> true;
}
final List<String> args = super.getParameters().getUnnamed();
final File configFile;
if (args.isEmpty()) {
final FileChooser fc = new FileChooser();
final String lastDirStr = preferences.get(LAST_USED_DIR_KEY, null);
if (lastDirStr != null && !lastDirStr.isEmpty()) {
fc.setInitialDirectory(new File(lastDirStr));
}
fc.getExtensionFilters().addAll(Collections.singletonList(new FileChooser.ExtensionFilter("Config file", "*.config", "*.cfg", "*.list")));
configFile = fc.showOpenDialog(stage);
} else if (args.size() == 1) {
configFile = new File(args.get(0));
} else {
configFile = null;
}
if (configFile == null || !configFile.exists()) {
JfxUtils.dialog().cause("Illegal number of arguments or file doesn't exists.").show(stage);
Platform.exit();
return;
}
if (configFile.isFile() && configFile.getParentFile() != null) {
this.preferences.put(LAST_USED_DIR_KEY, configFile.getParentFile().getPath());
}
final List<String> configLines = Files.readAllLines(configFile.toPath());
final Predicate<String> ignoreComment = (S) -> !S.startsWith("#");
final Predicate<String> predVcf = S -> S.endsWith(".vcf") || S.endsWith(".vcf.gz");
if (configLines.stream().filter(ignoreComment).filter(predVcf).count() != 1) {
JfxUtils.dialog().cause("Found more than one vcf file in " + configFile).show(stage);
Platform.exit();
return;
}
final File vcfFile = configLines.stream().filter(ignoreComment).filter(predVcf).map(S -> new File(S)).findFirst().get();
LOG.info("Opening vcf file and loading in memory");
VCFFileReader vfr = null;
CloseableIterator<VariantContext> iter = null;
final Set<String> sampleNames;
try {
this.variants.clear();
vfr = new VCFFileReader(vcfFile, false);
this.vcfHeader = vfr.getFileHeader();
sampleNames = new HashSet<>(this.vcfHeader.getSampleNamesInOrder());
if (sampleNames.isEmpty()) {
JfxUtils.dialog().cause("No Genotypes in " + vcfFile).show(stage);
Platform.exit();
return;
}
iter = vfr.iterator();
this.variants.addAll(iter.stream().filter(ctxFilter).filter(CTX -> CTX.isVariant()).collect(Collectors.toList()));
} catch (final Exception err) {
JfxUtils.dialog().cause(err).show(stage);
Platform.exit();
return;
} finally {
CloserUtil.close(iter);
CloserUtil.close(vfr);
}
if (this.variants.isEmpty()) {
JfxUtils.dialog().cause("No Variants").show(stage);
Platform.exit();
return;
}
final SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
if (dict == null || dict.isEmpty()) {
JfxUtils.dialog().cause(JvarkitException.VcfDictionaryMissing.getMessage(vcfFile.getPath())).show(stage);
Platform.exit();
return;
}
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
configLines.stream().filter(ignoreComment).filter(S -> S.endsWith(".bam")).map(S -> new File(S)).forEach(F -> {
final SamReader samIn = srf.open(F);
final SAMFileHeader header = samIn.getFileHeader();
CloserUtil.close(samIn);
String sample = null;
for (final SAMReadGroupRecord rg : header.getReadGroups()) {
String s = rg.getSample();
if (s == null)
continue;
if (sample == null) {
sample = s;
} else if (!sample.equals(s)) {
JfxUtils.dialog().cause("Two samples in " + F).show(stage);
Platform.exit();
return;
}
}
if (sample == null) {
JfxUtils.dialog().cause("No sample in " + F + ". Ignoring").show(stage);
return;
}
if (!sampleNames.contains(sample)) {
JfxUtils.dialog().cause("Not in VCF header " + sample + " / " + F + ". Ignoring").show(stage);
return;
}
this.sample2bamFile.put(sample, F);
});
if (this.sample2bamFile.isEmpty()) {
JfxUtils.dialog().cause("No valid bam file in " + configFile).show(stage);
return;
}
sampleNames.retainAll(this.sample2bamFile.keySet());
if (sampleNames.isEmpty()) {
JfxUtils.dialog().cause("No Sample associated to bam").show(stage);
return;
}
ObservableList<VariantAndGenotype> genotypes = FXCollections.observableArrayList(this.variants.stream().flatMap(CTX -> CTX.getGenotypes().stream().filter(G -> sampleNames.contains(G.getSampleName())).map(G -> new VariantAndGenotype(CTX, G))).collect(Collectors.toList()));
if (genotypes.isEmpty()) {
JfxUtils.dialog().cause("No Genotype to show").show(stage);
return;
}
Menu menu = new Menu("File");
MenuItem menuItem = new MenuItem("Save as...");
menuItem.setOnAction(AE -> {
saveVariantsAs(stage);
});
menu.getItems().add(menuItem);
menuItem = new MenuItem("Save");
menuItem.setOnAction(AE -> {
if (this.saveAsFile != null) {
saveVariants(stage, this.saveAsFile);
} else {
saveVariantsAs(stage);
}
});
menu.getItems().add(menuItem);
menu.getItems().add(new SeparatorMenuItem());
menuItem = new MenuItem("Quit");
menuItem.setOnAction(AE -> {
Platform.exit();
});
menu.getItems().add(menuItem);
MenuBar bar = new MenuBar(menu);
this.genotypeTable = new TableView<>(genotypes);
this.genotypeTable.getColumns().add(makeColumn("CHROM", G -> G.ctx.getContig()));
this.genotypeTable.getColumns().add(makeColumn("POS", G -> G.ctx.getStart()));
this.genotypeTable.getColumns().add(makeColumn("ID", G -> G.ctx.getID()));
this.genotypeTable.getColumns().add(makeColumn("REF", G -> G.ctx.getReference().getDisplayString()));
this.genotypeTable.getColumns().add(makeColumn("ALT", G -> G.ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
this.genotypeTable.getColumns().add(makeColumn("Sample", G -> G.g.getSampleName()));
this.genotypeTable.getColumns().add(makeColumn("Type", G -> G.g.getType().name()));
this.genotypeTable.getColumns().add(makeColumn("Alleles", G -> G.g.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
TableColumn<VariantAndGenotype, String> reviewCol = new TableColumn<>("Review");
reviewCol.setCellValueFactory(C -> C.getValue().getReviewProperty());
reviewCol.setCellFactory(TextFieldTableCell.forTableColumn());
reviewCol.setOnEditCommit((E) -> {
int y = E.getTablePosition().getRow();
this.genotypeTable.getItems().get(y).setReview(E.getNewValue());
});
reviewCol.setEditable(true);
this.genotypeTable.getColumns().add(reviewCol);
this.genotypeTable.getSelectionModel().cellSelectionEnabledProperty().set(true);
this.genotypeTable.setEditable(true);
final ContextMenu cm = new ContextMenu();
MenuItem mi1 = new MenuItem("Menu 1");
cm.getItems().add(mi1);
MenuItem mi2 = new MenuItem("Menu 2");
cm.getItems().add(mi2);
this.genotypeTable.setOnMousePressed(event -> {
if (event.isPrimaryButtonDown() && (event.getClickCount() == 3 || event.isShiftDown())) {
moveIgvTo(stage, genotypeTable.getSelectionModel().getSelectedItem());
} else if (event.isSecondaryButtonDown()) {
cm.show(genotypeTable, event.getScreenX(), event.getScreenY());
}
});
final BorderPane pane2 = new BorderPane(this.genotypeTable);
pane2.setPadding(new Insets(10, 10, 10, 10));
VBox vbox1 = new VBox(bar, pane2);
final Scene scene = new Scene(vbox1, 500, 300);
stage.setScene(scene);
stage.show();
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class Biostar78400 method doWork.
@Override
public int doWork(List<String> args) {
if (this.XML == null) {
LOG.error("XML file missing");
return -1;
}
final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
final Unmarshaller unmarshaller = context.createUnmarshaller();
final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
if (rgl.flowcells.isEmpty()) {
LOG.error("empty XML " + XML);
return -1;
}
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = sfr.getFileHeader().clone();
header.addComment("Processed with " + getProgramName());
final Set<String> seenids = new HashSet<String>();
final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
for (final FlowCell fc : rgl.flowcells) {
final Map<Integer, String> lane2id = new HashMap<Integer, String>();
for (final Lane lane : fc.lanes) {
for (final ReadGroup rg : lane.readGroups) {
if (seenids.contains(rg.id)) {
LOG.error("Group id " + rg.id + " defined twice");
return -1;
}
seenids.add(rg.id);
// create the read group we'll be using
final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
rgrec.setLibrary(rg.library);
rgrec.setPlatform(rg.platform);
rgrec.setSample(rg.sample);
rgrec.setPlatformUnit(rg.platformunit);
if (rg.center != null)
rgrec.setSequencingCenter(rg.center);
if (rg.description != null)
rgrec.setDescription(rg.description);
lane2id.put(lane.id, rg.id);
samReadGroupRecords.add(rgrec);
}
}
if (flowcell2lane2id.containsKey(fc.name)) {
LOG.error("FlowCell id " + fc.name + " defined twice in XML");
return -1;
}
flowcell2lane2id.put(fc.name, lane2id);
}
header.setReadGroups(samReadGroupRecords);
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Matcher matcher = readNameSignature.matcher(rec.getReadName());
final String flowcellStr;
final String laneStr;
if (matcher.matches()) {
flowcellStr = matcher.group(1);
laneStr = matcher.group(2);
} else {
LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
return -1;
}
String RGID = null;
final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
if (lane2id == null)
throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
try {
RGID = lane2id.get(Integer.parseInt(laneStr));
} catch (final Exception e) {
LOG.error("bad lane-Id in " + rec.getReadName());
return -1;
}
if (RGID == null) {
throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
}
rec.setAttribute(SAMTag.RG.name(), RGID);
sfw.addAlignment(rec);
}
progress.finish();
iter.close();
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(sfr);
}
}
Aggregations