Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
The class SamToPsl, method scan().
private void scan(final SamReader in) {
    final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing...");
    final SAMRecordIterator iter = in.iterator();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext() && !this.out.checkError()) {
        final SAMRecord rec = progress.watch(iter.next());
        if (rec.getReadUnmappedFlag())
            continue;
        for (final PslAlign a : makePslAlign(rec, dict)) {
            out.println(toString(a, rec));
        }
    }
    progress.finish();
    iter.close();
}
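The example above shows the canonical SAMSequenceDictionaryProgress pattern that the remaining examples repeat: build the progress logger from the input's sequence dictionary (or header), route every record through watch() so progress is reported by contig and position, and call finish() when the iteration ends. The minimal, self-contained sketch below isolates that loop; the SamReaderFactory setup and the input path taken from args[0] are illustrative assumptions, not part of SamToPsl.

import java.io.File;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress;

public class ProgressLoopExample {
    public static void main(final String[] args) throws Exception {
        // hypothetical input BAM taken from the command line, for illustration only
        try (final SamReader in = SamReaderFactory.makeDefault().open(new File(args[0]));
             final SAMRecordIterator iter = in.iterator()) {
            final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
            if (dict == null)
                throw new RuntimeException("Sequence dictionary missing");
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                // watch() logs progress and returns the record unchanged
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.getReadUnmappedFlag())
                    continue;
                // ... process rec ...
            }
            progress.finish();
        }
    }
}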
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
The class VcfRenameSamples, method doVcfToVcf().
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final VCFHeader header1 = in.getHeader();
    final Set<String> samples1 = new LinkedHashSet<String>(header1.getSampleNamesInOrder());
    final List<String> newHeader = new ArrayList<String>(samples1);
    for (int i = 0; i < newHeader.size(); ++i) {
        final String destName = this.oldNameToNewName.get(newHeader.get(i));
        if (destName == null)
            continue;
        newHeader.set(i, destName);
    }
    if (newHeader.size() != new HashSet<String>(newHeader).size()) {
        throw new RuntimeException("Error in input : there are some duplicates in the resulting new VCF header: " + newHeader);
    }
    for (final String srcName : this.oldNameToNewName.keySet()) {
        if (!samples1.contains(srcName)) {
            if (missing_user_name_is_error) {
                LOG.error("Source Sample " + srcName + " missing in " + samples1 + ". Use option -E to ignore");
                return -1;
            } else {
                LOG.warning("Missing src-sample:" + srcName);
            }
        }
    }
    final VCFHeader header2 = new VCFHeader(header1.getMetaDataInInputOrder(), newHeader);
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
    out.writeHeader(header2);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
    while (in.hasNext()) {
        final VariantContext ctx = progress.watch(in.next());
        final VariantContextBuilder b = new VariantContextBuilder(ctx);
        final List<Genotype> genotypes = new ArrayList<Genotype>();
        for (final String oldName : samples1) {
            Genotype g = ctx.getGenotype(oldName);
            final String destName = this.oldNameToNewName.get(oldName);
            if (destName != null) {
                final GenotypeBuilder gb = new GenotypeBuilder(g);
                gb.name(destName);
                g = gb.make();
            }
            genotypes.add(g);
        }
        b.genotypes(genotypes);
        out.add(b.make());
    }
    progress.finish();
    return 0;
}
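The renaming itself only touches the sample list in the header and the per-genotype name: each Genotype whose sample appears in the rename table is rebuilt with GenotypeBuilder.name(), and everything else is copied through unchanged. A reduced, hedged sketch of that per-variant step follows; the rename map is a stand-in for the tool's oldNameToNewName field.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

final class RenameGenotypes {
    // rename: hypothetical map of old sample name to new sample name
    static VariantContext apply(final VariantContext ctx, final Map<String, String> rename) {
        final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
        for (final Genotype g : ctx.getGenotypes()) {
            final String destName = rename.get(g.getSampleName());
            // keep the genotype unchanged when its sample is not in the rename table
            genotypes.add(destName == null ? g : new GenotypeBuilder(g).name(destName).make());
        }
        return new VariantContextBuilder(ctx).genotypes(genotypes).make();
    }
}

A caller would pair this with a VCFHeader rebuilt from the renamed sample list, as the example above does with header2.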
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
The class FindNewSpliceSites, method scan().
private void scan(SamReader in) {
    SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
    if (dict == null)
        throw new RuntimeException("Sequence dictionary missing");
    SAMRecordIterator iter = in.iterator();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadUnmappedFlag())
            continue;
        if (rec.isSecondaryOrSupplementary())
            continue;
        progress.watch(rec);
        if (isWeird(rec, dict)) {
            this.weird.addAlignment(rec);
            continue;
        }
        for (CigarElement ce : rec.getCigar().getCigarElements()) {
            if (ce.getOperator().equals(CigarOperator.N)) {
                scanRead(rec, dict);
                break;
            }
        }
    }
    iter.close();
    progress.finish();
}
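The final loop in scan() asks a single question: does the alignment contain an N operator, i.e. a skipped reference region that marks a splice junction in spliced (RNA-seq) alignments? Only such reads are handed to scanRead(). That test can be isolated as a small helper; the sketch below is an illustration, not code from FindNewSpliceSites.

import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;

final class SpliceTest {
    // true if the mapped read has at least one N element (skipped region / putative intron)
    static boolean isSpliced(final SAMRecord rec) {
        if (rec.getReadUnmappedFlag() || rec.getCigar() == null)
            return false;
        for (final CigarElement ce : rec.getCigar().getCigarElements()) {
            if (ce.getOperator() == CigarOperator.N)
                return true;
        }
        return false;
    }
}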
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
The class SamFixCigar, method doWork().
@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final List<CigarElement> newCigar = new ArrayList<CigarElement>();
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            newCigar.clear();
            int refPos1 = rec.getAlignmentStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.M)) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        char c1 = Character.toUpperCase((char) bases[readPos0]);
                        char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
                        if (c2 == 'N' || c1 == c2) {
                            newCigar.add(new CigarElement(1, CigarOperator.EQ));
                        } else {
                            newCigar.add(new CigarElement(1, CigarOperator.X));
                        }
                        refPos1++;
                        readPos0++;
                    }
                } else {
                    newCigar.add(ce);
                    if (op.consumesReadBases())
                        readPos0 += ce.getLength();
                    if (op.consumesReferenceBases())
                        refPos1 += ce.getLength();
                }
            }
            int i = 0;
            while (i < newCigar.size()) {
                final CigarOperator op1 = newCigar.get(i).getOperator();
                final int length1 = newCigar.get(i).getLength();
                if (i + 1 < newCigar.size() && newCigar.get(i + 1).getOperator() == op1) {
                    final CigarOperator op2 = newCigar.get(i + 1).getOperator();
                    int length2 = newCigar.get(i + 1).getLength();
                    newCigar.set(i, new CigarElement(length1 + length2, op2));
                    newCigar.remove(i + 1);
                } else {
                    ++i;
                }
            }
            cigar = new Cigar(newCigar);
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            rec.setCigar(cigar);
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
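Because the M run is rewritten one base at a time, the new CIGAR initially contains long runs of single-base = and X elements; the while loop near the end merges neighbours that share an operator before the record is written back. A compact, hedged version of just that merge step is sketched below (an illustration, not the tool's code).

import java.util.ArrayList;
import java.util.List;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;

final class CigarMerge {
    // collapse adjacent elements with the same operator, e.g. 1=,1=,1X -> 2=,1X
    static Cigar collapse(final List<CigarElement> elements) {
        final List<CigarElement> merged = new ArrayList<>();
        for (final CigarElement ce : elements) {
            final int last = merged.size() - 1;
            if (last >= 0 && merged.get(last).getOperator() == ce.getOperator()) {
                merged.set(last, new CigarElement(merged.get(last).getLength() + ce.getLength(), ce.getOperator()));
            } else {
                merged.add(ce);
            }
        }
        return new Cigar(merged);
    }
}

This is the same merging that the while loop in the example performs in place on newCigar.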
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
The class VcfCompareCallers, method doWork().
@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
    if (this.archiveFile == null) {
        LOG.error("undefined output");
        return -1;
    }
    if (!this.archivePrefix.isEmpty() && !archivePrefix.endsWith(".")) {
        this.archivePrefix = this.archivePrefix + ".";
    }
    if (!this.archiveFile.getName().endsWith(".zip")) {
        IOUtil.assertDirectoryIsWritable(this.archiveFile);
    }
    PeekVCF vcfIterator1 = null;
    PeekVCF vcfIterator2 = null;
    ArchiveFactory archiveFactory = null;
    IntervalTreeMap<Boolean> capture = null;
    PrintWriter makefileWriter = null;
    try {
        if (args.size() == 1) {
            LOG.info("Reading from VCF1=stdin and VCF2=" + args.get(0));
            vcfIterator1 = new PeekVCF(VCFUtils.createVcfIteratorFromInputStream(stdin()), "<STDIN>");
            vcfIterator2 = new PeekVCF(VCFUtils.createVcfIterator(args.get(0)), args.get(0));
        } else if (args.size() == 2) {
            LOG.info("Reading from VCF1=" + args.get(0) + " and VCF2=" + args.get(1));
            vcfIterator1 = new PeekVCF(VCFUtils.createVcfIterator(args.get(0)), args.get(0));
            vcfIterator2 = new PeekVCF(VCFUtils.createVcfIterator(args.get(1)), args.get(1));
        } else {
            LOG.error("illegal number of arguments");
            return -1;
        }
        if (this.captureFile != null) {
            LOG.info("Reading " + this.captureFile);
            capture = super.readBedFileAsBooleanIntervalTreeMap(this.captureFile);
        }
        this.global_dictionary = vcfIterator1.dict;
        if (!SequenceUtil.areSequenceDictionariesEqual(vcfIterator1.dict, vcfIterator2.dict)) {
            throw new JvarkitException.DictionariesAreNotTheSame(vcfIterator1.dict, vcfIterator2.dict);
        }
        for (int side = 0; side < 2; ++side) {
            final List<String> jexlExprStrings = (side == 0 ? this.jexlExprStrings1 : this.jexlExprStrings2);
            final PeekVCF peek = (side == 0 ? vcfIterator1 : vcfIterator2);
            // initialize JEXL map
            if (jexlExprStrings.isEmpty())
                continue;
            final ArrayList<String> dummyNames = new ArrayList<String>(jexlExprStrings.size());
            for (int expCount = 1; expCount <= jexlExprStrings.size(); ++expCount) {
                dummyNames.add(String.format("vce%d", expCount));
            }
            peek.jexlVCMatchExps.addAll(VariantContextUtils.initializeMatchExps(dummyNames, jexlExprStrings));
        }
        /* samples */
        final Set<String> samples0 = new HashSet<>(vcfIterator1.header.getSampleNamesInOrder());
        final Set<String> samples1 = new HashSet<>(vcfIterator2.header.getSampleNamesInOrder());
        final Set<String> commonSamples = new TreeSet<>(samples0);
        commonSamples.retainAll(samples1);
        if (commonSamples.size() != samples0.size() || commonSamples.size() != samples1.size()) {
            LOG.warn("Warning: Not the same samples set. Using intersection of both lists.");
        }
        if (commonSamples.isEmpty()) {
            LOG.error("No common samples");
            return -1;
        }
        final Map<String, SampleInfo> sample2info = new HashMap<>(commonSamples.size());
        for (final String sampleName : commonSamples) {
            sample2info.put(sampleName, new SampleInfo(sampleName));
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.global_dictionary);
        for (; ; ) {
            VariantContext ctx0 = vcfIterator1.peek();
            VariantContext ctx1 = vcfIterator2.peek();
            final VariantContext smallest;
            if (ctx0 == null && ctx1 == null) {
                smallest = null;
                break;
            } else if (ctx0 == null && ctx1 != null) {
                smallest = ctx1;
            } else if (ctx0 != null && ctx1 == null) {
                smallest = ctx0;
            } else {
                final int diff = this.compareChromPosRef.compare(ctx0, ctx1);
                if (diff < 0) {
                    smallest = ctx0;
                    ctx1 = null;
                } else if (diff > 0) {
                    smallest = ctx1;
                    ctx0 = null;
                } else {
                    smallest = ctx0;
                }
            }
            progress.watch(smallest);
            vcfIterator1.reset(smallest);
            vcfIterator2.reset(smallest);
            if (capture != null) {
                final Interval interval = new Interval(smallest.getContig(), smallest.getStart(), smallest.getEnd());
                if (!capture.containsOverlapping(interval))
                    continue;
            }
            for (final String sampleName : sample2info.keySet()) {
                sample2info.get(sampleName).visit(ctx0, ctx1);
            }
        }
        progress.finish();
        vcfIterator1.close();
        vcfIterator2.close();
        archiveFactory = ArchiveFactory.open(this.archiveFile);
        makefileWriter = archiveFactory.openWriter(this.archivePrefix + "Makefile");
        makefileWriter.println(".PHONY:all all2");
        makefileWriter.println("ALL_TARGETS=");
        makefileWriter.println("all:all2");
        for (final SampleInfo sampleInfo : sample2info.values()) {
            for (final SampleCategory sampleCat : sampleInfo.sampleCat.values()) {
                final String basename = this.archivePrefix + sampleInfo.sampleName + "." + sampleCat.variantCatName.replace(' ', '_');
                final String tsv = basename + ".tsv";
                PrintWriter dataW = archiveFactory.openWriter(tsv);
                for (final String sk : new TreeSet<String>(sampleCat.counter.keySet())) {
                    dataW.print(escapeUnderscore(sk));
                    dataW.print("\t");
                    dataW.print(sampleCat.counter.count(sk));
                    dataW.println();
                }
                dataW.flush();
                dataW.close();
                final String png = "$(addsuffix .png," + basename + ")";
                makefileWriter.println("ALL_TARGETS+=" + png);
                makefileWriter.println(png + ":" + tsv);
                makefileWriter.println("\techo '" +
                    "set ylabel \"Number of Genotypes " + escapeUnderscore(sampleInfo.sampleName) + "\";" +
                    "set yrange [0:];" +
                    "set xlabel \"Category " + escapeUnderscore(vcf1Name) + ": " + vcfIterator1.count + ", " + escapeUnderscore(vcf2Name) + ": " + vcfIterator2.count + " variants \";" +
                    "set xtic rotate by 90 right;" +
                    "set size ratio 0.618;" +
                    "set title \"" + escapeUnderscore(vcf1Name) + " vs " + escapeUnderscore(vcf2Name) + " : Genotypes " + escapeUnderscore(sampleInfo.sampleName) + " / Variants: " + escapeUnderscore(sampleCat.variantCatName) + " \";" +
                    "set key off;" +
                    "set datafile separator \"\t\";" +
                    "set auto x;" +
                    "set style histogram;" +
                    "set style data histogram;" +
                    "set style fill solid border -1;" +
                    "set terminal png truecolor size " + (500 + sampleCat.counter.getCountCategories() * 50) + ", 1500;" +
                    "set output \"$@\";" +
                    "plot \"$<\" using 2:xtic(1) ti \"\";' | " +
                    "gnuplot");
            }
        }
        makefileWriter.println("all2:${ALL_TARGETS}");
        makefileWriter.close();
        makefileWriter = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(makefileWriter);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(vcfIterator1);
        CloserUtil.close(vcfIterator2);
    }
}
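The heart of doWork() is a merge of two coordinate-sorted VCFs: peek at both streams, pick whichever variant sorts first on contig/position/reference, null out the other side when the positions differ, and let each SampleInfo record whether a genotype was seen by the first caller, the second, or both. The skeleton below is a simplified, hedged rendering of that merge; PeekableIterator, the comparator and the visit callback stand in for the tool's PeekVCF, compareChromPosRef and SampleInfo.visit().

import java.util.Comparator;
import java.util.function.BiConsumer;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.variant.variantcontext.VariantContext;

final class SortedVcfMerge {
    // both iterators must deliver variants in the same coordinate order (same dictionary)
    static void merge(final PeekableIterator<VariantContext> it1,
                      final PeekableIterator<VariantContext> it2,
                      final Comparator<VariantContext> cmp,
                      final BiConsumer<VariantContext, VariantContext> visit) {
        while (it1.hasNext() || it2.hasNext()) {
            VariantContext ctx1 = it1.hasNext() ? it1.peek() : null;
            VariantContext ctx2 = it2.hasNext() ? it2.peek() : null;
            if (ctx1 != null && ctx2 != null) {
                final int diff = cmp.compare(ctx1, ctx2);
                if (diff < 0) ctx2 = null;      // site only present in the first VCF
                else if (diff > 0) ctx1 = null; // site only present in the second VCF
            }
            if (ctx1 != null) it1.next();       // consume whatever is about to be visited
            if (ctx2 != null) it2.next();
            visit.accept(ctx1, ctx2);           // at least one side is non-null
        }
    }
}

Unlike this sketch, which consumes one variant per side per step, the tool's PeekVCF.reset() drains every co-located variant at the chosen position before the SampleInfo objects are visited.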