use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.
the class BamStats01 method run.
private void run(final String filename, SamReader samFileReader) throws IOException {
Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
if (samSequenceDictionary == null) {
samSequenceDictionary = currDict;
}
if (this.bedFile != null) {
if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
samFileReader.close();
throw new IOException("incompatible sequence dictionaries." + filename);
}
if (intervalTreeMap == null) {
final BedLineCodec bedCodec = new BedLineCodec();
intervalTreeMap = new IntervalTreeMap<>();
LOG.info("opening " + this.bedFile);
String line;
final BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile);
while ((line = bedIn.readLine()) != null) {
final BedLine bedLine = bedCodec.decode(line);
if (bedLine == null)
continue;
int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
if (seqIndex == -1) {
throw new IOException("unknown chromosome from dict in in " + line + " " + this.bedFile);
}
intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
}
bedIn.close();
LOG.info("done reading " + this.bedFile);
}
}
this.chrX_index = -1;
this.chrY_index = -1;
for (final SAMSequenceRecord rec : currDict.getSequences()) {
final String chromName = rec.getSequenceName().toLowerCase();
if (chromName.equals("x") || chromName.equals("chrx")) {
this.chrX_index = rec.getSequenceIndex();
} else if (chromName.equals("y") || chromName.equals("chry")) {
this.chrY_index = rec.getSequenceIndex();
}
}
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
final SAMRecordIterator iter = samFileReader.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progess.watch(iter.next());
String sampleName = groupBy.getPartion(rec);
if (sampleName == null || sampleName.isEmpty())
sampleName = "undefined";
Histogram hist = sample2hist.get(sampleName);
if (hist == null) {
hist = new Histogram();
sample2hist.put(sampleName, hist);
}
hist.histograms[Category2.ALL.ordinal()].watch(rec);
if (intervalTreeMap == null)
continue;
if (rec.getReadUnmappedFlag()) {
continue;
}
if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
} else {
hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
}
}
progess.finish();
samFileReader.close();
samFileReader = null;
for (final String sampleName : sample2hist.keySet()) {
final Histogram hist = sample2hist.get(sampleName);
out.print(filename + "\t" + sampleName);
for (final Category2 cat2 : Category2.values()) {
for (// je je suis libertineuuh, je suis une cat1
final Category cat1 : // je je suis libertineuuh, je suis une cat1
Category.values()) {
out.print("\t");
out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
}
if (intervalTreeMap == null)
break;
}
out.println();
}
}
use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.
the class Biostar178713 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.outputFile == null || !outputFile.getName().endsWith(".zip")) {
LOG.error("output file option must be declared and must en with .zip");
return -1;
}
final Set<String> inputs = IOUtils.unrollFiles(args);
List<BedLine> bedLines = new ArrayList<>();
FileOutputStream fos = null;
ZipOutputStream zout = null;
try {
if (inputs.isEmpty()) {
LOG.info("reading bed from stdin");
LineIterator r = IOUtils.openStreamForLineIterator(stdin());
this.readBed(bedLines, r);
CloserUtil.close(r);
} else
for (final String input : inputs) {
LOG.info("reading bed from " + input);
LineIterator r = IOUtils.openURIForLineIterator(input);
this.readBed(bedLines, r);
CloserUtil.close(r);
}
LOG.info("sorting " + bedLines.size());
Collections.sort(bedLines, new Comparator<BedLine>() {
@Override
public int compare(BedLine o1, BedLine o2) {
int i = o1.getContig().compareTo(o2.getContig());
if (i != 0)
return i;
i = o1.getStart() - o2.getStart();
if (i != 0)
return i;
i = o1.getEnd() - o2.getEnd();
return i;
}
});
if (bedLines.isEmpty()) {
LOG.error("no bed line found");
return -1;
}
LOG.info("creating zip " + this.outputFile);
fos = new FileOutputStream(this.outputFile);
zout = new ZipOutputStream(fos);
int chunk = 0;
while (!bedLines.isEmpty()) {
++chunk;
final ZipEntry entry = new ZipEntry("bed" + String.format("%03d", chunk) + ".bed");
LOG.info("creating " + entry.getName());
zout.putNextEntry(entry);
PrintWriter pw = new PrintWriter(zout);
BedLine prev = bedLines.get(0);
bedLines.remove(0);
pw.println(prev.join());
int n_in_entry = 1;
int i = 0;
while (i < bedLines.size()) {
final BedLine curr = bedLines.get(i);
final double distance = curr.getStart() - prev.getEnd();
if (!prev.getContig().equals(curr.getContig()) || distance > this.distancebed) {
pw.println(curr.join());
prev = curr;
bedLines.remove(i);
++n_in_entry;
} else {
++i;
}
}
pw.flush();
zout.closeEntry();
LOG.info("closing " + entry.getName() + " N=" + n_in_entry);
}
zout.finish();
zout.close();
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(fos);
}
}
use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.
the class KnimeVariantHelper method parseBedAsBooleanIntervalTreeMap.
public IntervalTreeMap<Boolean> parseBedAsBooleanIntervalTreeMap(final String bedUri) throws IOException {
if (bedUri == null || bedUri.isEmpty())
throw new IllegalArgumentException("bad bed uri");
BufferedReader r = null;
final IntervalTreeMap<Boolean> treeMap = new IntervalTreeMap<>();
try {
r = IOUtils.openURIForBufferedReading(bedUri);
final BedLineCodec codec = new BedLineCodec();
r.lines().forEach(L -> {
final BedLine bedline = codec.decode(L);
if (bedline == null) {
LOG.warn("Ignoring line in BED (doesn't look like a BED line):" + L);
return;
}
treeMap.put(bedline.toInterval(), Boolean.TRUE);
});
return treeMap;
} finally {
CloserUtil.close(r);
}
}
use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.
the class Biostar77828 method doWork.
@Override
public int doWork(List<String> args) {
PrintStream pw = null;
try {
LOG.info("load BED");
BufferedReader in = super.openBufferedReader(oneFileOrNull(args));
final BedLineCodec codec = new BedLineCodec();
String line;
while ((line = in.readLine()) != null) {
if (line.isEmpty() || line.startsWith("#"))
continue;
final BedLine bedLine = codec.decode(line);
if (bedLine == null)
continue;
if (bedLine.getColumnCount() < 3)
throw new IOException("bad BED input " + bedLine);
final Segment seg = new Segment(bedLine.getContig(), bedLine.getStart() - 1, bedLine.getEnd());
if (seg.size() == 0)
continue;
this.effective_genome_size += seg.size();
this.all_segments.add(seg);
}
pw = super.openFileOrStdoutAsPrintStream(this.outputFile);
Solution best = null;
for (long generation = 0; generation < this.N_ITERATIONS; ++generation) {
if (generation % 100000 == 0)
LOG.info("generation:" + generation + "/" + this.N_ITERATIONS + " " + (int) ((generation / (double) this.N_ITERATIONS) * 100.0) + "% " + String.valueOf(best));
Solution sol = createSolution();
sol.generation = generation;
if (best == null || sol.compareTo(best) < 0) {
best = sol;
/*
if(LOG.isDebugEnabled())
{
LOG.info("%%generation:"+generation);
best.print(stderr());
}*/
}
}
if (best != null) {
best.print(pw);
}
pw.flush();
pw.close();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(pw);
}
}
use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.
the class FindAllCoverageAtPosition method doWork.
@Override
public int doWork(final List<String> args) {
final Set<Mutation> mutations = new TreeSet<>();
BufferedReader r = null;
try {
if (this.referenceFileFile != null) {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFileFile);
}
mutations.addAll(Arrays.asList(this.positionStr.split("[ ]")).stream().filter(S -> !S.trim().isEmpty()).map(S -> new Mutation(S)).collect(Collectors.toSet()));
for (final String s : positionStr.split("[ ]")) {
if (s.trim().isEmpty())
continue;
mutations.add(new Mutation(s));
}
if (this.positionFile != null) {
String line;
r = IOUtils.openFileForBufferedReading(positionFile);
if (positionFile.getName().endsWith(".bed")) {
final BedLineCodec codec = new BedLineCodec();
while ((line = r.readLine()) != null) {
final BedLine bedLine = codec.decode(line);
if (bedLine == null)
continue;
for (int x = bedLine.getStart(); x <= bedLine.getEnd(); ++x) {
mutations.add(new Mutation(bedLine.getContig(), x));
}
}
} else {
while ((line = r.readLine()) != null) {
if (line.trim().isEmpty() || line.startsWith("#"))
continue;
mutations.add(new Mutation(line));
}
}
r.close();
r = null;
}
if (mutations.isEmpty()) {
LOG.fatal("undefined position \'str\'");
return -1;
}
LOG.info("number of mutations " + mutations.size());
this.out = this.openFileOrStdoutAsPrintWriter(this.outputFile);
out.print("#File");
out.print('\t');
out.print("CHROM");
out.print('\t');
out.print("POS");
if (this.indexedFastaSequenceFile != null) {
out.print('\t');
out.print("REF");
}
out.print('\t');
out.print(this.groupBy.name().toUpperCase());
out.print('\t');
out.print("DEPTH");
for (final CigarOperator op : CigarOperator.values()) {
out.print('\t');
out.print(op.name());
}
for (char c : BASES_To_PRINT) {
out.print('\t');
out.print("Base(" + c + ")");
}
out.println();
if (args.isEmpty()) {
LOG.info("Reading from stdin");
r = new BufferedReader(new InputStreamReader(stdin()));
scan(r, mutations);
r.close();
r = null;
} else {
for (final String filename : args) {
LOG.info("Reading from " + filename);
r = IOUtils.openURIForBufferedReading(filename);
scan(r, mutations);
r.close();
r = null;
}
}
this.out.flush();
return 0;
} catch (Exception err) {
LOG.severe(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.out);
CloserUtil.close(r);
}
}
Aggregations