Use of com.github.lindenb.jvarkit.setfile.SetFileRecord in project jvarkit by lindenb.
Class SetFileTools, method doInterBed.
/**
 * Prints every whole set record that has at least one interval overlapping the BED file.
 * Matching records are written unchanged: there is no trimming.
 *
 * @param args exactly two inputs, the BED file first and the set file second;
 *             "-" selects stdin for one of them, never for both
 * @return 0 on success, -1 when the options or the arguments are rejected
 * @throws IOException if reading an input or opening the output fails
 */
private int doInterBed(final List<String> args) throws IOException {
    // The intersect options are not accepted by this method; failures are
    // reported at error level like every other rejected input below.
    if (this.intersectBedPath != null) {
        LOG.error("intersectBedPath shouldn't be specified");
        return -1;
    }
    if (!this.intersectVcfPath.isEmpty()) {
        LOG.error("intersectVcfPath shouldn't be specified");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("expected 2 files but got " + args.size() + " " + args);
        return -1;
    }
    final boolean bedFromStdin = args.get(0).equals("-");
    final boolean setFromStdin = args.get(1).equals("-");
    if (bedFromStdin && setFromStdin) {
        LOG.error("cannot use both files on stdin");
        return -1;
    }

    // Load the whole BED file in memory so each record can be tested for overlap.
    final IntervalTreeMap<BedLine> peaksTreeMap;
    try (BedLineReader blr = openBedLineReader(bedFromStdin ? null : Paths.get(args.get(0)))) {
        peaksTreeMap = blr.toIntervalTreeMap();
    }

    // An empty argument list makes openSetFileIterator read the set file from stdin.
    final List<String> setFileArgs = setFromStdin ? Collections.emptyList() : args.subList(1, 2);
    try (CloseableIterator<SetFileRecord> iter = openSetFileIterator(setFileArgs);
            PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
        while (iter.hasNext()) {
            final SetFileRecord rec = iter.next();
            // Keep the record as soon as one of its intervals overlaps the BED.
            if (rec.stream().anyMatch(B -> peaksTreeMap.containsOverlapping(B))) {
                print(pw, rec);
            }
        }
        pw.flush();
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.setfile.SetFileRecord in project jvarkit by lindenb.
Class SetFileTools, method doStats.
/**
 * Computes and prints statistics for a set file: number of records per chromosome,
 * then average and median of the record size, item size, number of items and
 * distance between items.
 *
 * @param args forwarded to openSetFileIterator to select the input
 * @return always 0
 * @throws IOException if reading the input or opening the output fails
 */
private int doStats(final List<String> args) throws IOException {
    final Counter<String> chrom2count = new Counter<>();
    final DiscreteMedian<Integer> d_size = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_nitems = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_distance = new DiscreteMedian<>();
    final DiscreteMedian<Integer> d_item_size = new DiscreteMedian<>();

    // Register every known category up front so it is reported even with a zero count.
    for (final SAMSequenceRecord ssr : this.theDict.getSequences()) {
        chrom2count.initializeIfNotExists(noChr(ssr.getSequenceName()));
    }
    chrom2count.initializeIfNotExists("*multiple*");
    chrom2count.initializeIfNotExists("*empty*");

    try (CloseableIterator<SetFileRecord> iter = openSetFileIterator(args)) {
        while (iter.hasNext()) {
            final SetFileRecord rec = iter.next();
            final Set<String> chroms = rec.getChromosomes();
            switch (chroms.size()) {
                case 0:
                    chrom2count.incr("*empty*");
                    break;
                case 1:
                    chrom2count.incr(noChr(chroms.iterator().next()));
                    break;
                default:
                    chrom2count.incr("*multiple*");
                    break;
            }

            final int totalLength = rec.stream().mapToInt(B -> B.getLengthOnReference()).sum();
            d_size.add(totalLength);
            d_nitems.add(rec.size());
            if (rec.size() > 0) {
                // Mean item length for this record (integer division).
                d_item_size.add(totalLength / rec.size());
            }

            if (rec.size() > 1 && chroms.size() == 1) {
                final List<Locatable> merged = sortAndMerge(rec);
                // The gaps must be measured on the list returned by sortAndMerge,
                // not on the record itself: the two lists need not have the same
                // order or size (presumably overlapping items are merged - confirm
                // against sortAndMerge). Fewer than two items means there is no gap
                // to average, and dividing by (size - 1) would throw.
                if (merged.size() > 1) {
                    int gapSum = 0;
                    for (int i = 0; i + 1 < merged.size(); i++) {
                        gapSum += merged.get(i + 1).getStart() - merged.get(i).getEnd();
                    }
                    d_distance.add(gapSum / (merged.size() - 1));
                }
            }
        }
    }

    try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
        for (final String key : chrom2count.keySetDecreasing()) {
            pw.println("C\trecords-per-chrom\t" + key + "\t" + chrom2count.count(key));
        }
        // A "." is printed when no value was collected for a statistic.
        pw.println("AS\taverage-size\t" + (d_size.isEmpty() ? "." : String.valueOf(d_size.getAverage().orElse(0.0))));
        pw.println("MS\tmedian-size\t" + (d_size.isEmpty() ? "." : String.valueOf(d_size.getMedian().orElse(0.0))));
        pw.println("AIS\taverage-item-size\t" + (d_item_size.isEmpty() ? "." : String.valueOf(d_item_size.getAverage().orElse(0.0))));
        pw.println("MIS\tmedian-item-size\t" + (d_item_size.isEmpty() ? "." : String.valueOf(d_item_size.getMedian().orElse(0.0))));
        pw.println("AN\taverage-nitems\t" + (d_nitems.isEmpty() ? "." : String.valueOf(d_nitems.getAverage().orElse(0.0))));
        pw.println("MN\tmedian-nitems\t" + (d_nitems.isEmpty() ? "." : String.valueOf(d_nitems.getMedian().orElse(0.0))));
        pw.println("AD\taverage-distance-between-items\t" + (d_distance.isEmpty() ? "." : String.valueOf(d_distance.getAverage().orElse(0.0))));
        pw.println("MD\tmedian-distance-between-items\t" + (d_distance.isEmpty() ? "." : String.valueOf(d_distance.getMedian().orElse(0.0))));
        pw.flush();
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.setfile.SetFileRecord in project jvarkit by lindenb.
Class SetFileTools, method toBed.
/**
 * Writes one tab-separated line per interval of every set record:
 * contig (passed through noChr), start minus one, end, and the record name.
 *
 * @param args forwarded to openSetFileIterator to select the input
 * @return always 0
 * @throws IOException if reading the input or opening the output fails
 */
private int toBed(final List<String> args) throws IOException {
    try (CloseableIterator<SetFileRecord> records = openSetFileIterator(args);
            PrintWriter out = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
        while (records.hasNext()) {
            final SetFileRecord record = records.next();
            for (final Locatable interval : record) {
                // Build the four columns, then emit them as a single line.
                final StringBuilder line = new StringBuilder();
                line.append(noChr(interval.getContig()));
                line.append("\t").append(interval.getStart() - 1);
                line.append("\t").append(interval.getEnd());
                line.append("\t").append(record.getName());
                out.println(line.toString());
            }
        }
        out.flush();
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.setfile.SetFileRecord in project jvarkit by lindenb.
Class SetFileTools, method makeClusters.
/**
 * Distributes the non-empty set records into clusters and writes one set file
 * per cluster into the output archive.
 *
 * Exactly one of the two modes must be selected:
 * with a number of jobs, at most that many clusters are created and each further
 * record joins the cluster giving the smallest getSumLength;
 * with a length per bin, a record joins the first cluster whose getSumLength stays
 * within the limit, or opens a new cluster.
 *
 * @param args forwarded to openSetFileIterator to select the input
 * @return 0 on success, -1 when neither or both modes are selected
 * @throws IOException if reading the input or writing the archive fails
 */
private int makeClusters(final List<String> args) throws IOException {
    if (this.number_of_jobs < 1 && this.long_length_per_bin < 1L) {
        LOG.error("at least --jobs or --size must be specified.");
        return -1;
    }
    if (this.number_of_jobs > 0 && this.long_length_per_bin > 0) {
        LOG.error(" --jobs OR --size must be specified. Not both.");
        return -1;
    }

    final List<Cluster> clusters = new ArrayList<>();
    try (CloseableIterator<SetFileRecord> iter = openSetFileIterator(args)) {
        // Records are placed from the largest to the smallest.
        final List<SetFileRecord> records = iter.stream()
                .filter(R -> !R.isEmpty())
                .sorted((A, B) -> Long.compare(B.getLongSumOfLengthOnReference(), A.getLongSumOfLengthOnReference()))
                .collect(Collectors.toList());

        for (final SetFileRecord rec : records) {
            if (this.number_of_jobs > 0) {
                if (clusters.size() < this.number_of_jobs) {
                    final Cluster c = new Cluster();
                    c.add(rec);
                    // The new cluster must be registered, otherwise the record is
                    // lost and the list of clusters stays empty forever.
                    clusters.add(c);
                } else {
                    // All clusters exist: pick the one with the smallest
                    // getSumLength(rec); ties keep the first cluster found.
                    int best_idx = -1;
                    double best_length = -1;
                    for (int y = 0; y < clusters.size(); ++y) {
                        final double total_length = clusters.get(y).getSumLength(rec);
                        if (best_idx == -1 || total_length < best_length) {
                            best_idx = y;
                            best_length = total_length;
                        }
                    }
                    clusters.get(best_idx).add(rec);
                }
            } else {
                // First fit: reuse the first cluster that stays within the limit.
                Cluster target = null;
                for (final Cluster candidate : clusters) {
                    if (candidate.getSumLength(rec) <= this.long_length_per_bin) {
                        target = candidate;
                        break;
                    }
                }
                if (target == null) {
                    target = new Cluster();
                    clusters.add(target);
                }
                target.add(rec);
            }
        }
    }

    int clusterid = 0;
    try (final ArchiveFactory archive = ArchiveFactory.open(this.outputFile)) {
        for (final Cluster cluster : clusters) {
            // Order by the first interval of each record, then by name.
            // get(0) is safe: empty records were filtered out above.
            Collections.sort(cluster.records, (A, B) -> {
                final int i = this.theSorter.compare(A.get(0), B.get(0));
                if (i != 0) {
                    return i;
                }
                return A.getName().compareTo(B.getName());
            });
            final String filename = String.format("cluster.%05d" + SetFileRecord.FILE_EXTENSION, clusterid);
            try (PrintWriter pw = archive.openWriter(filename)) {
                for (final SetFileRecord rec : cluster.records) {
                    print(pw, rec);
                }
                pw.flush();
            }
            LOG.info(filename + " " + cluster.getSumLength(null) + "bp");
            ++clusterid;
        }
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.setfile.SetFileRecord in project jvarkit by lindenb.
Class SetFileTools, method openSetFileIterator.
/**
 * Opens the set file and wraps the resulting iterator according to the current options:
 * interval extension, intersection with a BED file, intersection with VCF files,
 * and, unless disabled, the UniqNameIterator wrapper.
 *
 * The caller owns the returned iterator and must close it. If a step fails after
 * the input was opened, the iterator built so far is closed before the exception
 * is rethrown.
 *
 * @param args input selection, resolved by oneFileOrNull; a null result means stdin
 * @return the decorated iterator over the set file records
 * @throws IOException if opening the input or one of the decorators fails
 */
private CloseableIterator<SetFileRecord> openSetFileIterator(final List<String> args) throws IOException {
    final String input = oneFileOrNull(args);
    final SetFileReaderFactory srf = new SetFileReaderFactory(this.theDict);
    CloseableIterator<SetFileRecord> iter = null;
    try {
        if (input == null) {
            iter = srf.open(IOUtils.openStdinForBufferedReader());
        } else {
            iter = srf.open(IOUtils.openURIForBufferedReading(input));
        }
        if (!StringUtils.isBlank(this.extendStr)) {
            final IntervalExtender xtExtender = IntervalExtender.of(this.theDict, this.extendStr);
            // Skip the wrapper when the extender would leave intervals unchanged.
            if (xtExtender.isChanging()) {
                iter = new ExtenderIterator(iter, xtExtender);
            }
        }
        if (intersectBedPath != null) {
            iter = new IntersectBedIterator(iter, this.intersectBedPath);
        }
        if (!intersectVcfPath.isEmpty()) {
            iter = new IntersectVcfIterator(iter, IOUtils.unrollPaths(this.intersectVcfPath));
        }
        if (!disable_uniq) {
            iter = new UniqNameIterator(iter);
        }
        return iter;
    } catch (final IOException | RuntimeException err) {
        // 'iter' still references the last successfully built iterator:
        // release it so the underlying reader is not leaked.
        if (iter != null) {
            try {
                iter.close();
            } catch (final RuntimeException closeErr) {
                err.addSuppressed(closeErr);
            }
        }
        throw err;
    }
}
Aggregations