use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.
the class FastqSplitInterleaved method doWork.
/**
 * Reads paired FASTQ records from one input (the single file named in
 * {@code args}, or standard input when no file is given) and writes every
 * pair to a {@link FastqPairedWriter} opened on {@code fileA} and {@code fileB}.
 *
 * @param args command-line arguments, forwarded to {@code oneFileOrNull}
 * @return 0 on success, -1 if the two output files are equal or any error occurs
 */
@Override
public int doWork(final List<String> args) {
    if (this.fileA.equals(this.fileB)) {
        LOG.error("R1 file==R2.file.");
        return -1;
    }
    try {
        final String input = oneFileOrNull(args);
        final FastqPairedReaderFactory fqprf = new FastqPairedReaderFactory().setValidateReadNames(this.validate_read_names);
        // try-with-resources closes each resource exactly once, on success and on failure;
        // an exception thrown by close() is still routed to the catch block below.
        try (CloseableIterator<FastqRecordPair> iter1 = (input == null ? fqprf.open(stdin()) : fqprf.open(Paths.get(input)))) {
            final FastqPairedWriterFactory fqwf = new FastqPairedWriterFactory().setCreateMd5(this.write_md5).setAsyncIo(this.with_asynio);
            try (FastqPairedWriter pairedWriter = fqwf.open(fileA, fileB)) {
                while (iter1.hasNext()) {
                    pairedWriter.write(iter1.next());
                }
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.
the class BamToFastq method processInput.
/**
 * Converts a coordinate-sorted stream of SAM records into FASTQ output.
 *
 * <p>Records are dispatched as follows:
 * <ul>
 * <li>secondary/supplementary records are skipped;</li>
 * <li>reads without the paired flag go to the single-end writer;</li>
 * <li>when the read or its mate is unmapped and the next record is its mate,
 *     the two are written immediately as a pair;</li>
 * <li>reads that are unmapped, have an unmapped mate, have a mate on another
 *     reference, or whose absolute inferred insert size exceeds {@code distance}
 *     are pushed to a collection sorted on read name and resolved at the end;</li>
 * <li>all other reads are matched with their mate through an in-memory buffer
 *     of upstream reads.</li>
 * </ul>
 *
 * @param header header of the input; its sort order must be {@code coordinate}
 * @param iter0  the input records; closed by this method
 * @return 0 on success, -1 on error
 */
@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter0) {
    final Comparator<SAMRecord> queryNameComparator = (A, B) -> A.getReadName().compareTo(B.getReadName());
    SortingCollection<SAMRecord> sortingSAMRecord = null;
    // Upstream reads waiting for their mate; filled in input (coordinate) order.
    final ArrayList<SAMRecord> buffer = new ArrayList<>(50_000);
    // NOTE(review): CountIn.apply() presumably counts and returns its argument - confirm.
    final CountIn<FastqRecord> singleCounter = new CountIn<>("single-end");
    final CountIn<FastqRecord> unpairedCounter = new CountIn<>("unpaired");
    final CountIn<FastqRecord> pairedCounter = new CountIn<>("paired");
    final CountIn<SAMRecord> sortingCounter = new CountIn<>("sorting");
    final PeekableIterator<SAMRecord> iter = new PeekableIterator<>(iter0);
    try {
        if (!SAMFileHeader.SortOrder.coordinate.equals(header.getSortOrder())) {
            LOG.error("Input is not sorted on coordinate. got : " + header.getSortOrder());
            return -1;
        }
        if (singleFastq != null)
            FastqUtils.validateFastqFilename(singleFastq);
        if (unpairedFile1 != null)
            FastqUtils.validateFastqFilename(unpairedFile1);
        if (unpairedFile2 != null)
            FastqUtils.validateFastqFilename(unpairedFile2);
        // An output that was not requested is replaced by a NullFastqWriter.
        try (FastqWriter singleEndWriter = this.singleFastq == null ? new NullFastqWriter() : new BasicFastqWriter(this.singleFastq);
                FastqWriter unpairedWriter1 = this.unpairedFile1 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile1);
                FastqWriter unpairedWriter2 = this.unpairedFile2 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile2);
                FastqPairedWriter R1R2writer = openFastqPairedWriter()) {
            sortingSAMRecord = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), queryNameComparator, sortingCollection.getMaxRecordsInRam(), sortingCollection.getTmpPaths());
            sortingSAMRecord.setDestructiveIteration(true);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.isSecondaryOrSupplementary())
                    continue;
                if (!rec.getReadPairedFlag()) {
                    singleEndWriter.write(singleCounter.apply(toFastq(rec)));
                    continue;
                }
                // Shortcut: the read or its mate is unmapped and the mate is the very next record.
                if ((rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) && iter.hasNext()) {
                    final SAMRecord rec2 = iter.peek();
                    if (!rec2.isSecondaryOrSupplementary() && queryNameComparator.compare(rec, rec2) == 0) {
                        if (rec2.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                            // consume
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec2)), pairedCounter.apply(toFastq(rec)));
                            continue;
                        } else if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                            // consume
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(rec2)));
                            continue;
                        }
                    }
                }
                // The mate cannot be found nearby: defer to the name-sorted collection.
                if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || !rec.getReferenceName().equals(rec.getMateReferenceName()) || Math.abs(rec.getInferredInsertSize()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(rec));
                    continue;
                }
                // Evict buffered reads lying on a previous reference...
                while (!buffer.isEmpty() && !buffer.get(0).getReferenceName().equals(rec.getReferenceName())) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                // ...or too far upstream of the current read.
                while (!buffer.isEmpty() && (rec.getAlignmentStart() - buffer.get(0).getAlignmentStart()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                // Upstream read of the pair: wait for the mate.
                if (rec.getAlignmentStart() < rec.getMateAlignmentStart()) {
                    buffer.add(rec);
                    continue;
                }
                // Downstream (or same-start) read: look for its mate in the buffer.
                SAMRecord mate = null;
                int i = 0;
                while (i < buffer.size()) {
                    final SAMRecord rec2 = buffer.get(i);
                    if (queryNameComparator.compare(rec2, rec) == 0) {
                        mate = rec2;
                        buffer.remove(i);
                        break;
                    }
                    if (rec2.getAlignmentStart() > rec.getMateAlignmentStart()) {
                        break;
                    }
                    ++i;
                }
                if (mate == null && rec.getAlignmentStart() == rec.getMateAlignmentStart()) {
                    // Both mates start at the same position, so neither satisfies the
                    // "upstream" test above. Park the first one seen so that the second
                    // finds it in the buffer instead of both being written as unpaired.
                    // If the mate never shows up, the read is evicted to the sorting
                    // collection and ends up in the unpaired output anyway.
                    buffer.add(rec);
                    continue;
                }
                if (mate == null) {
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                } else if (mate.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(mate)), pairedCounter.apply(toFastq(rec)));
                } else if (rec.getFirstOfPairFlag() && mate.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(mate)));
                } else {
                    // Same name but inconsistent first/second flags: emit both as unpaired.
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                    (mate.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(mate)));
                }
            }
            // end while
            // Reads still waiting for a mate are resolved through the name-sorted pass.
            for (final SAMRecord rec : buffer) {
                sortingSAMRecord.add(sortingCounter.apply(rec));
            }
            buffer.clear();
            sortingSAMRecord.doneAdding();
            // Second pass: group the deferred records by read name.
            try (CloseableIterator<SAMRecord> iter2 = sortingSAMRecord.iterator()) {
                try (EqualIterator<SAMRecord> eq = new EqualIterator<>(iter2, queryNameComparator)) {
                    while (eq.hasNext()) {
                        final List<SAMRecord> L = eq.next();
                        if (L.size() == 2) {
                            if (L.get(0).getFirstOfPairFlag() && L.get(1).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(0))), pairedCounter.apply(toFastq(L.get(1))));
                            } else if (L.get(1).getFirstOfPairFlag() && L.get(0).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(1))), pairedCounter.apply(toFastq(L.get(0))));
                            } else {
                                (L.get(0).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(0))));
                                (L.get(1).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(1))));
                            }
                        } else {
                            for (SAMRecord rec2 : L) {
                                (rec2.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec2)));
                            }
                        }
                    }
                }
            }
        }
        sortingSAMRecord.cleanup();
        // Already cleaned: nothing left to do for the finally block.
        sortingSAMRecord = null;
        unpairedCounter.log();
        singleCounter.log();
        pairedCounter.log();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // Failure path: best-effort cleanup of the sorting collection so that
        // whatever it spilled to the temporary directories does not linger.
        if (sortingSAMRecord != null) {
            try {
                sortingSAMRecord.cleanup();
            } catch (final Throwable err2) {
                LOG.error(err2);
            }
        }
        iter.close();
    }
}
use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.
the class OnePassFastqLauncher method runPairedEnd.
/**
 * Opens the paired FASTQ output and delegates the processing of {@code iter}
 * to {@code runPairedEnd(iter, writer)}.
 *
 * <p>Output selection:
 * <ul>
 * <li>{@code outputFile1} and {@code outputFile2} set: writer opened on both;</li>
 * <li>only {@code outputFile1} set: writer opened on that single file;</li>
 * <li>neither set: writer opened on standard output;</li>
 * <li>only {@code outputFile2} set: rejected as a bad declaration.</li>
 * </ul>
 *
 * @param iter the paired records to process
 * @return the value returned by the delegate, or -1 on error
 * @throws IOException declared by the overridden signature; errors raised here are logged and reported as -1
 */
@Override
protected int runPairedEnd(final CloseableIterator<FastqRecordPair> iter) throws IOException {
    try {
        final FastqPairedWriterFactory fpwf = new FastqPairedWriterFactory();
        fpwf.setCreateMd5(this.write_md5);
        final FastqPairedWriter opened;
        if (outputFile1 != null) {
            opened = (outputFile2 != null ? fpwf.open(outputFile1, outputFile2) : fpwf.open(outputFile1));
        } else if (outputFile2 == null) {
            opened = fpwf.open(new PrintStream(new BufferedOutputStream(stdout())));
        } else {
            getLogger().error("bad output declaration.");
            return -1;
        }
        // The writer is closed exactly once; a failing close() is caught below
        // and reported as -1 instead of escaping from a finally block.
        try (FastqPairedWriter fws = opened) {
            return runPairedEnd(iter, fws);
        }
    } catch (final Throwable err) {
        getLogger().error(err);
        return -1;
    }
}
use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.
the class FastqSplit method openPairedWriter.
/**
 * Opens the paired writer for the split chunk of index {@code i} and records
 * the created file name(s) in the manifest.
 *
 * <p>File names are built by replacing {@code TAG} in {@code basename} with
 * the 1-based, zero-padded chunk number followed by {@code .R12}
 * (interleaved output) or {@code .R1} / {@code .R2} (two files, written to
 * the manifest on one tab-separated line). Missing parent directories are
 * created.
 *
 * @param i        0-based chunk index
 * @param manifest receives the path(s) of the opened file(s)
 * @return the opened writer
 * @throws IOException if the writer cannot be opened
 */
private FastqPairedWriter openPairedWriter(int i, PrintWriter manifest) throws IOException {
    final FastqPairedWriterFactory factory = new FastqPairedWriterFactory().setCreateMd5(write_md5).setAsyncIo(with_asynio);
    final int chunkNumber = i + 1;

    if (this.output_is_interleaved) {
        final File interleaved = new File(this.basename.replace(TAG, String.format("%09d.R12", chunkNumber)));
        final File parentDir = interleaved.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        final FastqPairedWriter writer = factory.open(interleaved);
        manifest.println(interleaved);
        return writer;
    }

    final File[] mates = new File[2];
    for (int side = 0; side < mates.length; ++side) {
        final String suffix = String.format("%09d.R%d", chunkNumber, side + 1);
        final File mateFile = new File(this.basename.replace(TAG, suffix));
        final File parentDir = mateFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        mates[side] = mateFile;
    }
    final FastqPairedWriter writer = factory.open(mates[0], mates[1]);
    manifest.print(mates[0]);
    manifest.print("\t");
    manifest.println(mates[1]);
    return writer;
}
use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.
the class FastqSplit method doWork.
/**
 * Splits FASTQ input into several output files.
 *
 * <p>Exactly one splitting mode must be selected: {@code per_file_number}
 * (a new output is started every N records) or {@code split_number}
 * (records are distributed round-robin over N outputs). The input is
 * treated as paired when two arguments are given, or when
 * {@code input_is_interleaved} is set and zero or one argument is given;
 * otherwise zero or one argument is read as single-end FASTQ (stdin when no
 * argument). The name of every output file is written to the manifest.
 *
 * @param args the input file(s): none (stdin), one, or two
 * @return 0 on success, -1 on bad options or any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.per_file_number < 1 && this.split_number < 1) {
        LOG.error("Option -n or -s are undefined");
        return -1;
    }
    if (this.per_file_number > 0 && this.split_number > 0) {
        LOG.error("Both Options -n and -s are defined");
        return -1;
    }
    if (!this.basename.contains(TAG)) {
        LOG.error("basename doesn't contain " + TAG + ": " + basename);
        return -1;
    }
    PrintWriter manifest = null;
    try {
        if (this.manifestPath == null) {
            // No manifest requested: write it to a sink.
            manifest = new PrintWriter(new NullOuputStream());
        } else {
            manifest = super.openPathOrStdoutAsPrintWriter(this.manifestPath);
        }
        if (args.size() == 2 || (this.input_is_interleaved && (args.isEmpty() || args.size() == 1))) {
            /* paired-end input */
            final List<FastqPairedWriter> fastqWriters = new ArrayList<>();
            FastqPairedWriter previous = null;
            int count_files = 0;
            long n = 0L;
            try (final CloseableIterator<FastqRecordPair> iter = new FastqPairedReaderFactory().setValidateReadNames(this.validate_read_names).open(args)) {
                while (iter.hasNext()) {
                    final FastqRecordPair pair = iter.next();
                    final FastqPairedWriter w;
                    /* split by number of reads per file */
                    if (this.per_file_number > 0) {
                        if (previous == null || n % this.per_file_number == 0) {
                            if (previous != null)
                                previous.close();
                            previous = openPairedWriter(count_files, manifest);
                            count_files++;
                            n = 0L;
                        }
                        w = previous;
                    } else /* split by file */
                    {
                        // Writers are opened lazily, the first time their slot is hit.
                        final int idx = (int) (n % this.split_number);
                        if (idx >= fastqWriters.size()) {
                            w = openPairedWriter(idx, manifest);
                            fastqWriters.add(w);
                        } else {
                            w = fastqWriters.get(idx);
                        }
                    }
                    w.write(pair);
                    n++;
                }
            }
            if (previous != null)
                previous.close();
            for (final FastqPairedWriter w : fastqWriters) w.close();
        } else if (args.isEmpty() || args.size() == 1) {
            /* single-end input */
            if (this.output_is_interleaved) {
                LOG.error("Cannot set output is interleaved if input is not paired.");
                return -1;
            }
            final List<FastqWriter> fastqWriters = new ArrayList<>();
            FastqWriter previous = null;
            long n = 0L;
            int count_files = 0;
            // try-with-resources: the reader is also closed when writing fails.
            try (FastqReader iter = (args.size() == 1
                    ? new FastqReader(new File(args.get(0)))
                    : new FastqReader(IOUtils.openStreamForBufferedReader(stdin())))) {
                while (iter.hasNext()) {
                    final FastqRecord rec = iter.next();
                    final FastqWriter w;
                    /* split by number of reads per file */
                    if (this.per_file_number > 0) {
                        if (previous == null || n % this.per_file_number == 0) {
                            if (previous != null)
                                previous.close();
                            previous = this.openSingleWriter(count_files, manifest);
                            count_files++;
                            n = 0L;
                        }
                        w = previous;
                    } else /* split by file */
                    {
                        final int idx = (int) (n % this.split_number);
                        if (idx >= fastqWriters.size()) {
                            w = this.openSingleWriter(idx, manifest);
                            fastqWriters.add(w);
                        } else {
                            w = fastqWriters.get(idx);
                        }
                    }
                    w.write(rec);
                    n++;
                }
            }
            // The last per-file-count writer must be closed too, as in the paired
            // branch; otherwise the tail of the last output file may never be flushed.
            if (previous != null)
                previous.close();
            for (final FastqWriter w : fastqWriters) w.close();
        } else {
            LOG.error("Illegal number of arguments.");
            return -1;
        }
        manifest.flush();
        manifest.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
    }
}
Aggregations