use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamColorTag method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader samFileReader = null;
SAMFileWriter sw = null;
final ColorUtils colorUtils = new ColorUtils();
try {
final CompiledScript script = super.compileJavascript(this.jsExpression, this.jsFile);
samFileReader = openSamReader(oneFileOrNull(args));
final SAMFileHeader srcheader = samFileReader.getFileHeader();
final SAMFileHeader header = srcheader.clone();
header.addComment(ColorUtils.YC_TAG + " attribute added with " + getProgramName() + " " + getProgramCommandLine());
sw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
final Bindings bindings = script.getEngine().createBindings();
bindings.put("header", samFileReader.getFileHeader());
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
iter = samFileReader.iterator();
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
bindings.put("record", record);
final Color color;
Object result;
try {
result = script.eval(bindings);
} catch (final Exception err) {
if (!ignoreErrors) {
LOG.error(err);
return -1;
}
result = null;
}
if (result == null) {
color = null;
} else if (result instanceof Color) {
color = Color.class.cast(result);
} else if (result instanceof String) {
final String s = (String) result;
Color c2 = null;
try {
if (s.trim().isEmpty()) {
c2 = null;
} else {
c2 = colorUtils.parse(s);
}
} catch (final Exception err) {
if (!ignoreErrors) {
LOG.error(err);
return -1;
}
c2 = null;
}
color = c2;
} else {
if (!ignoreErrors) {
LOG.error("Cannot cast to color a " + result.getClass());
return -1;
}
color = null;
}
if (color != null) {
record.setAttribute(ColorUtils.YC_TAG, ColorUtils.colorToSamAttribute(color));
} else {
// clear attribute
record.setAttribute(ColorUtils.YC_TAG, null);
}
sw.addAlignment(record);
}
sw.close();
sw = null;
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(samFileReader);
CloserUtil.close(sw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamJdk method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader samFileReader = null;
SAMFileWriter sw = null;
try {
final String code;
if (this.scriptFile != null) {
code = IOUtil.slurp(this.scriptFile);
} else if (!StringUtil.isBlank(this.scriptExpr)) {
code = this.scriptExpr;
} else {
LOG.error("Option -e or -f are required. The content of those empty mut be not empty");
return -1;
}
final Random rand = new Random(System.currentTimeMillis());
final String javaClassName = SamJdk.class.getSimpleName() + "Custom" + Math.abs(rand.nextInt());
final StringWriter codeWriter = new StringWriter();
final PrintWriter pw = new PrintWriter(codeWriter);
pw.println("import java.util.*;");
pw.println("import java.util.stream.*;");
pw.println("import java.util.function.*;");
pw.println("import htsjdk.samtools.*;");
pw.println("import htsjdk.samtools.util.*;");
pw.println("import javax.annotation.Generated;");
pw.println("@Generated(value=\"" + SamJdk.class.getSimpleName() + "\",date=\"" + new Iso8601Date(new Date()) + "\")");
pw.println("public class " + javaClassName + " extends " + (this.pair_mode ? AbstractListFilter.class : AbstractFilter.class).getName().replace('$', '.') + " {");
pw.println(" public " + javaClassName + "(final SAMFileHeader header) {");
pw.println(" super(header);");
pw.println(" }");
if (user_code_is_body) {
pw.println(" //user's code starts here");
pw.println(code);
pw.println(" // user's code ends here");
} else {
pw.println(" @Override");
pw.println(" public Object apply(final " + (this.pair_mode ? "List<SAMRecord> records" : "SAMRecord record") + ") {");
pw.println(" /** user's code starts here */");
pw.println(code);
pw.println("/** user's code ends here */");
pw.println(" }");
}
pw.println("}");
pw.flush();
if (!hideGeneratedCode) {
LOG.debug(" Compiling :\n" + InMemoryCompiler.beautifyCode(codeWriter.toString()));
}
if (this.saveCodeInDir != null) {
PrintWriter cw = null;
try {
IOUtil.assertDirectoryIsWritable(this.saveCodeInDir);
cw = new PrintWriter(new File(this.saveCodeInDir, javaClassName + ".java"));
cw.write(codeWriter.toString());
cw.flush();
cw.close();
cw = null;
LOG.info("saved " + javaClassName + ".java in " + this.saveCodeInDir);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(cw);
}
}
final InMemoryCompiler inMemoryCompiler = new InMemoryCompiler();
final Class<?> compiledClass = inMemoryCompiler.compileClass(javaClassName, codeWriter.toString());
final Constructor<?> ctor = compiledClass.getDeclaredConstructor(SAMFileHeader.class);
samFileReader = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = samFileReader.getFileHeader();
if (this.pair_mode) {
final SAMFileHeader.SortOrder order = header.getSortOrder();
if (order == null || order.equals(SAMFileHeader.SortOrder.unsorted)) {
LOG.warning("In `--pair` mode , the input BAM is expected to be sorted on queryname but current sort-order is " + order + " ... Continue...");
} else if (!order.equals(SAMFileHeader.SortOrder.queryname)) {
LOG.error("In `--pair` mode , the input BAM is expected to be sorted on queryname but I've got \"" + order + "\". " + "Use picard SortSam (not `samtools sort` https://github.com/samtools/hts-specs/issues/5 )");
return -1;
}
}
long count = 0L;
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
sw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
iter = samFileReader.iterator();
if (this.pair_mode) {
SAMRecord prev = null;
final AbstractListFilter filter = (AbstractListFilter) ctor.newInstance(header);
final List<SAMRecord> buffer = new ArrayList<>();
for (; ; ) {
int numWarnings = 100;
final Comparator<SAMRecord> nameComparator = ReadNameSortMethod.picard.get();
final SAMRecord record = (iter.hasNext() ? progress.watch(iter.next()) : null);
if (prev != null && record != null && numWarnings > 0 && nameComparator.compare(prev, record) > 0) {
LOG.warn("SamRecord doesn't look sorted on query name using a picard/htsjdk method. Got " + record + " affter " + prev + ". " + "In '--pair' mode, reads should be sorted on query name using **picard/htsjdk**. (samtools != picard) see https://github.com/samtools/hts-specs/issues/5");
--numWarnings;
}
prev = record;
if (record == null || (!buffer.isEmpty() && !buffer.get(0).getReadName().equals(record.getReadName()))) {
if (!buffer.isEmpty()) {
final Object result = filter.apply(buffer);
// result is an array of a collection of reads
if (result != null && (result.getClass().isArray() || (result instanceof Collection))) {
final Collection<?> col;
if (result.getClass().isArray()) {
final Object[] array = (Object[]) result;
col = Arrays.asList(array);
} else {
col = (Collection<?>) result;
}
// write all of reads
for (final Object item : col) {
if (item == null)
throw new JvarkitException.UserError("item in array is null");
if (!(item instanceof SAMRecord))
throw new JvarkitException.UserError("item in array is not a SAMRecord " + item.getClass());
++count;
sw.addAlignment(SAMRecord.class.cast(item));
if (this.LIMIT > 0L && count >= this.LIMIT)
break;
}
} else // result is a SAMRecord
if (result != null && (result instanceof SAMRecord)) {
++count;
sw.addAlignment(SAMRecord.class.cast(result));
} else {
boolean accept = true;
if (result == null) {
accept = false;
} else if (result instanceof Boolean) {
if (Boolean.FALSE.equals(result))
accept = false;
} else if (result instanceof Number) {
if (((Number) result).intValue() != 1)
accept = false;
} else {
LOG.warn("Script returned something that is not a boolean or a number:" + result.getClass());
accept = false;
}
if (!accept) {
for (final SAMRecord item : buffer) {
failing(item, header);
}
} else {
for (final SAMRecord item : buffer) {
++count;
sw.addAlignment(item);
}
}
}
}
// end of if !buffer.isEmpty()
if (record == null)
break;
buffer.clear();
}
// end flush flush
if (this.LIMIT > 0L && count >= this.LIMIT)
break;
buffer.add(record);
}
// infinite loop
} else {
final AbstractFilter filter = (AbstractFilter) ctor.newInstance(header);
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
final Object result = filter.apply(record);
// result is an array of a collection of reads
if (result != null && (result.getClass().isArray() || (result instanceof Collection))) {
final Collection<?> col;
if (result.getClass().isArray()) {
final Object[] array = (Object[]) result;
col = Arrays.asList(array);
} else {
col = (Collection<?>) result;
}
// write all of reads
for (final Object item : col) {
if (item == null)
throw new JvarkitException.UserError("item in array is null");
if (!(item instanceof SAMRecord))
throw new JvarkitException.UserError("item in array is not a SAMRecord " + item.getClass());
++count;
sw.addAlignment(SAMRecord.class.cast(item));
}
} else // result is a SAMRecord
if (result != null && (result instanceof SAMRecord)) {
++count;
sw.addAlignment(SAMRecord.class.cast(result));
} else {
boolean accept = true;
if (result == null) {
accept = false;
} else if (result instanceof Boolean) {
if (Boolean.FALSE.equals(result))
accept = false;
} else if (result instanceof Number) {
if (((Number) result).intValue() != 1)
accept = false;
} else {
LOG.warn("Script returned something that is not a boolean or a number:" + result.getClass());
accept = false;
}
if (!accept) {
failing(record, header);
} else {
++count;
sw.addAlignment(record);
}
}
if (this.LIMIT > 0L && count >= this.LIMIT)
break;
}
}
sw.close();
/* create empty if never called */
openFailing(header);
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(samFileReader);
CloserUtil.close(sw);
CloserUtil.close(this.failingReadsWriter);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar173114 method doWork.
@Override
public int doWork(final List<String> args) {
if (keepQualities)
keepSequence = true;
SamReader sfr = null;
SAMFileWriter sfw = null;
SAMRecordIterator iter = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
iter = sfr.iterator();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader()).logger(LOG);
long nReads = 0;
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
if (!this.keepAttributes) {
final SAMReadGroupRecord g = record.getReadGroup();
record.clearAttributes();
if (g != null && this.keepReadGroup) {
record.setAttribute("RG", g.getId());
}
}
record.setReadName(this.keepName ? record.getReadName() : "R" + Long.toHexString(nReads++));
if (!this.keepMate && record.getReadPairedFlag()) {
record.setReadPairedFlag(false);
record.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
record.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
record.setMateUnmappedFlag(false);
record.setMateNegativeStrandFlag(false);
record.setInferredInsertSize(0);
record.setProperPairFlag(false);
}
if (!this.keepCigar && !record.getReadUnmappedFlag() && record.getCigar() != null) {
record.setCigar(new Cigar(record.getCigar().getCigarElements().stream().filter(C -> !C.getOperator().equals(CigarOperator.H)).collect(Collectors.toList())));
}
if (!this.keepSequence) {
record.setReadBases(SAMRecord.NULL_SEQUENCE);
}
if (!this.keepQualities) {
record.setBaseQualities(SAMRecord.NULL_QUALS);
}
sfw.addAlignment(record);
}
progress.finish();
sfw.close();
sfw = null;
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar214299 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.positionFile == null) {
LOG.error("position File is not defined.");
return -1;
}
final String UNAFFECTED_SAMPLE = "UNAFFECTED";
final String AMBIGOUS_SAMPLE = "AMBIGOUS";
final String UNMAPPED = "UNMAPPED";
SamReader sfr = null;
SAMFileWriter sfw = null;
final IntervalTreeMap<Position> positionsTreeMap = new IntervalTreeMap<>();
final Set<String> samples = new HashSet<>();
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = sfr.getFileHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error("Dictionary missing in input sam");
return -1;
}
try (BufferedReader br = IOUtils.openFileForBufferedReading(this.positionFile)) {
String line;
while ((line = br.readLine()) != null) {
if (line.trim().isEmpty() || line.startsWith("#"))
continue;
final String[] tokens = line.split("[\t]");
if (tokens.length < 4) {
LOG.error("Not enough columns in " + line);
return -1;
}
final String contig = tokens[0];
if (dict.getSequence(contig) == null) {
LOG.error("No such contig in input's sam dictionary: " + contig);
return -1;
}
final int refpos = Integer.parseInt(tokens[1]);
final Interval interval = new Interval(contig, refpos, refpos);
Position position = positionsTreeMap.get(interval);
if (position == null) {
position = new Position();
// position.contig = contig;
position.refpos = refpos;
positionsTreeMap.put(interval, position);
}
final String bases = tokens[2].toUpperCase();
if (bases.length() != 1 || !bases.matches("[ATGC]")) {
LOG.error("in " + line + " bases should be one letter an ATGC");
return -1;
}
if (position.base2sample.containsKey(bases.charAt(0))) {
LOG.error("in " + line + " bases already defined for this position");
return -1;
}
final String sampleName = tokens[3].trim();
if (sampleName.isEmpty()) {
LOG.error("sample name cannot be empty");
return -1;
}
samples.add(sampleName);
position.base2sample.put(bases.charAt(0), sampleName);
}
} catch (final IOException err) {
LOG.error(err);
return -1;
}
if (samples.contains(UNAFFECTED_SAMPLE)) {
LOG.error("Sample cannot be named " + UNAFFECTED_SAMPLE);
return -1;
}
if (samples.contains(AMBIGOUS_SAMPLE)) {
LOG.error("Sample cannot be named " + AMBIGOUS_SAMPLE);
return -1;
}
if (samples.contains(UNMAPPED)) {
LOG.error("Sample cannot be named " + UNMAPPED);
return -1;
}
samples.add(UNAFFECTED_SAMPLE);
samples.add(AMBIGOUS_SAMPLE);
samples.add(UNMAPPED);
final SAMFileHeader newHeader = new SAMFileHeader();
newHeader.setSortOrder(header.getSortOrder());
newHeader.setSequenceDictionary(dict);
newHeader.addComment("generated with " + getProgramName() + " " + getVersion() + " Pierre Lindenbaum : " + getProgramCommandLine());
/* create groups */
for (final String sample : samples) {
final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample);
rg.setSample(sample);
rg.setLibrary(sample);
newHeader.addReadGroup(rg);
}
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, newHeader, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
rec.setAttribute("RG", null);
if (rec.getReadUnmappedFlag()) {
rec.setAttribute("RG", UNMAPPED);
sfw.addAlignment(rec);
continue;
}
final Cigar cigar = rec.getCigar();
final Collection<Position> snps = positionsTreeMap.getContained(new Interval(rec.getContig(), rec.getUnclippedStart(), rec.getUnclippedEnd()));
if (snps == null || snps.isEmpty()) {
rec.setAttribute("RG", UNAFFECTED_SAMPLE);
sfw.addAlignment(rec);
continue;
}
final Map<Integer, Position> index2pos = snps.stream().collect(Collectors.toMap(P -> P.refpos, P -> P));
final Set<String> selectedSamples = new HashSet<>();
final byte[] bases = rec.getReadBases();
if (bases == null || bases.equals(SAMRecord.NULL_SEQUENCE)) {
LOG.error("Bases missing in read " + rec);
return -1;
}
int refPos1 = rec.getUnclippedStart();
int readPos0 = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
final boolean consummeReadBaseOrSoftClip = op.consumesReadBases() || op.equals(CigarOperator.S);
if (op.consumesReferenceBases() && consummeReadBaseOrSoftClip) {
for (int i = 0; i < ce.getLength(); ++i) {
final int nowRefPos1 = (refPos1 + i);
final int nowReadPos0 = (readPos0 + i);
final Position position = index2pos.get(nowRefPos1);
if (position == null)
continue;
if (nowReadPos0 >= bases.length)
continue;
final char base = (char) Character.toUpperCase(bases[nowReadPos0]);
final String sample = position.base2sample.get(base);
if (sample == null)
continue;
selectedSamples.add(sample);
index2pos.remove(nowRefPos1);
if (index2pos.isEmpty())
break;
}
}
if (op.consumesReferenceBases())
refPos1 += ce.getLength();
if (consummeReadBaseOrSoftClip || op.equals(CigarOperator.H)) {
readPos0 += ce.getLength();
}
}
if (selectedSamples.isEmpty()) {
rec.setAttribute("RG", UNAFFECTED_SAMPLE);
} else if (selectedSamples.size() == 1) {
rec.setAttribute("RG", selectedSamples.iterator().next());
} else {
rec.setAttribute("RG", AMBIGOUS_SAMPLE);
}
sfw.addAlignment(rec);
}
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar234081 method doWork.
@Override
public int doWork(java.util.List<String> args) {
SamReader in = null;
SAMFileWriter w = null;
SAMRecordIterator iter = null;
try {
in = super.openSamReader(oneFileOrNull(args));
w = this.writingBamArgs.openSAMFileWriter(this.outputFile, in.getFileHeader(), true);
iter = in.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
final Cigar cigar = rec.getCigar();
final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements());
for (int i = 0; i < cigar.numCigarElements(); ++i) {
CigarElement ce = cigar.getCigarElement(i);
switch(ce.getOperator()) {
case X:
case EQ:
ce = new CigarElement(ce.getLength(), CigarOperator.M);
break;
default:
break;
}
if (!elements.isEmpty() && elements.get(elements.size() - 1).getOperator() == ce.getOperator()) {
elements.set(elements.size() - 1, new CigarElement(ce.getLength() + elements.get(elements.size() - 1).getLength(), ce.getOperator()));
} else {
elements.add(ce);
}
}
rec.setCigar(new Cigar(elements));
}
w.addAlignment(rec);
}
LOG.debug("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(in);
CloserUtil.close(w);
}
}
Aggregations