Use of com.github.lindenb.jvarkit.util.picard.GenomicSequence in the project jvarkit by lindenb.
The class FindAllCoverageAtPosition, method getReferenceAt.
/**
 * Return the reference base at ({@code contig}, {@code pos1}) from the indexed FASTA,
 * translating the contig name through a {@link ContigNameConverter} so that e.g.
 * "chr1" and "1" both resolve against the reference dictionary.
 *
 * @param contig chromosome name as spelled in the input (may differ from the dictionary)
 * @param pos1   1-based position on the reference
 * @return the reference base, or '.' when no reference is open, the contig cannot
 *         be resolved, or pos1 is out of range
 */
private char getReferenceAt(final String contig, int pos1) {
    if (this.indexedFastaSequenceFile == null)
        return '.';
    // (re)load the chromosome sequence only when the contig changes
    if (this.genomicSequence == null || !this.genomicSequence.getChrom().equals(contig)) {
        final SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null)
            return '.';
        // map the input contig name to the naming used by the reference dictionary
        final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
        converter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
        final String newctg = converter.apply(contig);
        final SAMSequenceRecord rec = (newctg == null ? null : dict.getSequence(newctg));
        if (rec != null)
            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, newctg);
    }
    // FIX: pos1 is 1-based, so the old guard 'pos1 < 0' let pos1 == 0 through and
    // charAt(pos1 - 1) below would be asked for index -1. Reject anything below 1.
    if (genomicSequence == null || pos1 < 1 || pos1 > genomicSequence.length())
        return '.';
    return genomicSequence.charAt(pos1 - 1);
}
Use of com.github.lindenb.jvarkit.util.picard.GenomicSequence in the project jvarkit by lindenb.
The class VcfLiftOver, method doVcfToVcf.
/**
 * Lift-over every variant of the input VCF to the new reference.
 * Successfully converted variants are written to {@code out}; when
 * {@code this.failedFile} is set, records that cannot be lifted are written to a
 * second VCF with the reason stored in the INFO field {@code this.failedinfoTag}.
 *
 * @param inputName name of the input source (not used in the body)
 * @param in        iterator over the source variants
 * @param out       writer for the lifted variants
 * @return RETURN_OK on success, -1 on any exception
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    // lazy per-chromosome cache of the target reference sequence
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        // keep all metadata lines except the INFO fields the user asked to remove
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        // optional side-channel VCF for variants that could not be lifted
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.openVariantContextWriter(failedFile);
            failed.writeHeader(header2);
        }
        // main output header: same samples, but the dictionary of the NEW reference
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext()) {
            VariantContext ctx = progress.watch(in.next());
            // strip the requested INFO attributes before any other processing
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            // optionally skip indels entirely
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            // relax the liftOver minimum-match ratio based on allele lengths.
            // NOTE(review): Math.min(0, ...) can never exceed 0, so minAlleleLength
            // is always <= 0 and the ratio passed below is <= 0 — possibly
            // Math.max(1, ...) was intended; confirm upstream.
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            // 4th ctor argument is the negative-strand flag (false = plus strand);
            // the interval name encodes contig|start|ref for debugging
            final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(),
                    false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                // lifted onto a contig absent from the new reference dictionary
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        // symbolic / no-call / spanning-deletion alleles are copied untouched
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        // target maps to the minus strand: reverse-complement the allele
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        // compare the (possibly rev-comp'ed) allele against the new
                        // reference, case-insensitively, starting at lifted.getStart().
                        // NOTE(review): EVERY allele — ALT as well as REF — is compared
                        // at the same position, so a true ALT allele will mismatch;
                        // confirm whether only the REF allele was meant to be checked.
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        // x < length means either a mismatch above or the allele ran
                        // off the end of the contig
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            // stop validating the remaining alleles of this variant
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                // the lifted interval must keep the original span length
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                // rebuild the variant at its new location, recording the old one in infoTag
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                // genotypes: substitute the reverse-complemented alleles where needed
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        // close (and null) the failed writer so the finally-block close is a no-op
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
Use of com.github.lindenb.jvarkit.util.picard.GenomicSequence in the project jvarkit by lindenb.
The class BamToSql, method doWork.
/**
 * Convert one or more SAM/BAM inputs into a stream of SQLite statements
 * (schema + INSERTs) written to {@code this.outputFile} or stdout.
 * Tables: SamFile, Dictionary, ReadGroup, Read and (optionally, per base)
 * Cigar. Reference bases come from the FASTA given by {@code this.faidxFile}.
 *
 * @param args input SAM/BAM paths; empty list means read from stdin
 * @return 0 on success, -1 on any error
 */
@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("ref sequence faidx not defined");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    PrintWriter out = null;
    // lazy per-chromosome cache of the reference sequence
    GenomicSequence genomicSequence = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    args = new ArrayList<String>(IOUtils.unrollFiles(args));
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        // ---- emit the schema ----
        out.println("CREATE TABLE IF NOT EXISTS SamFile");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("filename TEXT");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Dictionary");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("length INT NOT NULL,");
        out.println("tid INT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("groupId TEXT NOT NULL,");
        out.println("sample TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Read");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("flag INTEGER NOT NULL,");
        // with -printflag, one boolean column per SAM flag bit
        if (this.printflag) {
            for (final SAMFlag flg : SAMFlag.values()) {
                out.println(flg.name() + " INTEGER NOT NULL,");
            }
        }
        out.println("rname TEXT,");
        out.println("pos INTEGER,");
        out.println("mapq INTEGER NOT NULL,");
        out.println("cigar TEXT,");
        out.println("rnext TEXT,");
        out.println("pnext INTEGER,");
        out.println("tlen INTEGER,");
        out.println("sequence TEXT NOT NULL,");
        out.println("qualities TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("group_id INT,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
        out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Cigar");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("read_pos INT ,");
        out.println("read_base TEXT,");
        out.println("read_qual INT ,");
        out.println("ref_pos INT ,");
        out.println("ref_base TEXT,");
        out.println("operator TEXT NOT NULL,");
        out.println("read_id INT NOT NULL,");
        out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
        out.println(");");
        out.println("begin transaction;");
        // ---- one pass per input file; do/while so an empty args list reads stdin once ----
        int samIndex = 0;
        do {
            final String inputName;
            if (samIndex == 0 && args.isEmpty()) {
                sfr = openSamReader(null);
                inputName = "<stdin>";
            } else {
                inputName = args.get(samIndex);
                sfr = openSamReader(inputName);
            }
            final SAMFileHeader header1 = sfr.getFileHeader();
            if (header1 == null) {
                throw new JvarkitException.FileFormatError("File header missing");
            }
            final SAMSequenceDictionary dict = header1.getSequenceDictionary();
            if (dict == null) {
                throw new JvarkitException.DictionaryMissing("No Dictionary in input");
            }
            final IntervalParser intervalParser = new IntervalParser(dict);
            final Interval userInterval;
            iter = null;
            if (this.regionStr == null || this.regionStr.isEmpty()) {
                LOG.warn("You're currently scanning the whole BAM ???!!!");
                iter = sfr.iterator();
                userInterval = null;
            } else {
                userInterval = intervalParser.parse(this.regionStr);
                if (userInterval == null) {
                    throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
                }
                iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
            }
            out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
            // NOTE(review): the Dictionary 'name' column receives quote(inputName)
            // (the file name) rather than quote(ssr.getSequenceName()); ssr is only
            // used for its length — this looks like a copy/paste bug, confirm upstream.
            for (int i = 0; i < dict.size(); ++i) {
                final SAMSequenceRecord ssr = dict.getSequence(i);
                out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
            }
            for (final SAMReadGroupRecord g : header1.getReadGroups()) {
                out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                final StringBuilder sql = new StringBuilder();
                // ---- one INSERT per read ----
                sql.append("insert into Read(" + "name,flag,");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.name()).append(",");
                    }
                }
                sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
                sql.append(quote(rec.getReadName())).append(",");
                sql.append(rec.getFlags()).append(",");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
                        sql.append(",");
                    }
                }
                // rname / pos: NULL,NULL for unaligned records
                if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getReferenceName()));
                    sql.append(",");
                    sql.append(rec.getAlignmentStart());
                }
                sql.append(",");
                sql.append(rec.getMappingQuality());
                sql.append(",");
                // cigar
                if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
                    sql.append("NULL");
                } else {
                    sql.append(quote(rec.getCigarString()));
                }
                sql.append(",");
                // rnext
                if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getMateReferenceName()));
                    sql.append(",");
                    sql.append(rec.getMateAlignmentStart());
                }
                sql.append(",");
                // tlen
                sql.append(rec.getInferredInsertSize());
                sql.append(",");
                // sequence
                sql.append(quote(rec.getReadString()));
                sql.append(",");
                // qualities
                sql.append(quote(rec.getBaseQualityString()));
                sql.append(",");
                if (rec.getReadGroup() == null) {
                    sql.append("NULL");
                } else {
                    sql.append("G.id");
                }
                // resolve samfile_id (and group_id) from the most recent rows
                sql.append(",F.id FROM SamFile as F");
                if (rec.getReadGroup() != null) {
                    sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
                }
                sql.append(" ORDER BY F.id DESC LIMIT 1;");
                out.println(sql.toString());
                // ---- optionally one Cigar row per base of every element (P skipped) ----
                if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                    if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                        genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                    }
                    // 1-based reference position; starting at the *unclipped* start
                    // gives clipped bases a reference coordinate too
                    int ref = rec.getUnclippedStart();
                    final byte[] bases = rec.getReadBases();
                    final byte[] quals = rec.getBaseQualities();
                    int read = 0;
                    for (final CigarElement ce : rec.getCigar()) {
                        final CigarOperator op = ce.getOperator();
                        if (op.equals(CigarOperator.P))
                            continue;
                        for (int i = 0; i < ce.getLength(); ++i) {
                            // the StringBuilder is reused; reset before each row
                            sql.setLength(0);
                            boolean in_user_interval = true;
                            sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
                            sql.append("select '");
                            sql.append(op.name());
                            sql.append("',");
                            // suppress rows whose ref position falls outside the user region
                            if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
                                in_user_interval = false;
                            }
                            switch(op) {
                                case I:
                                    {
                                        // insertion: read base only, no ref position
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append("NULL,NULL");
                                        read++;
                                        break;
                                    }
                                case D:
                                case N:
                                case H: // yes H (hard clip)
                                    {
                                        // ref base only; note H is given a ref coordinate
                                        // here (consistent with the unclipped start above)
                                        // even though the SAM spec says hard clips consume
                                        // neither read nor reference
                                        sql.append("NULL,NULL,NULL,");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        break;
                                    }
                                case M:
                                case X:
                                case EQ:
                                case S: // yes S, soft clip
                                    {
                                        // both read base and ref base
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        read++;
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException();
                            }
                            sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
                            if (in_user_interval)
                                out.println(sql.toString());
                        }
                    }
                }
            }
            iter.close();
            iter = null;
            sfr.close();
            sfr = null;
            progress.finish();
            samIndex++;
        } while (samIndex < args.size());
        out.println("COMMIT;");
        out.flush();
        out.close();
        LOG.info("done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(out);
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Use of com.github.lindenb.jvarkit.util.picard.GenomicSequence in the project jvarkit by lindenb.
The class VcfJaspar, method doVcfToVcf.
/**
 * Annotate each variant with the JASPAR matrices whose binding-site windows
 * overlap the variant start on the reference. Hits are stored in the INFO
 * field "JASPAR" as "(Name|length|Score/1000|pos|strand)" strings.
 *
 * @param inputName name of the input source (not used in the body)
 * @param in        iterator over the input variants
 * @param out       writer receiving the (possibly annotated) variants
 * @return RETURN_OK
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final String ATT = "JASPAR";
    // lazy per-chromosome cache of the reference sequence
    GenomicSequence genomicSequence = null;
    final VCFHeader header = new VCFHeader(in.getHeader());
    addMetaData(header);
    header.addMetaDataLine(new VCFInfoHeaderLine(ATT, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Jaspar pattern overlapping: Format: (Name|length|Score/1000|pos|strand)"));
    out.writeHeader(header);
    while (in.hasNext()) {
        final VariantContext ctx = in.next();
        if (genomicSequence == null || !genomicSequence.getChrom().equals(ctx.getContig())) {
            LOG.info("Loading sequence " + ctx.getContig());
            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ctx.getContig());
        }
        final Set<String> annotations = new HashSet<String>();
        for (final Matrix matrix : this.jasparDb) {
            // every window of length matrix.length() that can cover the variant start
            final int leftMost = Math.max(0, ctx.getStart() - matrix.length());
            for (int pos0 = leftMost; pos0 < ctx.getStart() && pos0 + matrix.length() <= genomicSequence.length(); ++pos0) {
                final CharSequence fwd = new SubSequence(genomicSequence, pos0, pos0 + matrix.length());
                final CharSequence rev = new RevCompCharSequence(fwd);
                // side 0 = forward strand '+', side 1 = reverse strand '-'
                for (int side = 0; side < 2; ++side) {
                    final double score = matrix.score(side == 0 ? fwd : rev);
                    if (score <= 0)
                        continue;
                    if (score >= matrix.max() * this.fraction_of_max) {
                        annotations.add("(" + matrix.getName().replaceAll("[ \t;=]+", "/")
                                + "|" + matrix.length()
                                + "|" + (int) (1000.0 * (score / matrix.max()))
                                + "|" + (pos0 + 1)
                                + "|" + (side == 0 ? '+' : '-')
                                + ")");
                        // one hit per window is enough: stop scanning strands
                        break;
                    }
                }
            }
        }
        if (annotations.isEmpty()) {
            out.add(ctx);
        } else {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(ATT, annotations.toArray(new String[0]));
            out.add(vcb.make());
        }
    }
    return RETURN_OK;
}
Use of com.github.lindenb.jvarkit.util.picard.GenomicSequence in the project jvarkit by lindenb.
The class SamSlop, method doWork.
/**
 * Extend ("slop") each mapped read on its 5' and/or 3' side with bases copied
 * from the reference, updating the read string, the base qualities (padded
 * with {@code defaultQual}) and the CIGAR (clips become/merge into M).
 *
 * @param args zero or one input SAM/BAM path (null means stdin)
 * @return RETURN_OK on success, -1 on error
 */
@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
        // NOTE(review): there is no 'return -1' here — execution continues, and
        // charAt(0) below will throw if defaultQual is empty; confirm whether
        // the missing return is intentional.
    }
    // lazy per-chromosome cache of the reference sequence
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        // alignment starts may move, so the output order is no longer guaranteed
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            // pass through records that cannot (or need not) be extended
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            // mutable copies of the read sequence and qualities
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            // anchor positions depend on whether clips are kept or removed
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            // ---- 5' extension ----
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // /remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        // only soft clips carry bases/qualities to strip
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // collect up to extend5 reference bases immediately upstream
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // preprend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            // ---- 3' extension ----
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // /remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                // append up to extend3 reference bases, stopping at the contig end
                for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                // NOTE(review): if the loop appended nothing this adds a zero-length
                // M element, and the merge pass below does not drop it — confirm.
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar: merge adjacent elements with the same operator
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            // validation problems are logged but the record is written anyway
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Aggregations