use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class BedLiftOver method doWork.
@Override
public int doWork(List<String> args) {
if (liftOverFile == null) {
LOG.error("LiftOver file is undefined.");
return -1;
}
this.liftOver = new LiftOver(liftOverFile);
this.liftOver.setLiftOverMinMatch(this.userMinMatch);
PrintWriter out = null;
PrintWriter failed = null;
try {
if (!this.ignoreLiftOverValidation) {
IndexedFastaSequenceFile ref = new IndexedFastaSequenceFile(faidx);
this.liftOver.validateToSequences(ref.getSequenceDictionary());
ref.close();
}
out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
if (this.failedFile != null) {
failed = super.openFileOrStdoutAsPrintWriter(failedFile);
}
if (args.isEmpty()) {
BufferedReader r = openBufferedReader(null);
scan(r, out, failed);
CloserUtil.close(r);
} else {
for (final String filename : args) {
BufferedReader r = openBufferedReader(filename);
scan(r, out, failed);
CloserUtil.close(r);
}
}
out.flush();
out.close();
out = null;
if (failed != null) {
failed.flush();
failed.close();
failed = null;
}
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(failed);
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class VcfLiftOver method doWork.
@Override
public int doWork(List<String> args) {
if (this.liftOverFile == null) {
LOG.error("LiftOver file is undefined.");
return -1;
}
if (this.faidx == null) {
LOG.error("reference file is undefined.");
return -1;
}
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidx);
this.liftOver = new LiftOver(this.liftOverFile);
this.liftOver.setLiftOverMinMatch(this.userMinMatch);
if (!this.ignoreLiftOverValidation) {
this.liftOver.validateToSequences(this.indexedFastaSequenceFile.getSequenceDictionary());
}
return doVcfToVcf(args, outputFile);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class AddLinearIndexToBed method doWork.
@Override
public int doWork(final List<String> args) {
if (refFile == null) {
LOG.error("Reference file undefined");
return -1;
}
PrintStream out = null;
try {
htsjdk.samtools.reference.IndexedFastaSequenceFile ref = new IndexedFastaSequenceFile(this.refFile);
this.dictionary = ref.getSequenceDictionary();
ref.close();
if (this.dictionary == null) {
throw new JvarkitException.FastaDictionaryMissing(this.refFile);
}
this.tid2offset = new long[this.dictionary.size()];
Arrays.fill(this.tid2offset, 0L);
for (int i = 1; i < this.dictionary.size(); ++i) {
this.tid2offset[i] = this.tid2offset[i - 1] + this.dictionary.getSequence(i - 1).getSequenceLength();
}
out = openFileOrStdoutAsPrintStream(this.outputFile);
if (args.isEmpty()) {
LOG.info("reading stdin");
doWork(stdin(), out);
} else {
for (final String arg : args) {
LOG.info("opening " + arg);
InputStream in = IOUtils.openURIForReading(arg);
doWork(in, out);
CloserUtil.close(out);
}
}
out.flush();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
dictionary = null;
tid2offset = null;
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class BamToSql method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidxFile == null) {
LOG.error("ref sequence faidx not defined");
return -1;
}
SAMRecordIterator iter = null;
SamReader sfr = null;
PrintWriter out = null;
GenomicSequence genomicSequence = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
args = new ArrayList<String>(IOUtils.unrollFiles(args));
try {
out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
out.println("CREATE TABLE IF NOT EXISTS SamFile");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("filename TEXT");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Dictionary");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("length INT NOT NULL,");
out.println("tid INT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("groupId TEXT NOT NULL,");
out.println("sample TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Read");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("flag INTEGER NOT NULL,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
out.println(flg.name() + " INTEGER NOT NULL,");
}
}
out.println("rname TEXT,");
out.println("pos INTEGER,");
out.println("mapq INTEGER NOT NULL,");
out.println("cigar TEXT,");
out.println("rnext TEXT,");
out.println("pnext INTEGER,");
out.println("tlen INTEGER,");
out.println("sequence TEXT NOT NULL,");
out.println("qualities TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("group_id INT,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Cigar");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("read_pos INT ,");
out.println("read_base TEXT,");
out.println("read_qual INT ,");
out.println("ref_pos INT ,");
out.println("ref_base TEXT,");
out.println("operator TEXT NOT NULL,");
out.println("read_id INT NOT NULL,");
out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
out.println(");");
out.println("begin transaction;");
int samIndex = 0;
do {
final String inputName;
if (samIndex == 0 && args.isEmpty()) {
sfr = openSamReader(null);
inputName = "<stdin>";
} else {
inputName = args.get(samIndex);
sfr = openSamReader(inputName);
}
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
throw new JvarkitException.FileFormatError("File header missing");
}
final SAMSequenceDictionary dict = header1.getSequenceDictionary();
if (dict == null) {
throw new JvarkitException.DictionaryMissing("No Dictionary in input");
}
final IntervalParser intervalParser = new IntervalParser(dict);
final Interval userInterval;
iter = null;
if (this.regionStr == null || this.regionStr.isEmpty()) {
LOG.warn("You're currently scanning the whole BAM ???!!!");
iter = sfr.iterator();
userInterval = null;
} else {
userInterval = intervalParser.parse(this.regionStr);
if (userInterval == null) {
throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
}
iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
}
out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
for (int i = 0; i < dict.size(); ++i) {
final SAMSequenceRecord ssr = dict.getSequence(i);
out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
}
for (final SAMReadGroupRecord g : header1.getReadGroups()) {
out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final StringBuilder sql = new StringBuilder();
sql.append("insert into Read(" + "name,flag,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.name()).append(",");
}
}
sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
sql.append(quote(rec.getReadName())).append(",");
sql.append(rec.getFlags()).append(",");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
sql.append(",");
}
}
if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getReferenceName()));
sql.append(",");
sql.append(rec.getAlignmentStart());
}
sql.append(",");
sql.append(rec.getMappingQuality());
sql.append(",");
// cigar
if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
sql.append("NULL");
} else {
sql.append(quote(rec.getCigarString()));
}
sql.append(",");
// rnext
if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getMateReferenceName()));
sql.append(",");
sql.append(rec.getMateAlignmentStart());
}
sql.append(",");
// tlen
sql.append(rec.getInferredInsertSize());
sql.append(",");
// sequence
sql.append(quote(rec.getReadString()));
sql.append(",");
// qualities
sql.append(quote(rec.getBaseQualityString()));
sql.append(",");
if (rec.getReadGroup() == null) {
sql.append("NULL");
} else {
sql.append("G.id");
}
sql.append(",F.id FROM SamFile as F");
if (rec.getReadGroup() != null) {
sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
}
sql.append(" ORDER BY F.id DESC LIMIT 1;");
out.println(sql.toString());
if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
int ref = rec.getUnclippedStart();
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
int read = 0;
for (final CigarElement ce : rec.getCigar()) {
final CigarOperator op = ce.getOperator();
if (op.equals(CigarOperator.P))
continue;
for (int i = 0; i < ce.getLength(); ++i) {
sql.setLength(0);
boolean in_user_interval = true;
sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
sql.append("select '");
sql.append(op.name());
sql.append("',");
if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
in_user_interval = false;
}
switch(op) {
case I:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append("NULL,NULL");
read++;
break;
}
case D:
case N:
case // yes H (hard clip)
H:
{
sql.append("NULL,NULL,NULL,");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
break;
}
case M:
case X:
case EQ:
case // yes S, soft clip
S:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
read++;
break;
}
default:
throw new IllegalStateException();
}
sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
if (in_user_interval)
out.println(sql.toString());
}
}
}
}
iter.close();
iter = null;
sfr.close();
sfr = null;
progress.finish();
samIndex++;
} while (samIndex < args.size());
out.println("COMMIT;");
out.flush();
out.close();
LOG.info("done");
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(out);
CloserUtil.close(indexedFastaSequenceFile);
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class VcfJaspar method doWork.
@Override
public int doWork(List<String> args) {
if (this.jasparUri == null) {
LOG.error("Undefined jaspar-uri");
return -1;
}
if (this.fasta == null) {
LOG.error("Undefined fasta sequence");
return -1;
}
try {
LOG.info("Reading JASPAR: " + jasparUri);
LineIterator liter = IOUtils.openURIForLineIterator(this.jasparUri);
Iterator<Matrix> miter = Matrix.iterator(liter);
while (miter.hasNext()) {
Matrix matrix = miter.next();
this.jasparDb.add(matrix.convertToPWM());
}
CloserUtil.close(liter);
LOG.info("JASPAR size: " + this.jasparDb.size());
if (jasparDb.isEmpty()) {
LOG.warn("JASPAR IS EMPTY");
}
LOG.info("opening " + fasta);
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(fasta);
return doVcfToVcf(oneFileOrNull(args), outputFile);
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
this.indexedFastaSequenceFile = null;
}
}
Aggregations