use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class MiniCaller method doWork.
@Override
public int doWork(final List<String> args) {
ConcatSam.ConcatSamIterator iter = null;
try {
if (this.fastaFile == null) {
LOG.error("no REF");
return -1;
}
/* load faid */
final ReferenceGenomeFactory referenceGenomeFactory = new ReferenceGenomeFactory();
this.referenceGenome = referenceGenomeFactory.openFastaFile(this.fastaFile);
this.dictionary = this.referenceGenome.getDictionary();
if (this.dictionary == null) {
LOG.error(JvarkitException.FastaDictionaryMissing.getMessage(this.fastaFile.getPath()));
}
/* create sam record iterator */
iter = new ConcatSam.Factory().addInterval(this.rgnStr).setEnableUnrollList(true).open(args);
final SAMFileHeader samFileheader = iter.getFileHeader();
final SAMSequenceDictionary dict = samFileheader.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.BamDictionaryMissing.getMessage(String.join(", ", args)));
return -1;
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, this.dictionary)) {
LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(dict, this.dictionary));
return -1;
}
final List<SAMReadGroupRecord> groups = samFileheader.getReadGroups();
if (groups == null || groups.isEmpty()) {
LOG.error("No group defined in input");
return -1;
}
final Set<String> sampleSet = groups.stream().map(srgr -> this.samRecordPartition.apply(srgr, samRecordPartition.name())).collect(Collectors.toSet());
/* create VCF metadata */
final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
metaData.add(new VCFFormatHeaderLine("DPG", // one value of each genotype
VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Depth for each allele"));
metaData.add(new VCFFormatHeaderLine("DP4", 4, VCFHeaderLineType.Integer, "Depth ReforAlt|Strand : RF,RR,AF,AR"));
metaData.add(new VCFInfoHeaderLine("INDEL", 1, VCFHeaderLineType.Flag, "Variant is indel"));
// addMetaData(metaData);
final VCFHeader vcfHeader = new VCFHeader(metaData, sampleSet);
vcfHeader.setSequenceDictionary(this.dictionary);
/* create variant context */
this.variantContextWriter = super.openVariantContextWriter(outputFile);
this.variantContextWriter.writeHeader(vcfHeader);
ReferenceContig genomicSeq = null;
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.readFilter.filterOut(rec))
continue;
/* flush buffer if needed */
while (!this.buffer.isEmpty() && (this.buffer.get(0).tid < rec.getReferenceIndex() || (this.buffer.get(0).tid == rec.getReferenceIndex() && (this.buffer.get(0).getEnd()) < rec.getAlignmentStart()))) {
this.buffer.remove(0).print();
}
/* get genomic sequence at this position */
if (genomicSeq == null || !genomicSeq.getContig().equals(rec.getContig())) {
genomicSeq = this.referenceGenome.getContig(rec.getContig());
}
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
int readPos = 0;
// 0 based-reference
int refPos0 = rec.getAlignmentStart() - 1;
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final String sampleName = this.samRecordPartition.getPartion(rec, samRecordPartition.name());
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case H:
break;
case S:
readPos += ce.getLength();
break;
// go
case N:
case D:
{
if (// we need base before deletion
refPos0 > 0) {
char refBase = genomicSeq.charAt(refPos0 - 1);
/* we use base before deletion */
final StringBuilder sb = new StringBuilder(ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append(genomicSeq.charAt(refPos0 + i));
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(sb.toString(), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf(refBase), false)).incr(rec.getReadNegativeStrandFlag());
}
refPos0 += ce.getLength();
break;
}
case I:
{
if (refPos0 > 0) {
// float qual=0;
char refBase = Character.toUpperCase(genomicSeq.charAt(refPos0 - 1));
final StringBuilder sb = new StringBuilder(1 + ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append((char) bases[readPos + i]);
// qual+=(readPos + i < quals.length?quals[ readPos + i]:0);
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(String.valueOf(refBase), true)).getSample(sampleName).getAllele(Allele.create(sb.toString().toUpperCase(), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
break;
}
case EQ:
case M:
case X:
{
for (int i = 0; i < ce.getLength(); ++i) {
findContext(rec.getReferenceIndex(), refPos0 + i, Allele.create(String.valueOf(genomicSeq.charAt(refPos0 + i)), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf((char) bases[readPos + i]), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
refPos0 += ce.getLength();
break;
}
default:
throw new IllegalStateException("Case statement didn't deal with cigar op: " + op);
}
}
} else {
break;
}
}
while (!buffer.isEmpty()) buffer.remove(0).print();
progress.finish();
iter.close();
iter = null;
this.variantContextWriter.close();
this.variantContextWriter = null;
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(this.referenceGenome);
CloserUtil.close(this.variantContextWriter);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class EvsToVcf method doWork.
@Override
public int doWork(List<String> args) {
VariantContextWriter out = null;
try {
if (!args.isEmpty()) {
LOG.error("Illegal number of arguments");
return -1;
}
JAXBContext jc = JAXBContext.newInstance(SnpData.class);
Unmarshaller unmarshaller = jc.createUnmarshaller();
out = VCFUtils.createVariantContextWriterToStdout();
SAMSequenceDictionary dict = new SAMSequenceDictionary();
_fillDict(dict, "1", 249250621);
_fillDict(dict, "2", 243199373);
_fillDict(dict, "3", 198022430);
_fillDict(dict, "4", 191154276);
_fillDict(dict, "5", 180915260);
_fillDict(dict, "6", 171115067);
_fillDict(dict, "7", 159138663);
_fillDict(dict, "8", 146364022);
_fillDict(dict, "9", 141213431);
_fillDict(dict, "10", 135534747);
_fillDict(dict, "11", 135006516);
_fillDict(dict, "12", 133851895);
_fillDict(dict, "13", 115169878);
_fillDict(dict, "14", 107349540);
_fillDict(dict, "15", 102531392);
_fillDict(dict, "16", 90354753);
_fillDict(dict, "17", 81195210);
_fillDict(dict, "18", 78077248);
_fillDict(dict, "19", 59128983);
_fillDict(dict, "20", 63025520);
_fillDict(dict, "21", 48129895);
_fillDict(dict, "22", 51304566);
_fillDict(dict, "X", 155270560);
_fillDict(dict, "Y", 59373566);
_fillDict(dict, "MT", 16569);
VCFHeader header = new VCFHeader();
header.setSequenceDictionary(dict);
header.addMetaDataLine(new VCFInfoHeaderLine("CONS", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScore"));
header.addMetaDataLine(new VCFInfoHeaderLine("GERP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("DP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Integer, "conservationScoreGERP"));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
out.writeHeader(header);
Pattern comma = Pattern.compile("[,]");
XMLInputFactory xif = XMLInputFactory.newFactory();
xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
XMLEventReader xmlr = xif.createXMLEventReader(System.in);
while (xmlr.hasNext() && !System.out.checkError()) {
XMLEvent evt = xmlr.peek();
if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
xmlr.nextEvent();
continue;
}
SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
VariantContextBuilder vcb = new VariantContextBuilder();
Set<Allele> alleles = new HashSet<Allele>();
alleles.add(Allele.create(snpData.getRefAllele(), true));
for (String s : comma.split(snpData.getAltAlleles())) {
if (isEmpty(s))
continue;
alleles.add(Allele.create(s, false));
}
vcb.chr(snpData.getChromosome());
vcb.start(snpData.getChrPosition());
vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
vcb.id(snpData.getRsIds());
}
vcb.alleles(alleles);
Float d = parseDouble(snpData.getConservationScore());
if (d != null) {
vcb.attribute("CONS", d);
}
d = parseDouble(snpData.getConservationScoreGERP());
if (d != null) {
vcb.attribute("GERP", d);
}
vcb.attribute("uaMAF", (float) snpData.getUaMAF());
vcb.attribute("aaMAF", (float) snpData.getAaMAF());
vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
vcb.attribute("DP", snpData.getAvgSampleReadDepth());
out.add(vcb.make());
}
xmlr.close();
out.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfStage method buildInfoHeaderTab.
/**
* build a table describing the INFO column
*/
private Tab buildInfoHeaderTab(final VCFHeader header) {
final TableView<VCFInfoHeaderLine> table = new TableView<>(FXCollections.observableArrayList(header.getInfoHeaderLines()));
table.getColumns().add(makeColumn("ID", F -> F.getID()));
table.getColumns().add(makeColumn("Type", F -> F.getType() == null ? null : F.getType().name()));
table.getColumns().add(makeColumn("Count", F -> F.isFixedCount() ? F.getCount() : null));
table.getColumns().add(makeColumn("Description", F -> F.getDescription()));
final Tab tab = new Tab("INFO", table);
tab.setClosable(false);
table.setPlaceholder(new Label("No INFO defined."));
return tab;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VCFPredictions method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, VariantContextWriter w) {
ReferenceContig genomicSequence = null;
try {
LOG.info("opening REF:" + this.referenceGenomeSource);
this.referenceGenome = new ReferenceGenomeFactory().open(this.referenceGenomeSource);
loadKnownGenesFromUri();
final VCFHeader header = (VCFHeader) r.getHeader();
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(this.referenceGenome.getDictionary());
contigNameConverter.setOnNotFound(OnNotFound.SKIP);
final VCFHeader h2 = new VCFHeader(header);
addMetaData(h2);
switch(this.outputSyntax) {
case Vep:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
break;
}
case SnpEff:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
break;
}
default:
{
final StringBuilder format = new StringBuilder();
for (FORMAT1 f : FORMAT1.values()) {
if (format.length() > 0)
format.append("|");
format.append(f.name());
}
h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
break;
}
}
w.writeHeader(h2);
final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
final SequenceOntologyTree.Term so_intron = soTree.getTermByAcn("SO:0001627");
final SequenceOntologyTree.Term so_exon = soTree.getTermByAcn("SO:0001791");
final SequenceOntologyTree.Term so_splice_donor = soTree.getTermByAcn("SO:0001575");
final SequenceOntologyTree.Term so_splice_acceptor = soTree.getTermByAcn("SO:0001574");
final SequenceOntologyTree.Term so_5_prime_UTR_variant = soTree.getTermByAcn("SO:0001623");
final SequenceOntologyTree.Term so_3_prime_UTR_variant = soTree.getTermByAcn("SO:0001624");
final SequenceOntologyTree.Term so_splicing_variant = soTree.getTermByAcn("SO:0001568");
final SequenceOntologyTree.Term so_stop_lost = soTree.getTermByAcn("SO:0001578");
final SequenceOntologyTree.Term so_stop_gained = soTree.getTermByAcn("SO:0001587");
final SequenceOntologyTree.Term so_coding_synonymous = soTree.getTermByAcn("SO:0001819");
final SequenceOntologyTree.Term so_coding_non_synonymous = soTree.getTermByAcn("SO:0001583");
final SequenceOntologyTree.Term so_intergenic = soTree.getTermByAcn("SO:0001628");
final SequenceOntologyTree.Term so_nc_transcript_variant = soTree.getTermByAcn("SO:0001619");
final SequenceOntologyTree.Term so_non_coding_exon_variant = soTree.getTermByAcn("SO:0001792");
final SequenceOntologyTree.Term _2KB_upstream_variant = soTree.getTermByAcn("SO:0001636");
final SequenceOntologyTree.Term _5KB_upstream_variant = soTree.getTermByAcn("SO:0001635");
final SequenceOntologyTree.Term _5KB_downstream_variant = soTree.getTermByAcn("SO:0001633");
final SequenceOntologyTree.Term _500bp_downstream_variant = soTree.getTermByAcn("SO:0001634");
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
while (r.hasNext()) {
final VariantContext ctx = progress.watch(r.next());
final String normalizedContig = contigNameConverter.apply(ctx.getContig());
final List<KnownGene> genes = new ArrayList<>();
if (!StringUtil.isBlank(normalizedContig)) {
for (final List<KnownGene> l2 : this.knownGenes.getOverlapping(new Interval(normalizedContig, ctx.getStart(), // 1-based
ctx.getEnd()))) {
genes.addAll(l2);
}
}
final List<Annotation> ctx_annotations = new ArrayList<Annotation>();
if (genes == null || genes.isEmpty()) {
// intergenic
Annotation a = new Annotation();
a.seqont.add(so_intergenic);
ctx_annotations.add(a);
} else {
if (genomicSequence == null || !genomicSequence.hasName(normalizedContig)) {
LOG.info("getting genomic Sequence for " + normalizedContig);
genomicSequence = this.referenceGenome.getContig(normalizedContig);
if (genomicSequence == null)
throw new JvarkitException.ContigNotFoundInDictionary(normalizedContig, this.referenceGenome.getDictionary());
}
for (final KnownGene gene : genes) {
final GeneticCode geneticCode = GeneticCode.getStandard();
for (final Allele alt2 : ctx.getAlternateAlleles()) {
if (alt2.isNoCall())
continue;
if (alt2.isSymbolic()) {
LOG.warn("symbolic allele are not handled... " + alt2.getDisplayString());
continue;
}
if (alt2.isReference())
continue;
final Annotation annotations = new Annotation();
annotations.kg = gene;
annotations.alt2 = alt2;
if (gene.isNonCoding()) {
annotations.seqont.add(so_nc_transcript_variant);
continue;
}
ctx_annotations.add(annotations);
StringBuilder wildRNA = null;
ProteinCharSequence wildProt = null;
ProteinCharSequence mutProt = null;
MutedSequence mutRNA = null;
int position_in_cds = -1;
final int position = ctx.getStart() - 1;
if (!String.valueOf(genomicSequence.charAt(position)).equalsIgnoreCase(ctx.getReference().getBaseString())) {
if (isSimpleBase(ctx.getReference())) {
LOG.warn("Warning REF!=GENOMIC SEQ!!! at " + position + "/" + ctx.getReference());
}
continue;
}
if (gene.isPositiveStrand()) {
if (position < gene.getTxStart() - 2000) {
annotations.seqont.add(_5KB_upstream_variant);
} else if (position < gene.getTxStart()) {
annotations.seqont.add(_2KB_upstream_variant);
} else if (position >= gene.getTxEnd() + 500) {
annotations.seqont.add(_5KB_downstream_variant);
} else if (position >= gene.getTxEnd()) {
annotations.seqont.add(_500bp_downstream_variant);
} else if (position < gene.getCdsStart()) {
// UTR5
annotations.seqont.add(so_5_prime_UTR_variant);
} else if (gene.getCdsEnd() <= position) {
annotations.seqont.add(so_3_prime_UTR_variant);
} else {
int exon_index = 0;
while (exon_index < gene.getExonCount()) {
final KnownGene.Exon exon = gene.getExon(exon_index);
for (int i = exon.getStart(); i < exon.getEnd(); ++i) {
if (i == position) {
annotations.exon_name = exon.getName();
if (exon.isNonCoding()) {
annotations.seqont.add(so_non_coding_exon_variant);
}
}
if (i < gene.getTxStart())
continue;
if (i < gene.getCdsStart())
continue;
if (i >= gene.getCdsEnd())
break;
if (wildRNA == null) {
wildRNA = new StringBuilder();
mutRNA = new MutedSequence(wildRNA);
}
if (i == position) {
annotations.seqont.add(so_exon);
annotations.exon_name = exon.getName();
position_in_cds = wildRNA.length();
annotations.position_cds = position_in_cds;
// in splicing ?
if (exon.isSplicing(position)) {
if (exon.isSplicingAcceptor(position)) {
// SPLICING_ACCEPTOR
annotations.seqont.add(so_splice_acceptor);
} else if (exon.isSplicingDonor(position)) {
// SPLICING_DONOR
annotations.seqont.add(so_splice_donor);
} else // ??
{
annotations.seqont.add(so_splicing_variant);
}
}
}
wildRNA.append(genomicSequence.charAt(i));
if (i == position && isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
mutRNA.put(position_in_cds, alt2.getBaseString().charAt(0));
}
if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
wildProt = new ProteinCharSequence(geneticCode, wildRNA);
mutProt = new ProteinCharSequence(geneticCode, mutRNA);
}
}
final KnownGene.Intron intron = exon.getNextIntron();
if (intron != null && intron.contains(position)) {
annotations.intron_name = intron.getName();
annotations.seqont.add(so_intron);
if (intron.isSplicing(position)) {
if (intron.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (intron.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ???
{
annotations.seqont.add(so_splicing_variant);
}
}
}
++exon_index;
}
}
} else // reverse orientation
{
if (position >= gene.getTxEnd() + 2000) {
annotations.seqont.add(_5KB_upstream_variant);
} else if (position >= gene.getTxEnd()) {
annotations.seqont.add(_2KB_upstream_variant);
} else if (position < gene.getTxStart() - 500) {
annotations.seqont.add(_5KB_downstream_variant);
} else if (position < gene.getTxStart()) {
annotations.seqont.add(_500bp_downstream_variant);
} else if (position < gene.getCdsStart()) {
annotations.seqont.add(so_3_prime_UTR_variant);
} else if (gene.getCdsEnd() <= position) {
annotations.seqont.add(so_5_prime_UTR_variant);
} else {
int exon_index = gene.getExonCount() - 1;
while (exon_index >= 0) {
final KnownGene.Exon exon = gene.getExon(exon_index);
for (int i = exon.getEnd() - 1; i >= exon.getStart(); --i) {
if (i == position) {
annotations.exon_name = exon.getName();
if (exon.isNonCoding()) {
annotations.seqont.add(so_non_coding_exon_variant);
}
}
if (i >= gene.getCdsEnd())
continue;
if (i < gene.getCdsStart())
break;
if (wildRNA == null) {
wildRNA = new StringBuilder();
mutRNA = new MutedSequence(wildRNA);
}
if (i == position) {
annotations.seqont.add(so_exon);
position_in_cds = wildRNA.length();
annotations.position_cds = position_in_cds;
// in splicing ?
if (exon.isSplicing(position)) {
if (exon.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (exon.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ?
{
annotations.seqont.add(so_splicing_variant);
}
}
if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
mutRNA.put(position_in_cds, AcidNucleics.complement(alt2.getBaseString().charAt(0)));
}
}
wildRNA.append(AcidNucleics.complement(genomicSequence.charAt(i)));
if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
wildProt = new ProteinCharSequence(geneticCode, wildRNA);
mutProt = new ProteinCharSequence(geneticCode, mutRNA);
}
}
final KnownGene.Intron intron = exon.getPrevIntron();
if (intron != null && intron.contains(position)) {
annotations.intron_name = intron.getName();
annotations.seqont.add(so_intron);
if (intron.isSplicing(position)) {
if (intron.isSplicingAcceptor(position)) {
annotations.seqont.add(so_splice_acceptor);
} else if (intron.isSplicingDonor(position)) {
annotations.seqont.add(so_splice_donor);
} else // ?
{
annotations.seqont.add(so_splicing_variant);
}
}
}
--exon_index;
}
}
}
if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference()) && wildProt != null && mutProt != null && position_in_cds >= 0) {
final int pos_aa = position_in_cds / 3;
final int mod = position_in_cds % 3;
annotations.wildCodon = ("" + wildRNA.charAt(position_in_cds - mod + 0) + wildRNA.charAt(position_in_cds - mod + 1) + wildRNA.charAt(position_in_cds - mod + 2));
annotations.mutCodon = ("" + mutRNA.charAt(position_in_cds - mod + 0) + mutRNA.charAt(position_in_cds - mod + 1) + mutRNA.charAt(position_in_cds - mod + 2));
annotations.position_protein = (pos_aa + 1);
annotations.wildAA = String.valueOf(wildProt.charAt(pos_aa));
annotations.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
annotations.seqont.remove(so_exon);
if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
annotations.seqont.add(so_stop_lost);
} else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
annotations.seqont.add(so_stop_gained);
} else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
annotations.seqont.add(so_coding_synonymous);
} else {
annotations.seqont.add(so_coding_non_synonymous);
}
}
}
}
}
final Set<String> info = new HashSet<String>(ctx_annotations.size());
for (final Annotation a : ctx_annotations) {
info.add(a.toString());
}
final VariantContextBuilder vb = new VariantContextBuilder(ctx);
final String thetag;
switch(this.outputSyntax) {
case Vep:
thetag = "CSQ";
break;
case SnpEff:
thetag = "ANN";
break;
default:
thetag = TAG;
break;
}
vb.attribute(thetag, info.toArray());
w.add(vb.make());
}
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.referenceGenome);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfUcsc method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
final String TAG;
if (StringUtil.isBlank(this.infoTag)) {
TAG = "UCSC_" + database.toUpperCase() + "_" + table.toUpperCase();
} else {
TAG = this.infoTag;
}
VCFHeader header = in.getHeader();
final VCFHeader h2 = new VCFHeader(header);
h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, database + "." + table));
if (!StringUtil.isBlank(this.filterIn)) {
h2.addMetaDataLine(new VCFFilterHeaderLine(this.filterIn, "Set by " + this.getClass().getName()));
} else if (!StringUtil.isBlank(this.filterOut)) {
h2.addMetaDataLine(new VCFFilterHeaderLine(this.filterOut, "Set by " + this.getClass().getName()));
}
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
out.writeHeader(h2);
final Map<Integer, PreparedStatement> bin2pstmt = new HashMap<>();
try {
if (!this.has_bin_column) {
bin2pstmt.put(0, createPreparedStatement(0));
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
int start0, end0;
if (// mutation starts *after* the base
ctx.isIndel()) {
start0 = ctx.getStart();
end0 = ctx.getEnd();
} else {
start0 = ctx.getStart() - 1;
end0 = ctx.getEnd();
}
// extends left/right
start0 = Math.max(0, start0 - this.extend_bases);
end0 += this.extend_bases;
final Set<String> atts = new HashSet<String>();
if (this.has_bin_column) {
final List<Integer> binList = reg2bins(start0, end0);
PreparedStatement pstmt = bin2pstmt.get(binList.size());
if (pstmt == null) {
LOG.debug("create prepared statemement for bin.size=" + binList.size() + "[" + start0 + ":" + end0 + "]");
pstmt = createPreparedStatement(binList.size());
bin2pstmt.put(binList.size(), pstmt);
}
initPstmt(pstmt, ctx.getContig(), start0, end0);
for (int x = 0; x < binList.size(); ++x) {
pstmt.setInt(4 + x, binList.get(x));
}
select(atts, pstmt);
} else {
// already defined
final PreparedStatement pstmt = bin2pstmt.get(0);
initPstmt(pstmt, ctx.getContig(), start0, end0);
select(atts, pstmt);
}
if (atts.isEmpty() && StringUtil.isBlank(this.filterIn) && StringUtil.isBlank(this.filterOut)) {
out.add(ctx);
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (!StringUtil.isBlank(this.filterIn) && !atts.isEmpty()) {
vcb.filter(this.filterIn);
} else if (!StringUtil.isBlank(this.filterOut) && atts.isEmpty()) {
vcb.filter(this.filterOut);
}
if (!atts.isEmpty()) {
vcb.attribute(TAG, atts.toArray());
}
out.add(vcb.make());
}
progress.finish();
return 0;
} catch (final SQLException err) {
LOG.error(err);
throw new RuntimeException("SQLError", err);
} finally {
for (final PreparedStatement pstmt : bin2pstmt.values()) {
CloserUtil.close(pstmt);
}
bin2pstmt.clear();
}
}
Aggregations