use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class FastGenotypeGVCFs method doWork.
@Override
public int doWork(final List<String> args) {
VariantContextWriter w = null;
try {
this.gvcfSources.addAll(IOUtil.unrollFiles(args.stream().map(S -> new File(S)).collect(Collectors.toSet()), ".g.vcf", ".g.vcf.gz").stream().map(F -> new Source(F)).collect(Collectors.toList()));
// check the unrolled source list, not the raw arguments: unrolling may yield no .g.vcf file, and get(0) below would fail
if (this.gvcfSources.isEmpty()) {
LOG.error("No gvcf file was given");
return -1;
}
this.gvcfSources.stream().forEach(S -> S.open());
this.dictionary = this.gvcfSources.get(0).vcfFileReader.getFileHeader().getSequenceDictionary();
if (this.dictionary == null) {
LOG.error("Dict missing in " + this.gvcfSources.get(0).gvcfFile);
return -1;
}
this.gvcfSources.stream().map(S -> S.vcfFileReader.getFileHeader().getSequenceDictionary()).forEach(D -> {
if (D == null || !SequenceUtil.areSequenceDictionariesEqual(D, dictionary)) {
throw new JvarkitException.UserError("dictionary missing or dictionaries are not the same");
}
});
if (gvcfSources.stream().map(S -> S.sampleName).collect(Collectors.toSet()).size() != this.gvcfSources.size()) {
LOG.error("Duplicate sample name; check input");
return -1;
}
final Set<VCFHeaderLine> metaData = new HashSet<>();
VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY);
metaData.addAll(gvcfSources.stream().flatMap(S -> S.vcfFileReader.getFileHeader().getFormatHeaderLines().stream()).collect(Collectors.toSet()));
final VCFHeader header = new VCFHeader(metaData, gvcfSources.stream().map(S -> S.sampleName).collect(Collectors.toList()));
w = super.openVariantContextWriter(outputFile);
w.writeHeader(header);
int contigTid = 0;
while (contigTid < dictionary.size()) {
final SAMSequenceRecord ssr = this.dictionary.getSequence(contigTid);
int pos = 0;
while (pos < ssr.getSequenceLength()) {
// LOG.debug(ssr.getSequenceName()+" "+pos+" "+this.gvcfSources.size());
GvcfVariant variantAtThisPos = null;
int minEnd = ssr.getSequenceLength();
// cleanup
for (final Source src : this.gvcfSources) {
for (; ; ) {
final GvcfThing gvcfthing = src.get();
// LOG.debug(""+gvcfthing+" "+src.sampleName+" "+ssr.getSequenceName()+":"+pos);
if (gvcfthing == null) {
// no more variant available
break;
} else if (contigTid > gvcfthing.getTid()) {
// observed contig is after gvcfthing.contig
src.current = null;
continue;
} else if (contigTid < gvcfthing.getTid()) {
// observed contig is before gvcfthing.contig
break;
} else if (gvcfthing.getEnd() < pos) {
// variant information is before observed pos
src.current = null;
continue;
} else if (gvcfthing.getStart() > pos) {
// variant information is after observed pos
minEnd = Math.min(minEnd, gvcfthing.getStart() - 1);
break;
} else if (gvcfthing.isVariant()) {
if (variantAtThisPos == null || variantContigPosRefComparator.compare(GvcfVariant.class.cast(gvcfthing).ctx, variantAtThisPos.ctx) < 0) {
variantAtThisPos = GvcfVariant.class.cast(gvcfthing);
}
break;
} else if (gvcfthing.isBlock()) {
minEnd = Math.min(minEnd, gvcfthing.getEnd());
break;
} else {
LOG.debug("??");
}
}
}
if (variantAtThisPos == null) {
pos = minEnd + 1;
} else {
final VariantContext archetype = variantAtThisPos.ctx;
final List<VariantContext> allVariants = this.gvcfSources.stream().map(S -> S.get()).filter(G -> G != null && G.isVariant()).map(G -> GvcfVariant.class.cast(G).ctx).filter(V -> variantContigPosRefComparator.compare(V, archetype) == 0).collect(Collectors.toList());
final Set<Allele> alleles = allVariants.stream().flatMap(V -> V.getGenotypes().stream()).flatMap(G -> G.getAlleles().stream()).filter(A -> !(A.equals(NON_REF) || A.isNoCall())).collect(Collectors.toSet());
alleles.add(archetype.getReference());
final VariantContextBuilder vcb = new VariantContextBuilder(getClass().getName(), archetype.getContig(), archetype.getStart(), archetype.getEnd(), alleles);
if (archetype.hasID()) {
vcb.id(archetype.getID());
}
final List<Genotype> genotypes = new ArrayList<>(allVariants.size());
for (VariantContext ctx : allVariants) {
Genotype genotype = ctx.getGenotype(0);
GenotypeBuilder gb = new GenotypeBuilder(genotype);
genotypes.add(gb.make());
}
vcb.genotypes(genotypes);
final VariantContext genotypedVariant;
try {
genotypedVariant = vcb.make();
} catch (Exception err2) {
LOG.debug(allVariants);
LOG.error(err2);
return -1;
}
w.add(genotypedVariant);
// reset source for the current variant
for (final Source src : this.gvcfSources) {
if (src.current != null && variantContigPosRefComparator.compare(src.current.ctx, archetype) == 0) {
src.current = null;
}
}
}
}
++contigTid;
}
w.close();
w = null;
this.gvcfSources.stream().forEach(S -> S.close());
this.gvcfSources.clear();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
for (final Source src : this.gvcfSources) src.close();
CloserUtil.close(w);
}
}
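Every excerpt on this page follows the same VariantContextWriter life cycle: build a VCFHeader, open the writer, write the header once, add VariantContext records in coordinate order, then close. Below is a minimal, self-contained sketch of that life cycle against plain htsjdk; the output file name, contig, position and sample name are illustrative, not taken from jvarkit.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class MinimalVcfWriter {
    public static void main(final String[] args) {
        final Set<VCFHeaderLine> meta = new HashSet<>();
        // declare FORMAT/GT so the genotype written below is legal
        meta.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
        final VCFHeader header = new VCFHeader(meta, Collections.singletonList("SAMPLE1"));
        final Allele ref = Allele.create("A", true);
        final Allele alt = Allele.create("T", false);
        final VariantContext ctx = new VariantContextBuilder("demo", "chr1", 100L, 100L, Arrays.asList(ref, alt))
                .genotypes(new GenotypeBuilder("SAMPLE1", Arrays.asList(ref, alt)).make())
                .make();
        // no reference dictionary is attached here, so on-the-fly indexing must be disabled
        try (final VariantContextWriter w = new VariantContextWriterBuilder()
                .setOutputFile("example.vcf")
                .unsetOption(Options.INDEX_ON_THE_FLY)
                .build()) {
            w.writeHeader(header);
            w.add(ctx);
        }
    }
}

Unsetting Options.INDEX_ON_THE_FLY matters whenever no reference dictionary is supplied, since the builder otherwise refuses to create an indexing writer.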
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfMultiToOne method doWork.
@Override
public int doWork(final List<String> arguments) {
VariantContextWriter out = null;
Set<String> args = IOUtils.unrollFiles(arguments);
List<VcfIterator> inputs = new ArrayList<>(args.size() + 1);
List<String> inputFiles = new ArrayList<>(args.size() + 1);
try {
if (args.isEmpty() && arguments.isEmpty()) {
inputs.add(VCFUtils.createVcfIteratorStdin());
inputFiles.add("stdin");
} else if (args.isEmpty()) {
LOG.error("No vcf provided");
return -1;
} else {
for (final String vcfFile : args) {
inputs.add(VCFUtils.createVcfIterator(vcfFile));
inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
}
}
SAMSequenceDictionary dict = null;
final Set<String> sampleNames = new HashSet<String>();
final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
for (final VcfIterator in : inputs) {
final VCFHeader header = in.getHeader();
if (dict == null) {
dict = header.getSequenceDictionary();
} else if (header.getSequenceDictionary() == null) {
LOG.error("No Dictionary in vcf");
return -1;
} else if (!SequenceUtil.areSequenceDictionariesEqual(dict, header.getSequenceDictionary())) {
LOG.error("Not the same dictionary between vcfs");
return -1;
}
metaData.addAll(in.getHeader().getMetaDataInInputOrder());
sampleNames.addAll(in.getHeader().getSampleNamesInOrder());
}
final Comparator<VariantContext> comparator = (dict == null ? VCFUtils.createChromPosRefComparator() : VCFUtils.createTidPosRefComparator(dict));
// addMetaData(metaData);
metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_TAGID, 1, VCFHeaderLineType.String, "Sample Name from multi-sample vcf"));
metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_FILETAGID, 1, VCFHeaderLineType.String, "Origin of sample"));
for (final String sample : sampleNames) {
metaData.add(new VCFHeaderLine(SAMPLE_HEADER_DECLARATION, sample));
}
final VCFHeader h2 = new VCFHeader(metaData, Collections.singleton(DEFAULT_VCF_SAMPLE_NAME));
recalculator.setHeader(h2);
out = super.openVariantContextWriter(this.outputFile);
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
for (; ; ) {
if (out.checkError())
break;
/* get 'smallest' variant */
VariantContext smallest = null;
int idx = 0;
int best_idx = -1;
while (idx < inputs.size()) {
final VcfIterator in = inputs.get(idx);
if (!in.hasNext()) {
CloserUtil.close(in);
inputs.remove(idx);
inputFiles.remove(idx);
} else {
final VariantContext ctx = in.peek();
if (smallest == null || comparator.compare(smallest, ctx) > 0) {
smallest = ctx;
best_idx = idx;
}
++idx;
}
}
if (smallest == null)
break;
final VariantContext ctx = progress.watch(inputs.get(best_idx).next());
if (ctx.getNSamples() == 0) {
if (!this.discard_no_call) {
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
vcb.genotypes(GenotypeBuilder.createMissing(DEFAULT_VCF_SAMPLE_NAME, 2));
out.add(this.recalculator.apply(vcb.make()));
}
continue;
}
for (int i = 0; i < ctx.getNSamples(); ++i) {
final Genotype g = ctx.getGenotype(i);
final String sample = g.getSampleName();
if (!g.isCalled() && this.discard_no_call)
continue;
if (!g.isAvailable() && this.discard_non_available)
continue;
if (g.isHomRef() && this.discard_hom_ref)
continue;
final GenotypeBuilder gb = new GenotypeBuilder(g);
gb.name(DEFAULT_VCF_SAMPLE_NAME);
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(DEFAULT_SAMPLE_TAGID, sample);
vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
vcb.genotypes(gb.make());
out.add(this.recalculator.apply(vcb.make()));
}
}
progress.finish();
LOG.debug("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(inputs);
CloserUtil.close(out);
}
}
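The heart of VcfMultiToOne is a k-way merge: every input iterator is peeked, the smallest record under a contig/pos/ref comparator is emitted, and exhausted inputs drop out. A stripped-down sketch of that loop using htsjdk's PeekableIterator; the comparator is caller-supplied (for inputs sharing a dictionary, htsjdk's VariantContextComparator built from that dictionary would fit), and all names are illustrative.

import htsjdk.samtools.util.PeekableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;

import java.util.Comparator;
import java.util.List;

public class SmallestFirstMerge {
    /** Drain several position-sorted iterators into one writer, smallest record first. */
    static void merge(final List<PeekableIterator<VariantContext>> inputs,
                      final Comparator<VariantContext> cmp,
                      final VariantContextWriter out) {
        for (;;) {
            PeekableIterator<VariantContext> best = null;
            for (final PeekableIterator<VariantContext> it : inputs) {
                if (!it.hasNext()) continue; // exhausted input, skip it
                if (best == null || cmp.compare(it.peek(), best.peek()) < 0) {
                    best = it;
                }
            }
            if (best == null) break; // every input is exhausted
            out.add(best.next());
        }
    }
}

The excerpt additionally removes exhausted iterators from its lists so later passes get shorter and the originating file name stays aligned with the iterator; the sketch simply skips them, which is equivalent for the merge itself.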
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfGeneSplitter method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, File outputFile) {
SortingCollection<KeyAndLine> sortingcollection = null;
BufferedReader in = null;
FileOutputStream fos = null;
ZipOutputStream zout = null;
CloseableIterator<KeyAndLine> iter = null;
PrintWriter pw = null;
try {
in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
/**
* find splitter by name
*/
final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory().header(cah.header).get();
sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sortingcollection.setDestructiveIteration(true);
// read variants
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(cah.header);
String line;
while ((line = in.readLine()) != null) {
final VariantContext ctx = progess.watch(cah.codec.decode(line));
// no check for ctx.isFiltered here, we do this later.
for (final String key : this.getVariantKeys(vepPredictionParser, ctx)) {
sortingcollection.add(new KeyAndLine(key, line));
}
}
progess.finish();
sortingcollection.doneAdding();
LOG.info("creating zip " + outputFile);
fos = new FileOutputStream(outputFile);
zout = new ZipOutputStream(fos);
final File tmpReportFile = File.createTempFile("_tmp.", ".txt", writingSortingCollection.getTmpDirectories().get(0));
tmpReportFile.deleteOnExit();
pw = IOUtils.openFileForPrintWriter(tmpReportFile);
pw.println("#chrom\tstart\tend\tkey\tCount_Variants");
iter = sortingcollection.iterator();
final EqualRangeIterator<KeyAndLine> eqiter = new EqualRangeIterator<>(iter, (o1, o2) -> o1.key.compareTo(o2.key));
while (eqiter.hasNext()) {
final List<KeyAndLine> buffer = eqiter.next();
final KeyAndLine first = buffer.get(0);
LOG.info(first.key);
final List<VariantContext> variants = new ArrayList<>(buffer.size());
String contig = null;
int chromStart = Integer.MAX_VALUE;
int chromEnd = 0;
for (final KeyAndLine kal : buffer) {
final VariantContext ctx = cah.codec.decode(kal.ctx);
variants.add(ctx);
contig = ctx.getContig();
chromStart = Math.min(chromStart, ctx.getStart());
chromEnd = Math.max(chromEnd, ctx.getEnd());
}
pw.println(contig + "\t" + (chromStart - 1) + // -1 for bed compatibility
"\t" + chromEnd + "\t" + first.key + "\t" + variants.size());
// save vcf file
final ZipEntry ze = new ZipEntry(this.baseZipDir + "/" + first.key + ".vcf");
zout.putNextEntry(ze);
final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(zout));
final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
header2.addMetaDataLine(new VCFHeaderLine("VcfGeneSplitter.Name", String.valueOf(first.key)));
out.writeHeader(header2);
for (final VariantContext ctx : variants) {
out.add(ctx);
}
// safe to close here: the zip stream is wrapped by IOUtils.uncloseableOutputStream
out.close();
zout.closeEntry();
}
eqiter.close();
iter.close();
iter = null;
progess.finish();
LOG.info("saving report");
pw.flush();
pw.close();
final ZipEntry entry = new ZipEntry(this.baseZipDir + "/manifest.bed");
zout.putNextEntry(entry);
IOUtils.copyTo(tmpReportFile, zout);
zout.closeEntry();
zout.finish();
zout.close();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
if (sortingcollection != null)
sortingcollection.cleanup();
CloserUtil.close(in);
CloserUtil.close(fos);
CloserUtil.close(pw);
}
}
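The trick worth isolating from VcfGeneSplitter is writing one VCF per zip entry on a single ZipOutputStream: each per-key writer is opened on a wrapper whose close() never reaches the zip, so out.close() flushes the entry while the archive stays open. jvarkit does this with IOUtils.uncloseableOutputStream; the FilterOutputStream below is a hypothetical stand-in for it, and the entry name is illustrative.

import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class ZipVcfEntries {
    /** OutputStream wrapper whose close() only flushes, leaving the delegate open. */
    static OutputStream uncloseable(final OutputStream delegate) {
        return new FilterOutputStream(delegate) {
            @Override
            public void close() throws IOException {
                flush(); // do NOT close the underlying ZipOutputStream
            }
        };
    }

    static void writeEntry(final ZipOutputStream zout, final String entryName, final VCFHeader header)
            throws IOException {
        zout.putNextEntry(new ZipEntry(entryName));
        final VariantContextWriter w = new VariantContextWriterBuilder()
                .setOutputStream(uncloseable(zout))
                .unsetOption(Options.INDEX_ON_THE_FLY) // cannot index a raw stream
                .build();
        w.writeHeader(header);
        // ... w.add(ctx) for each variant belonging to this key ...
        w.close(); // flushes this entry; the zip itself stays open
        zout.closeEntry();
    }
}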
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class VcfSpringFilter method doWork.
@Override
public int doWork(final List<String> args) {
VcfIterator vcfIn = null;
VariantContextWriter vcfOut = null;
try {
if (this.springCongigFiles.isEmpty()) {
LOG.error("no spring config file was provided");
return -1;
}
this.springApplicationContext = new FileSystemXmlApplicationContext(springCongigFiles.toArray(new String[springCongigFiles.size()]));
if (!this.springApplicationContext.containsBean(this.mainBeanName)) {
LOG.error("cannot get bean " + mainBeanName + " in " + this.springCongigFiles);
return -1;
}
final Object o = this.springApplicationContext.getBean(mainBeanName);
if (o == null || !(o instanceof VariantContextWriter)) {
LOG.error("bean " + mainBeanName + " is not a VcfChain but " + (o == null ? "null" : o.getClass().getName()));
return -1;
}
final VariantContextWriter writer = VariantContextWriter.class.cast(o);
final String inputFile = oneFileOrNull(args);
vcfIn = super.openVcfIterator(inputFile);
// NOTE: the excerpt is truncated upstream and 'ret' is never defined; a plausible
// completion (an assumption, not the original code) streams the input through
// the spring-configured writer:
vcfOut = writer;
vcfOut.writeHeader(vcfIn.getHeader());
while (vcfIn.hasNext()) {
vcfOut.add(vcfIn.next());
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfIn);
CloserUtil.close(vcfOut);
this.springApplicationContext = null;
}
}
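VcfSpringFilter's only unusual step is fetching a VariantContextWriter implementation out of a Spring XML context and type-checking it before use. A minimal sketch of that lookup; the config file name and bean id are illustrative.

import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.springframework.context.support.FileSystemXmlApplicationContext;

public class SpringWriterLookup {
    public static void main(final String[] args) {
        // hypothetical config file defining a bean that implements VariantContextWriter
        try (final FileSystemXmlApplicationContext ctx =
                new FileSystemXmlApplicationContext("beans.xml")) {
            final Object o = ctx.getBean("variantWriter"); // hypothetical bean id
            if (!(o instanceof VariantContextWriter)) {
                throw new IllegalStateException("bean is not a VariantContextWriter but "
                        + (o == null ? "null" : o.getClass().getName()));
            }
            final VariantContextWriter writer = (VariantContextWriter) o;
            // ... stream variants through 'writer' ...
        }
    }
}

The instanceof check mirrors the excerpt: getBean returns Object, so the cast must be validated before the writer is used.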
use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
the class MsaToVcf method doWork.
@Override
public int doWork(final List<String> args) {
VariantContextWriter w = null;
LineIterator r = null;
try {
final String inputName = oneFileOrNull(args);
if (inputName == null) {
LOG.info("Reading from stdin");
r = IOUtils.openStreamForLineIterator(stdin());
} else {
LOG.info("Reading from " + inputName);
r = IOUtils.openURIForLineIterator(inputName);
}
Format format = Format.None;
/**
* try to guess format
*/
while (r.hasNext() && format == Format.None) {
final String line = r.peek();
if (line.trim().isEmpty()) {
r.next();
continue;
}
if (line.startsWith("CLUSTAL")) {
format = Format.Clustal;
// consume
r.next();
break;
} else if (line.startsWith(">")) {
format = Format.Fasta;
break;
} else {
LOG.error("MSA format not recognized in " + line);
return -1;
}
}
LOG.info("format : " + format);
/**
* parse lines as FASTA
*/
if (Format.Fasta.equals(format)) {
this.consensus = new FastaConsensus();
AlignSequence curr = null;
while (r.hasNext()) {
String line = r.next();
if (line.startsWith(">")) {
curr = new AlignSequence();
curr.name = line.substring(1).trim();
if (sample2sequence.containsKey(curr.name)) {
LOG.error("Sequence ID " + curr.name + " defined twice");
return -1;
}
sample2sequence.put(curr.name, curr);
} else if (curr != null) {
curr.seq.append(line.trim());
this.align_length = Math.max(this.align_length, curr.seq.length());
}
}
/*
//remove heading & trailing '-'
for(final String sample:this.sample2sequence.keySet())
{
final AlignSequence seq = this.sample2sequence.get(sample);
int i=0;
while(i<this.align_length && seq.at(i)==DELETION)
{
seq.seq.setCharAt(i, CLIPPING);
++i;
}
i= this.align_length-1;
while(i>=0 && seq.at(i)==DELETION)
{
seq.seq.setCharAt(i, CLIPPING);
--i;
}
}*/
} else /**
* parse lines as CLUSTAL
*/
if (Format.Clustal.equals(format)) {
ClustalConsensus clustalconsensus = new ClustalConsensus();
this.consensus = clustalconsensus;
AlignSequence curr = null;
int columnStart = -1;
while (r.hasNext()) {
String line = r.next();
if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
columnStart = -1;
continue;
}
if (line.charAt(0) == ' ') {
if (columnStart == -1) {
LOG.error("illegal consensus line for " + line);
return -1;
}
/* if consensus doesn't exist in the first rows */
while (clustalconsensus.seq.length() < (this.align_length - (line.length() - columnStart))) {
clustalconsensus.seq.append(" ");
}
clustalconsensus.seq.append(line.substring(columnStart));
} else {
if (columnStart == -1) {
columnStart = line.indexOf(' ');
if (columnStart == -1) {
LOG.error("no whithespace in " + line);
return -1;
}
while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
columnStart++;
}
}
String seqname = line.substring(0, columnStart).trim();
curr = this.sample2sequence.get(seqname);
if (curr == null) {
curr = new AlignSequence();
curr.name = seqname;
this.sample2sequence.put(curr.name, curr);
}
int columnEnd = line.length();
// remove blanks and digit at the end
while (columnEnd - 1 > columnStart && (line.charAt(columnEnd - 1) == ' ' || Character.isDigit(line.charAt(columnEnd - 1)))) {
columnEnd--;
}
curr.seq.append(line.substring(columnStart, columnEnd));
this.align_length = Math.max(align_length, curr.seq.length());
}
}
} else {
LOG.error("Undefined input format");
return -1;
}
CloserUtil.close(r);
/* a named consensus sequence was requested */
if (consensusRefName != null) {
AlignSequence namedSequence = null;
if ((namedSequence = sample2sequence.get(consensusRefName)) == null) {
LOG.error("Cannot find consensus sequence \"" + consensusRefName + "\" in list of sequences: " + this.sample2sequence.keySet().toString());
return -1;
}
this.consensus = new NamedConsensus(namedSequence);
}
/**
* we're done, print VCF
*/
/**
* first, print header
*/
Set<VCFHeaderLine> vcfHeaderLines = new HashSet<VCFHeaderLine>();
vcfHeaderLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth."));
vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth"));
// super.addMetaData(vcfHeaderLines);
Map<String, String> mapping = new HashMap<String, String>();
mapping.put("ID", REF);
mapping.put("length", String.valueOf(this.align_length));
vcfHeaderLines.add(new VCFContigHeaderLine(mapping, 1));
Set<String> samples = new TreeSet<String>(this.sample2sequence.keySet());
VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, samples);
w = super.openVariantContextWriter(this.outputFile);
w.writeHeader(vcfHeader);
/**
* loop over the alignment columns, print the variants
*/
int pos1 = 0;
while (pos1 < align_length) {
// is it a real variation, or should we print all sites anyway?
boolean is_variation;
if (consensus.at(pos1) == MATCH) {
if (this.printAllSites) {
is_variation = false;
} else {
++pos1;
continue;
}
} else {
is_variation = true;
}
int pos2 = pos1 + 1;
// don't extend if no variation and printAllSites
while (is_variation && pos2 < align_length && consensus.at(pos2) != MATCH) {
++pos2;
}
boolean is_subsitution = (pos1 + 1 == pos2);
// need pos1>0 because ALT contains the previous base
if (is_subsitution && pos1 != 0 && is_variation) {
for (Sequence seq : this.sample2sequence.values()) {
if (seq.at(pos1) == DELETION) {
is_subsitution = false;
break;
}
}
}
Set<Allele> alleles = new HashSet<Allele>();
VariantContextBuilder vcb = new VariantContextBuilder();
List<Genotype> genotypes = new ArrayList<Genotype>(samples.size());
/* longest variant */
String longest = null;
Counter<String> countAlleles = new Counter<String>();
Map<String, String> sample2genotype = new HashMap<String, String>(samples.size());
String namedConsensusRefAllele = "N";
/* loop over the sequences of each sample */
for (String sample : samples) {
Sequence seq = this.sample2sequence.get(sample);
String al = null;
if (is_subsitution) {
if (seq.at(pos1) == CLIPPING)
continue;
al = String.valueOf(seq.at(pos1));
} else {
StringBuilder sb = new StringBuilder(pos2 - pos1);
for (int i = Math.max(0, pos1 - 1); i < pos2; ++i) { // yes, -1: include the previous base
if (seq.at(i) == CLIPPING)
continue;
if (seq.at(i) == DELETION)
continue;
sb.append(seq.at(i));
}
if (sb.length() == 0)
continue;
al = sb.toString();
}
/* did we find the longest allele? */
if (longest == null || longest.length() < al.length()) {
// reset count of most frequent, we'll use the longest indel or subst
countAlleles = new Counter<String>();
longest = al;
}
countAlleles.incr(al);
sample2genotype.put(sample, al);
/* if a consensus sequence name was defined, record this allele for future use */
if (consensusRefName != null && sample.equals(consensusRefName)) {
namedConsensusRefAllele = al;
}
}
if (countAlleles.isEmpty()) {
if (!printAllSites) {
continue;
}
/* not a real variation, just add a dummy 'N' */
countAlleles.incr("N");
}
String refAllStr;
if (consensusRefName == null) {
refAllStr = countAlleles.getMostFrequent();
} else {
refAllStr = namedConsensusRefAllele;
}
final Allele refAllele = Allele.create(refAllStr.replaceAll("[^ATGCatgc]", "N"), true);
alleles.add(refAllele);
/* loop over samples, build each genotype */
for (final String sample : samples) { // iterate over all samples so sequences without an allele get a missing genotype below
Allele al = null;
if (!sample2genotype.containsKey(sample)) {
// nothing
} else if (sample2genotype.get(sample).equals(refAllStr)) {
al = refAllele;
} else {
al = Allele.create(sample2genotype.get(sample).replaceAll("[^ATGCatgc]", "N"), false);
alleles.add(al);
}
if (al != null) {
final GenotypeBuilder gb = new GenotypeBuilder(sample);
final List<Allele> sampleAlleles = new ArrayList<Allele>(2);
sampleAlleles.add(al);
if (!haploid)
sampleAlleles.add(al);
gb.alleles(sampleAlleles);
gb.DP(1);
genotypes.add(gb.make());
} else {
genotypes.add(GenotypeBuilder.createMissing(sample, haploid ? 1 : 2));
}
}
// go to 1-based ref if subst; for an indel we use pos(base)-1
final int start = pos1 + (is_subsitution ? 1 : 0);
vcb.start(start);
vcb.stop(start + (refAllStr.length() - 1));
vcb.chr(REF);
HashMap<String, Object> atts = new HashMap<String, Object>();
atts.put(VCFConstants.DEPTH_KEY, genotypes.size());
vcb.attributes(atts);
vcb.alleles(alleles);
vcb.genotypes(genotypes);
w.add(vcb.make());
pos1 = pos2;
}
w.close();
if (outFasta != null) {
final PrintWriter fasta = super.openFileOrStdoutAsPrintWriter(outFasta);
for (final String sample : samples) {
fasta.println(">" + sample);
final Sequence seq = this.sample2sequence.get(sample);
for (int i = 0; i < align_length; ++i) {
fasta.print(seq.at(i));
}
fasta.println();
}
fasta.println(">CONSENSUS");
for (int i = 0; i < align_length; ++i) {
fasta.print(consensus.at(i));
}
fasta.println();
fasta.flush();
fasta.close();
}
LOG.info("Done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
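MsaToVcf emits variants against a contig that exists only in the alignment, which works as long as that contig is declared in the header. A condensed sketch of declaring a synthetic contig with VCFContigHeaderLine and writing one substitution against it; the contig name, length, position, file name and sample are illustrative.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.*;

import java.util.*;

public class SyntheticContigVcf {
    public static void main(final String[] args) {
        final Set<VCFHeaderLine> meta = new HashSet<>();
        meta.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        // declare the synthetic contig, as MsaToVcf does for its alignment
        final Map<String, String> contig = new HashMap<>();
        contig.put("ID", "msa");        // hypothetical contig name
        contig.put("length", "1000");   // hypothetical alignment length
        meta.add(new VCFContigHeaderLine(contig, 1));
        final VCFHeader header = new VCFHeader(meta, Collections.singletonList("seq1"));
        final Allele ref = Allele.create("C", true);
        final Allele alt = Allele.create("G", false);
        try (final VariantContextWriter w = new VariantContextWriterBuilder()
                .setOutputFile("msa.vcf")
                .unsetOption(Options.INDEX_ON_THE_FLY)
                .build()) {
            w.writeHeader(header);
            w.add(new VariantContextBuilder("msa2vcf", "msa", 42L, 42L, Arrays.asList(ref, alt))
                    .genotypes(new GenotypeBuilder("seq1", Arrays.asList(alt, alt)).make())
                    .make());
        }
    }
}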