use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk-protected by broadinstitute.
the class AbstractConcordanceWalker method onStartup.
// ********** The basic traversal structure of GATKTool
@Override
protected final void onStartup() {
super.onStartup();
initializeTruthVariantsIfNecessary();
evalVariants = new FeatureDataSource<>(new FeatureInput<>(evalVariantsFile, "eval"), CACHE_LOOKAHEAD, VariantContext.class);
if (hasIntervals()) {
truthVariants.setIntervalsForTraversal(intervalsForTraversal);
evalVariants.setIntervalsForTraversal(intervalsForTraversal);
}
dict = getBestAvailableSequenceDictionary();
variantContextComparator = new VariantContextComparator(dict);
}
use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.
the class CreateSomaticPanelOfNormals method doWork.
public Object doWork() {
final List<File> inputVcfs = new ArrayList<>(vcfs);
final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();
for (final File vcf : inputVcfs) {
final VCFFileReader reader = new VCFFileReader(vcf, false);
iterators.add(reader.iterator());
final VCFHeader header = reader.getFileHeader();
Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
headers.add(header);
}
final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
while (mergingIterator.hasNext()) {
final VariantContext vc = mergingIterator.next();
if (!currentPosition.overlaps(vc)) {
processVariantsAtSamePosition(variantsAtThisPosition, writer);
variantsAtThisPosition.clear();
currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
}
variantsAtThisPosition.add(vc);
}
mergingIterator.close();
writer.close();
return "SUCCESS";
}
use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk by broadinstitute.
the class MergeVcfs method doWork.
@Override
protected Object doWork() {
final ProgressLogger progress = new ProgressLogger(logger, 10000);
final List<String> sampleList = new ArrayList<>();
final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
VariantContextComparator variantContextComparator = null;
SAMSequenceDictionary sequenceDictionary = null;
if (SEQUENCE_DICTIONARY != null) {
sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
}
for (final File file : INPUT) {
IOUtil.assertFileIsReadable(file);
final VCFFileReader fileReader = new VCFFileReader(file, false);
final VCFHeader fileHeader = fileReader.getFileHeader();
if (variantContextComparator == null) {
variantContextComparator = fileHeader.getVCFRecordComparator();
} else {
if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
}
}
if (sequenceDictionary == null)
sequenceDictionary = fileHeader.getSequenceDictionary();
if (sampleList.isEmpty()) {
sampleList.addAll(fileHeader.getSampleNamesInOrder());
} else {
if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
}
}
headers.add(fileHeader);
iteratorCollection.add(fileReader.iterator());
}
if (CREATE_INDEX && sequenceDictionary == null) {
throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
}
final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
if (CREATE_INDEX) {
builder.setOption(Options.INDEX_ON_THE_FLY);
}
try (final VariantContextWriter writer = builder.build()) {
writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
while (mergingIterator.hasNext()) {
final VariantContext context = mergingIterator.next();
writer.add(context);
progress.record(context.getContig(), context.getStart());
}
CloserUtil.close(mergingIterator);
}
return null;
}
use of htsjdk.variant.variantcontext.VariantContextComparator in project hmftools by hartwigmedical.
the class BreakPointInspectorApplication method main.
public static void main(final String... args) throws IOException {
final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
final Options options = createOptions();
try {
final CommandLine cmd = createCommandLine(options, args);
final String refPath = cmd.getOptionValue(REF_PATH);
final String refSlicePath = cmd.getOptionValue(REF_SLICE);
final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
final String vcfPath = cmd.getOptionValue(VCF);
if (cmd.hasOption(PROXIMITY)) {
analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
}
if (cmd.hasOption(CONTAMINATION)) {
analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
}
if (refPath == null || tumorPath == null || vcfPath == null) {
printHelpAndExit(options);
return;
}
final File tumorBAM = new File(tumorPath);
final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
final File refBAM = new File(refPath);
final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
final File vcfFile = new File(vcfPath);
final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
if (samples.size() != 2) {
System.err.println("could not determine tumor and sample from VCF");
System.exit(1);
return;
}
TSVOutput.PrintHeaders();
final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
final Map<String, VariantContext> variantMap = new HashMap<>();
final List<VariantContext> variants = Lists.newArrayList();
for (VariantContext variant : vcfReader) {
variantMap.put(variant.getID(), variant);
final VariantContext mateVariant = variant;
if (variant.hasAttribute("MATEID")) {
variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
if (variant == null) {
continue;
}
}
final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
final Location location1 = Location.parseLocationString(location, tumorReader.getFileHeader().getSequenceDictionary());
final Range uncertainty1 = extractCIPOS(variant);
final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
HMFVariantType svType;
final Location location2;
switch(variant.getStructuralVariantType()) {
case INS:
svType = HMFVariantType.INS;
location2 = location1.set(variant.getAttributeAsInt("END", 0));
break;
case INV:
if (variant.hasAttribute("INV3")) {
svType = HMFVariantType.INV3;
} else if (variant.hasAttribute("INV5")) {
svType = HMFVariantType.INV5;
} else {
System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
continue;
}
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DEL:
svType = HMFVariantType.DEL;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DUP:
svType = HMFVariantType.DUP;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case BND:
// process the breakend string
final String call = variant.getAlternateAllele(0).getDisplayString();
final String[] leftSplit = call.split("\\]");
final String[] rightSplit = call.split("\\[");
if (leftSplit.length >= 2) {
location2 = Location.parseLocationString(leftSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (leftSplit[0].length() > 0) {
svType = HMFVariantType.INV3;
uncertainty2 = Range.invert(uncertainty1);
} else {
svType = HMFVariantType.DUP;
uncertainty2 = uncertainty1;
}
} else if (rightSplit.length >= 2) {
location2 = Location.parseLocationString(rightSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (rightSplit[0].length() > 0) {
svType = HMFVariantType.DEL;
uncertainty2 = uncertainty1;
} else {
svType = HMFVariantType.INV5;
uncertainty2 = Range.invert(uncertainty1);
}
} else {
System.err.println(variant.getID() + " : could not parse breakpoint");
continue;
}
if (IMPRECISE) {
uncertainty2 = extractCIPOS(mateVariant);
}
break;
default:
System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
continue;
}
final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
ctx.Uncertainty1 = uncertainty1;
ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2, fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..." + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
} else {
ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
}
ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
switch(ctx.Type) {
case INS:
case DEL:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = -1;
break;
case INV3:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = 1;
break;
case INV5:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = -1;
break;
case DUP:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = 1;
break;
}
final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
combinedQueryIntervals.addAll(asList(result.QueryIntervals));
TSVOutput.print(variant, ctx, result);
final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
if (filters != null) {
filters.clear();
}
// we will map BreakpointError to a flag
if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
} else {
v.getCommonInfo().addFilters(result.Filters);
}
if (result.Filters.isEmpty()) {
final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
}
if (result.Breakpoints.getLeft() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
}
if (result.Breakpoints.getRight() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
}
// remove CIPOS / CIEND when we have an insert sequence
if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
v.getCommonInfo().removeAttribute("CIPOS");
v.getCommonInfo().removeAttribute("CIEND");
}
variants.add(v);
};
vcfUpdater.accept(variant, false);
if (mateVariant != variant) {
vcfUpdater.accept(mateVariant, true);
}
}
// TODO: update START, END with BPI values and save Manta values in new attributes
final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
if (vcfOutputPath != null) {
final VCFHeader header = vcfReader.getFileHeader();
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag, "BPI could not determine the breakpoints, inspect manually"));
header.addMetaDataLine(new VCFHeaderLine("bpiVersion", BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
Filter.UpdateVCFHeader(header);
AlleleFrequency.UpdateVCFHeader(header);
// setup VCF
final VariantContextWriter writer = new VariantContextWriterBuilder().setReferenceDictionary(header.getSequenceDictionary()).setOutputFile(vcfOutputPath).build();
writer.writeHeader(header);
// write variants
variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
variants.forEach(writer::add);
writer.close();
}
final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
if (tumorSlicePath != null) {
writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
}
if (refSlicePath != null) {
writeToSlice(refSlicePath, refReader, optimizedIntervals);
}
refReader.close();
tumorReader.close();
} catch (ParseException e) {
printHelpAndExit(options);
System.exit(1);
}
}
use of htsjdk.variant.variantcontext.VariantContextComparator in project gatk-protected by broadinstitute.
the class CreateSomaticPanelOfNormals method doWork.
public Object doWork() {
final List<File> inputVcfs = new ArrayList<>(vcfs);
final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();
for (final File vcf : inputVcfs) {
final VCFFileReader reader = new VCFFileReader(vcf, false);
iterators.add(reader.iterator());
final VCFHeader header = reader.getFileHeader();
Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
headers.add(header);
}
final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
while (mergingIterator.hasNext()) {
final VariantContext vc = mergingIterator.next();
if (!currentPosition.overlaps(vc)) {
processVariantsAtSamePosition(variantsAtThisPosition, writer);
variantsAtThisPosition.clear();
currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
}
variantsAtThisPosition.add(vc);
}
mergingIterator.close();
writer.close();
return "SUCCESS";
}
Aggregations