Use of org.apache.commons.lang3.Range in project hmftools by hartwigmedical —
class Analysis, method processStructuralVariant:
/**
 * Analyses a single structural variant: slices the tumor/ref BAMs around the two
 * Manta breakpoints, determines the precise breakpoints from the tumor reads, and
 * collects supporting evidence, allele frequency and filters for the call.
 *
 * Fix (review): the sorted readers and temp BAMs were previously only cleaned up on
 * the success path; an exception from determineBreakpoints/collectEvidence leaked
 * two open SamReaders and left two temp BAM files behind. Cleanup now runs in
 * try-with-resources / finally so it happens on every exit path.
 *
 * @param ctx the variant (breakpoint locations, uncertainties, orientations) to process
 * @return breakpoints, per-sample stats, allele frequency, query intervals and filters
 * @throws IOException on BAM read/write failure
 */
StructuralVariantResult processStructuralVariant(final HMFVariantContext ctx) throws IOException {
// Query the two breakpoint neighbourhoods, padded by 'range' beyond each
// uncertainty interval; the start is clamped at 0 to stay on the reference.
final QueryInterval[] intervals = QueryInterval.optimizeIntervals(new QueryInterval[] { new QueryInterval(ctx.MantaBP1.ReferenceIndex, Math.max(0, ctx.MantaBP1.Position + ctx.Uncertainty1.Start - range), ctx.MantaBP1.Position + ctx.Uncertainty1.End + range), new QueryInterval(ctx.MantaBP2.ReferenceIndex, Math.max(0, ctx.MantaBP2.Position + ctx.Uncertainty2.Start - range), ctx.MantaBP2.Position + ctx.Uncertainty2.End + range) });
// Materialise query-name-sorted slices of both BAMs; deleted in the finally below.
final File tempRefBam = queryNameSortedBAM(refReader, intervals, "ref");
final File tempTumorBam = queryNameSortedBAM(tumorReader, intervals, "tumor");
try (final SamReader sortedRefReader = SamReaderFactory.makeDefault().open(tempRefBam);
        final SamReader sortedTumorReader = SamReaderFactory.makeDefault().open(tempTumorBam)) {
final BreakpointResult breakpoints = determineBreakpoints(ctx, sortedTumorReader);
final StructuralVariantResult result = new StructuralVariantResult();
result.Breakpoints = breakpoints.Breakpoints;
result.QueryIntervals = intervals;
if (breakpoints.Error != BreakpointError.NONE) {
result.Filters = Filter.getErrorFilter();
} else {
result.TumorStats = collectEvidence(ctx, sortedTumorReader, result.Breakpoints);
result.RefStats = collectEvidence(ctx, sortedRefReader, result.Breakpoints);
result.AlleleFrequency = AlleleFrequency.calculate(result.TumorStats);
// load sample clipping
sortedTumorReader.forEach(r -> Clipping.getClips(r).forEach(c -> result.TumorStats.Sample_Clipping.add(c)));
sortedRefReader.forEach(r -> Clipping.getClips(r).forEach(c -> result.RefStats.Sample_Clipping.add(c)));
result.Filters = Filter.getFilters(ctx, result.TumorStats, result.RefStats, result.Breakpoints, contamination);
// adjust breakpoints for homology; negative-orientation breakends shift by -1
final Location bp1 = result.Breakpoints.getLeft().add(ctx.OrientationBP1 > 0 ? 0 : -1);
final Location bp2;
if (!ctx.isInsert() && ctx.InsertSequence.isEmpty()) {
// no inserted sequence: move BP2 back across the homologous bases
bp2 = result.Breakpoints.getRight().add(-ctx.OrientationBP2 * ctx.HomologySequence.length()).add(ctx.OrientationBP2 > 0 ? 0 : -1);
} else {
bp2 = result.Breakpoints.getRight().add(ctx.OrientationBP2 > 0 ? 0 : -1);
}
result.Breakpoints = Pair.of(bp1, bp2);
}
result.FilterString = result.Filters.isEmpty() ? "PASS" : String.join(";", result.Filters);
return result;
} finally {
// best-effort removal of the temp slices; failure is logged, not fatal
if (!tempRefBam.delete()) {
LOGGER.error("couldn't delete {}", tempRefBam);
}
if (!tempTumorBam.delete()) {
LOGGER.error("couldn't delete {}", tempTumorBam);
}
}
}
Use of org.apache.commons.lang3.Range in project hmftools by hartwigmedical —
class BreakPointInspectorApplication, method main:
/**
 * Entry point: parses command-line options, iterates structural variants from a Manta
 * VCF, runs breakpoint analysis against the tumor/ref BAMs, prints one TSV row per
 * variant, and optionally writes an annotated VCF plus BAM slices of all queried
 * intervals.
 *
 * NOTE(review): vcfReader is never closed, and tumorReader/refReader are closed only
 * on the success path — an exception mid-loop leaks them; consider try-with-resources.
 */
public static void main(final String... args) throws IOException {
final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
final Options options = createOptions();
try {
// ---- command-line parsing -------------------------------------------------
final CommandLine cmd = createCommandLine(options, args);
final String refPath = cmd.getOptionValue(REF_PATH);
final String refSlicePath = cmd.getOptionValue(REF_SLICE);
final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
final String vcfPath = cmd.getOptionValue(VCF);
if (cmd.hasOption(PROXIMITY)) {
analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
}
if (cmd.hasOption(CONTAMINATION)) {
analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
}
// ref BAM, tumor BAM and VCF are mandatory
if (refPath == null || tumorPath == null || vcfPath == null) {
printHelpAndExit(options);
return;
}
// ---- open inputs ----------------------------------------------------------
final File tumorBAM = new File(tumorPath);
final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
final File refBAM = new File(refPath);
final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
final File vcfFile = new File(vcfPath);
final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
// expect exactly a tumor/normal pair of genotype samples
final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
if (samples.size() != 2) {
System.err.println("could not determine tumor and sample from VCF");
System.exit(1);
return;
}
TSVOutput.PrintHeaders();
final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
// accumulates the query intervals of every processed variant, for optional slicing
final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
final Map<String, VariantContext> variantMap = new HashMap<>();
final List<VariantContext> variants = Lists.newArrayList();
for (VariantContext variant : vcfReader) {
variantMap.put(variant.getID(), variant);
// for BND pairs: remember the record we are on, then switch 'variant' to its mate.
// If the mate hasn't been seen yet, skip — the pair is processed when the
// second record arrives.
final VariantContext mateVariant = variant;
if (variant.hasAttribute("MATEID")) {
variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
if (variant == null) {
continue;
}
}
// breakpoint 1 comes from the record's own contig/position
final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
final Location location1 = Location.parseLocationString(location, tumorReader.getFileHeader().getSequenceDictionary());
final Range uncertainty1 = extractCIPOS(variant);
final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
HMFVariantType svType;
final Location location2;
// ---- derive SV type and breakpoint 2 per SVTYPE ---------------------------
switch(variant.getStructuralVariantType()) {
case INS:
svType = HMFVariantType.INS;
location2 = location1.set(variant.getAttributeAsInt("END", 0));
break;
case INV:
// Manta marks inversion orientation with INV3/INV5 flags
if (variant.hasAttribute("INV3")) {
svType = HMFVariantType.INV3;
} else if (variant.hasAttribute("INV5")) {
svType = HMFVariantType.INV5;
} else {
System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
continue;
}
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DEL:
svType = HMFVariantType.DEL;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case DUP:
svType = HMFVariantType.DUP;
location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
break;
case BND:
// process the breakend string: split the ALT allele on ']' / '[' to find the
// mate location; whether reference bases precede the bracket determines the
// breakend orientations, mapped onto INV3/DUP/DEL/INV5
final String call = variant.getAlternateAllele(0).getDisplayString();
final String[] leftSplit = call.split("\\]");
final String[] rightSplit = call.split("\\[");
if (leftSplit.length >= 2) {
location2 = Location.parseLocationString(leftSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (leftSplit[0].length() > 0) {
svType = HMFVariantType.INV3;
uncertainty2 = Range.invert(uncertainty1);
} else {
svType = HMFVariantType.DUP;
uncertainty2 = uncertainty1;
}
} else if (rightSplit.length >= 2) {
location2 = Location.parseLocationString(rightSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
if (rightSplit[0].length() > 0) {
svType = HMFVariantType.DEL;
uncertainty2 = uncertainty1;
} else {
svType = HMFVariantType.INV5;
uncertainty2 = Range.invert(uncertainty1);
}
} else {
System.err.println(variant.getID() + " : could not parse breakpoint");
continue;
}
// imprecise BNDs: take breakpoint-2 uncertainty from the mate's CIPOS instead
if (IMPRECISE) {
uncertainty2 = extractCIPOS(mateVariant);
}
break;
default:
System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
continue;
}
// ---- build the analysis context -------------------------------------------
final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
// carry over existing filters, except BPI's own from a previous run
ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
ctx.Uncertainty1 = uncertainty1;
ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2, fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
// partially assembled insertion: join the two known flanks
ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..." + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
} else {
ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
}
ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
// breakend orientations implied by the SV type (+1 = forward, -1 = reverse)
switch(ctx.Type) {
case INS:
case DEL:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = -1;
break;
case INV3:
ctx.OrientationBP1 = 1;
ctx.OrientationBP2 = 1;
break;
case INV5:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = -1;
break;
case DUP:
ctx.OrientationBP1 = -1;
ctx.OrientationBP2 = 1;
break;
}
// ---- run analysis and record output ---------------------------------------
final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
combinedQueryIntervals.addAll(asList(result.QueryIntervals));
TSVOutput.print(variant, ctx, result);
// updates one VCF record with the analysis result; 'swap' is true for the BND
// mate record, where START/END (and the AF pair) are mirrored
final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
if (filters != null) {
filters.clear();
}
// we will map BreakpointError to a flag
if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
} else {
v.getCommonInfo().addFilters(result.Filters);
}
if (result.Filters.isEmpty()) {
final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
}
if (result.Breakpoints.getLeft() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
}
if (result.Breakpoints.getRight() != null) {
v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
}
// remove CIPOS / CIEND when we have an insert sequence
if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
v.getCommonInfo().removeAttribute("CIPOS");
v.getCommonInfo().removeAttribute("CIEND");
}
variants.add(v);
};
vcfUpdater.accept(variant, false);
// for BND pairs, also update the mate record (with coordinates swapped)
if (mateVariant != variant) {
vcfUpdater.accept(mateVariant, true);
}
}
// TODO: update START, END with BPI values and save Manta values in new attributes
// ---- optional annotated VCF output ----------------------------------------
final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
if (vcfOutputPath != null) {
final VCFHeader header = vcfReader.getFileHeader();
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag, "BPI could not determine the breakpoints, inspect manually"));
header.addMetaDataLine(new VCFHeaderLine("bpiVersion", BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
Filter.UpdateVCFHeader(header);
AlleleFrequency.UpdateVCFHeader(header);
// setup VCF
final VariantContextWriter writer = new VariantContextWriterBuilder().setReferenceDictionary(header.getSequenceDictionary()).setOutputFile(vcfOutputPath).build();
writer.writeHeader(header);
// write variants
variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
variants.forEach(writer::add);
writer.close();
}
// ---- optional BAM slices over all queried intervals ------------------------
final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
if (tumorSlicePath != null) {
writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
}
if (refSlicePath != null) {
writeToSlice(refSlicePath, refReader, optimizedIntervals);
}
refReader.close();
tumorReader.close();
} catch (ParseException e) {
printHelpAndExit(options);
System.exit(1);
}
}
Use of org.apache.commons.lang3.Range in project incubator-gobblin by apache —
class SalesforceSource, method getHistogramByProbing:
/**
 * Builds a histogram over [startEpoch, endEpoch) by recursively probing the table,
 * splitting buckets that are too large. A non-negative {@code count} is used as the
 * row count for the whole range instead of issuing a query.
 */
private Histogram getHistogramByProbing(TableCountProbingContext probingContext, int count, long startEpoch, long endEpoch) {
    final Histogram result = new Histogram();
    // template tokens shared by every probe query issued during the recursion
    final Map<String, String> tokens = new HashMap<>();
    tokens.put("table", probingContext.entity);
    tokens.put("column", probingContext.watermarkColumn);
    tokens.put("greater", ">=");
    tokens.put("less", "<");
    getHistogramRecursively(probingContext, result, new StrSubstitutor(tokens), tokens, count, startEpoch, endEpoch);
    return result;
}
Use of org.apache.commons.lang3.Range in project webanno —
class TypeSystemAnalysis, method analyzeRelationLayer:
/**
 * Decides whether the given UIMA type can act as a WebAnno relation layer and, if so,
 * returns its details. A relation layer must have exactly two non-primitive features,
 * named Governor/Dependent, sharing the same range, and that range must be an
 * Annotation subtype.
 */
private Optional<RelationDetails> analyzeRelationLayer(TypeSystem aTS, Type aType) {
    // collect the candidate reference features: non-built-in and non-primitive
    List<Feature> refs = aType.getFeatures().stream()
            .filter(feature -> !isBuiltInFeature(feature))
            .filter(feature -> !feature.getRange().isPrimitive())
            .collect(Collectors.toList());
    // ... there must be exactly two of them
    if (refs.size() != 2) {
        return Optional.empty();
    }
    Feature first = refs.get(0);
    Feature second = refs.get(1);
    // Relations must use the names "Governor" and "Dependent" for its references
    // because these names are hardcoded throughout WebAnno.
    List<String> allowedNames = asList(FEAT_REL_SOURCE, FEAT_REL_TARGET);
    if (!(allowedNames.contains(first.getShortName()) && allowedNames.contains(second.getShortName()))) {
        return Optional.empty();
    }
    // ... both references must share the same range
    if (!first.getRange().getName().equals(second.getRange().getName())) {
        return Optional.empty();
    }
    // ... and that range must be subsumed by Annotation
    if (!aTS.subsumes(aTS.getType(CAS.TYPE_NAME_ANNOTATION), first.getRange())) {
        return Optional.empty();
    }
    // Looks like a relation layer — report which span layer it attaches to.
    RelationDetails details = new RelationDetails();
    details.attachLayer = first.getRange().getName();
    details.sourceFeature = WebAnnoConst.FEAT_REL_SOURCE;
    details.targetFeature = WebAnnoConst.FEAT_REL_TARGET;
    return Optional.of(details);
}
Use of org.apache.commons.lang3.Range in project photon-model by vmware —
class IPAddressAllocationTaskService, method createNewIpAddressResource:
/**
 * Creates a new IP address resource for the given IP and transitions it to ALLOCATED.
 * The resource is first POSTed in AVAILABLE state and then patched to ALLOCATED via
 * {@code updateExistingIpAddressResource}. The two-step dance is deliberate: when
 * concurrent allocation requests race to create the same resource, only one PATCH
 * succeeds and the losers retry the allocation.
 *
 * @param ipAddress IP address to use for the new IP address resource.
 * @param subnetRangeResourceLink Subnet range resource link to use for the new IP address resource.
 * @param connectedResourceLink Link to the resource this IP is assigned to.
 */
private DeferredResult<IPAddressState> createNewIpAddressResource(String ipAddress, String subnetRangeResourceLink, String connectedResourceLink, IPAddressAllocationContext context) {
    final IPAddressState initialState = new IPAddressState();
    initialState.ipAddressStatus = IPAddressState.IPAddressStatus.AVAILABLE;
    initialState.ipAddress = ipAddress;
    initialState.subnetRangeLink = subnetRangeResourceLink;
    initialState.documentSelfLink = generateIPAddressDocumentSelfLink(subnetRangeResourceLink, ipAddress);
    logInfo("Creating IPAddressState with IP %s, subnet %s, for connected resource " + "%s", ipAddress, subnetRangeResourceLink, connectedResourceLink);
    final Operation createPost = Operation.createPost(this, IPAddressService.FACTORY_LINK).setBody(initialState);
    return sendWithDeferredResult(createPost)
            .thenApply(response -> {
                // take the persisted state and mark it as claimed by the resource
                IPAddressState created = response.getBody(IPAddressState.class);
                created.ipAddressStatus = IPAddressState.IPAddressStatus.ALLOCATED;
                created.connectedResourceLink = connectedResourceLink;
                return created;
            })
            .thenCompose(claimed -> updateExistingIpAddressResource(claimed, context));
}
Aggregations