Use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project Charset by CharsetMC.
The class ModPathIterator, method getValidPaths.
public static Collection<Pair<String, Path>> getValidPaths(String prefix) {
    List<Pair<String, Path>> paths = new ArrayList<>();
    for (ModContainer container : Loader.instance().getActiveModList()) {
        File file = container.getSource();
        try {
            if (file.exists()) {
                if (file.isDirectory()) {
                    // Mod loaded from a directory (e.g. a development environment).
                    File f = new File(file, prefix.replaceAll("%1", container.getModId()));
                    if (f.exists()) {
                        paths.add(Pair.of(container.getModId(), f.toPath()));
                    }
                } else {
                    // Mod loaded from an archive; open it as a zip filesystem.
                    FileSystem fileSystem = FileSystems.newFileSystem(file.toPath(), null);
                    Path p = fileSystem.getPath(prefix.replaceAll("%1", container.getModId()));
                    if (Files.exists(p)) {
                        paths.add(Pair.of(container.getModId(), p));
                    }
                }
            }
        } catch (NoSuchFileException | FileSystemNotFoundException e) {
            // Don't worry~
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return paths;
}
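A minimal sketch of how such a helper might be consumed, assuming a hypothetical resource prefix in which %1 stands for the mod id (the prefix string, the .json filter, and the surrounding java.nio.file / java.util.stream imports are illustrative, not taken from the project):

// Hypothetical caller: enumerate JSON resources contributed by every active mod.
for (Pair<String, Path> entry : ModPathIterator.getValidPaths("assets/%1/custom")) {
    String modId = entry.getLeft();
    Path root = entry.getRight();
    try (Stream<Path> files = Files.walk(root)) {
        files.filter(p -> p.toString().endsWith(".json"))
             .forEach(p -> System.out.println(modId + " -> " + p));
    } catch (IOException e) {
        e.printStackTrace();
    }
}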
Use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.
The class Filter, method getFilters.
static Collection<String> getFilters(final HMFVariantContext ctx, final SampleStats tumorStats, final SampleStats refStats,
        final Pair<Location, Location> breakpoints, final float contamination) {
    final int MIN_ANCHOR_LENGTH = 30;
    final List<Filters> filters = Lists.newArrayList();
    if (Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats)
            .mapToInt(s -> s.PR_Only_Normal + s.PR_SR_Normal + s.PR_Only_Support + s.PR_SR_Support)
            .anyMatch(i -> i < 10)) {
        filters.add(Filters.MinDepth);
    }
    final int tumor_SR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).mapToInt(Filter::supportSR).sum();
    if (ctx.isInsert()) {
        // no PR/SR checks
    } else if (ctx.isShortVariant()) {
        // short variant logic
        final boolean bothSidesHaveSR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).allMatch(s -> supportSR(s) > 0);
        final boolean anchorLengthOkay = tumorStats.SR_Evidence.stream()
                .anyMatch(p -> Stream.of(p.getLeft(), p.getRight())
                        .anyMatch(r -> r.getAlignmentEnd() - r.getAlignmentStart() >= MIN_ANCHOR_LENGTH));
        if (!bothSidesHaveSR) {
            filters.add(Filters.SRSupportZero);
        } else if (!anchorLengthOkay) {
            filters.add(Filters.MinAnchorLength);
        }
        // must not have SR support in normal
        final int ref_SR = Stream.of(refStats.BP1_Stats, refStats.BP2_Stats).mapToInt(Filter::supportSR).sum();
        final int allowableNormalSupport = (int) (contamination * tumor_SR);
        if (ref_SR > allowableNormalSupport) {
            filters.add(Filters.SRNormalSupport);
        }
    } else {
        // we only need to check BP1 as BP1 PR+PRSR == BP2 PR+PRSR
        final int allowableNormalSupport = (int) (contamination * supportPR(tumorStats.BP1_Stats));
        if (supportPR(refStats.BP1_Stats) > allowableNormalSupport) {
            filters.add(Filters.PRNormalSupport);
        }
        final boolean anchorLengthOkay = tumorStats.PR_Evidence.stream()
                .anyMatch(p -> Stream.of(p.getLeft(), p.getRight())
                        .allMatch(r -> r.getAlignmentEnd() - r.getAlignmentStart() >= MIN_ANCHOR_LENGTH));
        // only applicable for longer variants
        final int tumor_PR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).mapToInt(Filter::supportPR).sum();
        if (tumor_PR == 0) {
            filters.add(Filters.PRSupportZero);
        } else if (!anchorLengthOkay) {
            filters.add(Filters.MinAnchorLength);
        }
    }
    // we must adjust from Manta breakpoint convention to our clipping position convention
    final List<Location> adjusted_bp = Arrays.asList(breakpoints.getLeft().add(ctx.OrientationBP1), breakpoints.getRight().add(ctx.OrientationBP2));
    final Set<String> concordant_reads = Sets.newHashSet();
    for (final Location bp : adjusted_bp) {
        for (final ClipStats t : tumorStats.Sample_Clipping.getSequencesAt(bp)) {
            if (t.LongestClipSequence.length() < 5) {
                continue;
            }
            final String tumorSeq = t.Left
                    ? t.LongestClipSequence.substring(t.LongestClipSequence.length() - 5)
                    : t.LongestClipSequence.substring(0, 5);
            for (final ClipStats r : refStats.Sample_Clipping.getSequencesAt(bp)) {
                if (t.Left != r.Left) {
                    continue;
                } else if (r.LongestClipSequence.length() < 5) {
                    continue;
                }
                if (t.Left) {
                    if (tumorSeq.equals(r.LongestClipSequence.substring(r.LongestClipSequence.length() - 5))) {
                        concordant_reads.addAll(r.SupportingReads);
                    }
                } else {
                    if (tumorSeq.equals(r.LongestClipSequence.substring(0, 5))) {
                        concordant_reads.addAll(r.SupportingReads);
                    }
                }
            }
        }
    }
    if (concordant_reads.size() > (int) (contamination * tumor_SR)) {
        filters.add(Filters.ClippingConcordance);
    }
    final Set<String> merged = Sets.newHashSet(ctx.Filter);
    merged.addAll(filters.stream().map(Filters::toString).collect(Collectors.toList()));
    return merged;
}
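The clipping-concordance loop above reduces to comparing the five bases of the longest clip sequence nearest the breakpoint, on the same clipping side, between tumor and reference. A standalone sketch of just that comparison (the helper name and signature are illustrative, not part of the project):

// True when two clip sequences agree on the 5 bases adjacent to the breakpoint.
// 'leftClipped' mirrors ClipStats.Left: the clip hangs off the left side of the read.
static boolean clipsConcordant(final String tumorClip, final String refClip, final boolean leftClipped) {
    if (tumorClip.length() < 5 || refClip.length() < 5) {
        return false;
    }
    return leftClipped
            ? tumorClip.substring(tumorClip.length() - 5).equals(refClip.substring(refClip.length() - 5))
            : tumorClip.substring(0, 5).equals(refClip.substring(0, 5));
}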
Use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.
The class MNVDetectorApplication, method processVariants.
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = strelka
            ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE)
            : vcfReader.getFileHeader();
    final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext variant = strelka
                ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE)
                : rawVariant;
        final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
        outputPair = MNVDetector.fitsMNVRegion(potentialMNVregion, variant);
        outputPair.getRight()
                .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion)
                        .ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion, vcfWriter, bedWriter, "\n")));
    }
    filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
    vcfWriter.close();
    vcfReader.close();
    bedWriter.close();
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
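The Pair here threads two pieces of state through the loop: the region still being extended on the left and, once a variant no longer fits, the region that was just closed on the right. A simplified sketch of the same pattern with plain types (the run-of-integers example is illustrative, not the project's logic):

// Group consecutive integers into runs; a completed run is handed back on the right side.
static void printRuns(final int[] values) {
    Pair<List<Integer>, Optional<List<Integer>>> state = ImmutablePair.of(new ArrayList<>(), Optional.empty());
    for (final int value : values) {
        final List<Integer> open = state.getLeft();
        if (open.isEmpty() || value == open.get(open.size() - 1) + 1) {
            open.add(value);
            state = ImmutablePair.of(open, Optional.empty());
        } else {
            final List<Integer> fresh = new ArrayList<>();
            fresh.add(value);
            state = ImmutablePair.of(fresh, Optional.of(open));
        }
        state.getRight().ifPresent(run -> System.out.println("closed run: " + run));
    }
    System.out.println("last run: " + state.getLeft());
}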
Use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.
The class BamSlicerApplication, method sliceFromURLs.
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
    final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
    final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
    final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
    final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
    final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);
    queryIntervalsAndSpan.ifPresent(pair -> {
        LOGGER.info("Slicing bam on bed regions...");
        final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
        writeToSlice(writer, bedIterator);
        LOGGER.info("Done writing bed slices.");
    });
    unmappedChunk.ifPresent(chunk -> {
        LOGGER.info("Slicing unmapped reads...");
        final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
        writeToSlice(writer, unmappedIterator);
        LOGGER.info("Done writing unmapped reads.");
    });
    reader.close();
    writer.close();
    cachingReader.close();
}
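One detail that makes the pair.getKey()/pair.getValue() calls above work: commons-lang3's Pair implements Map.Entry, so getKey() and getValue() are simply aliases for getLeft() and getRight(). A minimal illustration with placeholder values:

// Pair implements Map.Entry<L, R>; both accessor styles return the same references.
Pair<String, Integer> p = Pair.of("chr1", 1000);
assert p.getKey().equals(p.getLeft());     // "chr1"
assert p.getValue().equals(p.getRight());  // 1000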
Use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project systemml by apache.
The class PlanSelectionFuseCostBased, method rGetPlanCosts.
private static double rGetPlanCosts(CPlanMemoTable memo, Hop current, HashSet<Pair<Long, Long>> visited, HashSet<Long> partition,
        ArrayList<Long> M, boolean[] plan, HashMap<Long, Double> computeCosts, CostVector costsCurrent, TemplateType currentType) {
    // memoization per hop id and cost vector to account for redundant
    // computation without double counting materialized results or compute
    // costs of complex operation DAGs within a single fused operator
    Pair<Long, Long> tag = Pair.of(current.getHopID(), (costsCurrent == null) ? 0 : costsCurrent.ID);
    if (visited.contains(tag))
        return 0;
    visited.add(tag);
    // open template if necessary, including memoization
    // under awareness of current plan choice
    MemoTableEntry best = null;
    boolean opened = false;
    if (memo.contains(current.getHopID())) {
        if (currentType == null) {
            best = memo.get(current.getHopID()).stream()
                    .filter(p -> p.isValid())
                    .filter(p -> hasNoRefToMaterialization(p, M, plan))
                    .min(new BasicPlanComparator()).orElse(null);
            opened = true;
        } else {
            best = memo.get(current.getHopID()).stream()
                    .filter(p -> p.type == currentType || p.type == TemplateType.CELL)
                    .filter(p -> hasNoRefToMaterialization(p, M, plan))
                    .min(Comparator.comparing(p -> 7 - ((p.type == currentType) ? 4 : 0) - p.countPlanRefs()))
                    .orElse(null);
        }
    }
    // create new cost vector if opened, initialized with write costs
    CostVector costVect = !opened ? costsCurrent : new CostVector(Math.max(current.getDim1(), 1) * Math.max(current.getDim2(), 1));
    // add compute costs of current operator to costs vector
    if (partition.contains(current.getHopID()))
        costVect.computeCosts += computeCosts.get(current.getHopID());
    // process children recursively
    double costs = 0;
    for (int i = 0; i < current.getInput().size(); i++) {
        Hop c = current.getInput().get(i);
        if (best != null && best.isPlanRef(i))
            costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, costVect, best.type);
        else if (best != null && isImplicitlyFused(current, i, best.type))
            costVect.addInputSize(c.getInput().get(0).getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
        else {
            // include children and I/O costs
            costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, null, null);
            if (costVect != null && c.getDataType().isMatrix())
                costVect.addInputSize(c.getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
        }
    }
    // add costs for opened fused operator
    if (partition.contains(current.getHopID())) {
        if (opened) {
            if (LOG.isTraceEnabled())
                LOG.trace("Cost vector for fused operator (hop " + current.getHopID() + "): " + costVect);
            // time for output write
            costs += costVect.outSize * 8 / WRITE_BANDWIDTH;
            costs += Math.max(costVect.computeCosts * costVect.getMaxInputSize() / COMPUTE_BANDWIDTH,
                    costVect.getSumInputSizes() * 8 / READ_BANDWIDTH);
        } else if (hasNonPartitionConsumer(current, partition)) {
            // add costs for non-partition read in the middle of fused operator
            costs += rGetPlanCosts(memo, current, visited, partition, M, plan, computeCosts, null, null);
        }
    }
    // sanity check non-negative costs
    if (costs < 0 || Double.isNaN(costs) || Double.isInfinite(costs))
        throw new RuntimeException("Wrong cost estimate: " + costs);
    return costs;
}
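The memoization above relies on commons-lang3 pairs having value-based equals and hashCode, which lets Pair.of(hopID, costVectorID) serve directly as a key in the visited HashSet. A minimal sketch of that idiom (the literal ids are illustrative):

Set<Pair<Long, Long>> visited = new HashSet<>();
// First visit of (hopID=7, costVectorID=0) is costed; a repeat visit is skipped.
if (visited.add(Pair.of(7L, 0L))) {
    // ... cost this hop under the current cost vector ...
}
if (!visited.add(Pair.of(7L, 0L))) {
    // already visited with the same cost vector; contributes 0 to the total
}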