Example 56 with Pair

Use of org.apache.commons.lang3.tuple.Pair in project Charset by CharsetMC.

From class ModPathIterator, method getValidPaths:

public static Collection<Pair<String, Path>> getValidPaths(String prefix) {
    List<Pair<String, Path>> paths = new ArrayList<>();
    for (ModContainer container : Loader.instance().getActiveModList()) {
        File file = container.getSource();
        try {
            if (file.exists()) {
                if (file.isDirectory()) {
                    File f = new File(file, prefix.replaceAll("%1", container.getModId()));
                    if (f.exists()) {
                        paths.add(Pair.of(container.getModId(), f.toPath()));
                    }
                } else {
                    // Open the mod jar as a zip FileSystem; it is deliberately not closed,
                    // since closing it would invalidate the returned Path for callers.
                    // (The explicit (ClassLoader) null avoids an ambiguous overload on Java 13+.)
                    FileSystem fileSystem = FileSystems.newFileSystem(file.toPath(), (ClassLoader) null);
                    Path p = fileSystem.getPath(prefix.replaceAll("%1", container.getModId()));
                    if (Files.exists(p)) {
                        paths.add(Pair.of(container.getModId(), p));
                    }
                }
            }
        } catch (NoSuchFileException | FileSystemNotFoundException e) {
            // Expected when a mod has no matching path; safe to ignore.
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return paths;
}
Also used : ModContainer(net.minecraftforge.fml.common.ModContainer) ArrayList(java.util.ArrayList) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)
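
A minimal sketch of how a caller might consume these (mod id, path) pairs. The prefix string and the file-walking logic are invented for illustration, and it assumes a Forge environment where Loader.instance() is initialized; ModPathIterator is the Charset class shown above (its import is omitted here).

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;

public class ModPathScan {
    public static void main(String[] args) throws Exception {
        // "assets/%1/recipes" is a hypothetical prefix; "%1" is replaced per mod id.
        Collection<Pair<String, Path>> paths = ModPathIterator.getValidPaths("assets/%1/recipes");
        for (Pair<String, Path> entry : paths) {
            String modId = entry.getLeft();  // left: owning mod id
            Path root = entry.getRight();    // right: resolved directory or zip path
            try (Stream<Path> files = Files.walk(root)) {
                files.filter(Files::isRegularFile)
                     .forEach(p -> System.out.println(modId + " -> " + p));
            }
        }
    }
}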

Example 57 with Pair

Use of org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.

From class Filter, method getFilters:

static Collection<String> getFilters(final HMFVariantContext ctx, final SampleStats tumorStats, final SampleStats refStats, final Pair<Location, Location> breakpoints, final float contamination) {
    final int MIN_ANCHOR_LENGTH = 30;
    final List<Filters> filters = Lists.newArrayList();
    // require combined paired-read/split-read depth of at least 10 at each breakpoint
    if (Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).mapToInt(s -> s.PR_Only_Normal + s.PR_SR_Normal + s.PR_Only_Support + s.PR_SR_Support).anyMatch(i -> i < 10)) {
        filters.add(Filters.MinDepth);
    }
    final int tumor_SR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).mapToInt(Filter::supportSR).sum();
    if (ctx.isInsert()) {
    // no PR/SR checks
    } else if (ctx.isShortVariant()) {
        // short variant logic
        final boolean bothSidesHaveSR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).allMatch(s -> supportSR(s) > 0);
        final boolean anchorLengthOkay = tumorStats.SR_Evidence.stream().anyMatch(p -> Stream.of(p.getLeft(), p.getRight()).anyMatch(r -> r.getAlignmentEnd() - r.getAlignmentStart() >= MIN_ANCHOR_LENGTH));
        if (!bothSidesHaveSR) {
            filters.add(Filters.SRSupportZero);
        } else if (!anchorLengthOkay) {
            filters.add(Filters.MinAnchorLength);
        }
        // must not have SR support in normal
        final int ref_SR = Stream.of(refStats.BP1_Stats, refStats.BP2_Stats).mapToInt(Filter::supportSR).sum();
        final int allowableNormalSupport = (int) (contamination * tumor_SR);
        if (ref_SR > allowableNormalSupport) {
            filters.add(Filters.SRNormalSupport);
        }
    } else {
        // we only need to check BP1 as BP1 PR+PRSR == BP2 PR+PRSR
        final int allowableNormalSupport = (int) (contamination * supportPR(tumorStats.BP1_Stats));
        if (supportPR(refStats.BP1_Stats) > allowableNormalSupport) {
            filters.add(Filters.PRNormalSupport);
        }
        final boolean anchorLengthOkay = tumorStats.PR_Evidence.stream().anyMatch(p -> Stream.of(p.getLeft(), p.getRight()).allMatch(r -> r.getAlignmentEnd() - r.getAlignmentStart() >= MIN_ANCHOR_LENGTH));
        // only applicable for longer variants
        final int tumor_PR = Stream.of(tumorStats.BP1_Stats, tumorStats.BP2_Stats).mapToInt(Filter::supportPR).sum();
        if (tumor_PR == 0) {
            filters.add(Filters.PRSupportZero);
        } else if (!anchorLengthOkay) {
            filters.add(Filters.MinAnchorLength);
        }
    }
    // we must adjust from Manta breakpoint convention to our clipping position convention
    final List<Location> adjusted_bp = Arrays.asList(breakpoints.getLeft().add(ctx.OrientationBP1), breakpoints.getRight().add(ctx.OrientationBP2));
    final Set<String> concordant_reads = Sets.newHashSet();
    for (final Location bp : adjusted_bp) {
        for (final ClipStats t : tumorStats.Sample_Clipping.getSequencesAt(bp)) {
            // compare only the 5 terminal bases of each clipped sequence; shorter clips are skipped
            if (t.LongestClipSequence.length() < 5) {
                continue;
            }
            final String tumorSeq = t.Left ? t.LongestClipSequence.substring(t.LongestClipSequence.length() - 5) : t.LongestClipSequence.substring(0, 5);
            for (final ClipStats r : refStats.Sample_Clipping.getSequencesAt(bp)) {
                if (t.Left != r.Left) {
                    continue;
                } else if (r.LongestClipSequence.length() < 5) {
                    continue;
                }
                if (t.Left) {
                    if (tumorSeq.equals(r.LongestClipSequence.substring(r.LongestClipSequence.length() - 5))) {
                        concordant_reads.addAll(r.SupportingReads);
                    }
                } else {
                    if (tumorSeq.equals(r.LongestClipSequence.substring(0, 5))) {
                        concordant_reads.addAll(r.SupportingReads);
                    }
                }
            }
        }
    }
    if (concordant_reads.size() > (int) (contamination * tumor_SR)) {
        filters.add(Filters.ClippingConcordance);
    }
    final Set<String> merged = Sets.newHashSet(ctx.Filter);
    merged.addAll(filters.stream().map(Filters::toString).collect(Collectors.toList()));
    return merged;
}
Also used : VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Arrays(java.util.Arrays) VCFHeader(htsjdk.variant.vcf.VCFHeader) Collection(java.util.Collection) ClipStats(com.hartwig.hmftools.breakpointinspector.clipping.ClipStats) Set(java.util.Set) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) Stream(java.util.stream.Stream) Lists(com.google.common.collect.Lists) Pair(org.apache.commons.lang3.tuple.Pair) Collections(java.util.Collections)
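
The recurring pattern in this filter is a contamination-scaled threshold: evidence in the normal sample is tolerated up to contamination times the tumor support, truncated to an int. A self-contained sketch of just that arithmetic, with invented read counts:

import org.apache.commons.lang3.tuple.Pair;

public class NormalSupportCheck {
    // Mirrors the thresholding above: normal-sample support is allowed up to
    // (contamination fraction) x (tumor support), truncated toward zero.
    static boolean exceedsAllowable(int refSupport, int tumorSupport, float contamination) {
        int allowable = (int) (contamination * tumorSupport);
        return refSupport > allowable;
    }

    public static void main(String[] args) {
        // 2% contamination, 120 tumor split reads -> up to 2 normal reads tolerated
        System.out.println(exceedsAllowable(3, 120, 0.02f)); // true  -> filter fires
        System.out.println(exceedsAllowable(2, 120, 0.02f)); // false -> passes
        // Pair is a convenient carrier for per-breakpoint values, as in getFilters:
        Pair<Integer, Integer> depths = Pair.of(15, 9);
        System.out.println("BP1 depth " + depths.getLeft() + ", BP2 depth " + depths.getRight());
    }
}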

Example 58 with Pair

Use of org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.

From class MNVDetectorApplication, method processVariants:

private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed, boolean strelka) throws IOException {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
    final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf).setReferenceDictionary(outputHeader.getSequenceDictionary()).build();
    vcfWriter.writeHeader(outputHeader);
    // Running state: the MNV region currently being extended (left) and an
    // optional region that has just been completed and can be written (right).
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext variant = strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;
        final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
        outputPair = MNVDetector.fitsMNVRegion(potentialMNVregion, variant);
        outputPair.getRight().ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion, vcfWriter, bedWriter, "\n")));
    }
    // flush the final, still-open region once the input is exhausted
    filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
    vcfWriter.close();
    vcfReader.close();
    bedWriter.close();
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
Also used : PotentialMNVRegion(com.hartwig.hmftools.strelka.mnv.PotentialMNVRegion) CommandLineParser(org.apache.commons.cli.CommandLineParser) BufferedWriter(java.io.BufferedWriter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) FileWriter(java.io.FileWriter) Options(org.apache.commons.cli.Options) IOException(java.io.IOException) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) HelpFormatter(org.apache.commons.cli.HelpFormatter) File(java.io.File) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DefaultParser(org.apache.commons.cli.DefaultParser) Logger(org.apache.logging.log4j.Logger) Pair(org.apache.commons.lang3.tuple.Pair) ParseException(org.apache.commons.cli.ParseException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Optional(java.util.Optional) CommandLine(org.apache.commons.cli.CommandLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) NotNull(org.jetbrains.annotations.NotNull) MNVDetector(com.hartwig.hmftools.strelka.mnv.MNVDetector) LogManager(org.apache.logging.log4j.LogManager) StrelkaPostProcessApplication.generateOutputHeader(com.hartwig.hmftools.strelka.StrelkaPostProcessApplication.generateOutputHeader)
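
The driver above threads a Pair through the loop as an accumulator: the left side is the region still being grown, the right side an optional region that just closed. A toy, self-contained version of the same shape, grouping consecutive characters instead of variants (the Run-of-characters model is invented):

import java.util.Optional;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

public class RegionAccumulator {
    // Returns the updated open region (left) and, when the new value does not
    // extend it, the finished region to flush (right), mirroring fitsMNVRegion.
    static Pair<StringBuilder, Optional<String>> fit(StringBuilder open, char c) {
        if (open.length() == 0 || c == open.charAt(open.length() - 1) + 1) {
            return ImmutablePair.of(open.append(c), Optional.empty());
        }
        return ImmutablePair.of(new StringBuilder().append(c), Optional.of(open.toString()));
    }

    public static void main(String[] args) {
        Pair<StringBuilder, Optional<String>> state = ImmutablePair.of(new StringBuilder(), Optional.empty());
        for (char c : "abcxyz".toCharArray()) {
            state = fit(state.getLeft(), c);
            state.getRight().ifPresent(done -> System.out.println("flush: " + done)); // prints "flush: abc"
        }
        System.out.println("final: " + state.getLeft()); // the trailing region still needs flushing
    }
}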

Example 59 with Pair

Use of org.apache.commons.lang3.tuple.Pair in project hmftools by hartwigmedical.

From class BamSlicerApplication, method sliceFromURLs:

private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
    final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
    final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
    final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
    final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
    final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);
    queryIntervalsAndSpan.ifPresent(pair -> {
        LOGGER.info("Slicing bam on bed regions...");
        final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
        writeToSlice(writer, bedIterator);
        LOGGER.info("Done writing bed slices.");
    });
    unmappedChunk.ifPresent(chunk -> {
        LOGGER.info("Slicing unmapped reads...");
        final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
        writeToSlice(writer, unmappedIterator);
        LOGGER.info("Done writing unmapped reads.");
    });
    reader.close();
    writer.close();
    cachingReader.close();
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) Chunk(htsjdk.samtools.Chunk) SamReader(htsjdk.samtools.SamReader) DiskBasedBAMFileIndex(htsjdk.samtools.DiskBasedBAMFileIndex) SAMRecord(htsjdk.samtools.SAMRecord) BAMIndex(htsjdk.samtools.BAMIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)
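
Note that the slicing code reads the pair with getKey()/getValue() rather than getLeft()/getRight(); that works because commons-lang3's Pair implements java.util.Map.Entry, so the two accessor families are interchangeable. A quick demonstration with invented values:

import java.util.Map;
import org.apache.commons.lang3.tuple.Pair;

public class PairAsEntry {
    public static void main(String[] args) {
        Pair<String, Integer> p = Pair.of("chr1", 1000);
        Map.Entry<String, Integer> e = p; // Pair implements Map.Entry
        System.out.println(e.getKey().equals(p.getLeft()));    // true
        System.out.println(e.getValue().equals(p.getRight())); // true
    }
}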

Example 60 with Pair

Use of org.apache.commons.lang3.tuple.Pair in project systemml by apache.

From class PlanSelectionFuseCostBased, method rGetPlanCosts:

private static double rGetPlanCosts(CPlanMemoTable memo, Hop current, HashSet<Pair<Long, Long>> visited, HashSet<Long> partition, ArrayList<Long> M, boolean[] plan, HashMap<Long, Double> computeCosts, CostVector costsCurrent, TemplateType currentType) {
    // memoization per hop id and cost vector to account for redundant
    // computation without double counting materialized results or compute
    // costs of complex operation DAGs within a single fused operator
    Pair<Long, Long> tag = Pair.of(current.getHopID(), (costsCurrent == null) ? 0 : costsCurrent.ID);
    if (visited.contains(tag))
        return 0;
    visited.add(tag);
    // open template if necessary, including memoization
    // under awareness of current plan choice
    MemoTableEntry best = null;
    boolean opened = false;
    if (memo.contains(current.getHopID())) {
        if (currentType == null) {
            best = memo.get(current.getHopID()).stream().filter(p -> p.isValid()).filter(p -> hasNoRefToMaterialization(p, M, plan)).min(new BasicPlanComparator()).orElse(null);
            opened = true;
        } else {
            best = memo.get(current.getHopID()).stream().filter(p -> p.type == currentType || p.type == TemplateType.CELL).filter(p -> hasNoRefToMaterialization(p, M, plan)).min(Comparator.comparing(p -> 7 - ((p.type == currentType) ? 4 : 0) - p.countPlanRefs())).orElse(null);
        }
    }
    // create new cost vector if opened, initialized with write costs
    CostVector costVect = !opened ? costsCurrent : new CostVector(Math.max(current.getDim1(), 1) * Math.max(current.getDim2(), 1));
    // add compute costs of current operator to costs vector
    if (partition.contains(current.getHopID()))
        costVect.computeCosts += computeCosts.get(current.getHopID());
    // process children recursively
    double costs = 0;
    for (int i = 0; i < current.getInput().size(); i++) {
        Hop c = current.getInput().get(i);
        if (best != null && best.isPlanRef(i))
            costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, costVect, best.type);
        else if (best != null && isImplicitlyFused(current, i, best.type))
            costVect.addInputSize(c.getInput().get(0).getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
        else {
            // include children and I/O costs
            costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, null, null);
            if (costVect != null && c.getDataType().isMatrix())
                costVect.addInputSize(c.getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
        }
    }
    // add costs for opened fused operator
    if (partition.contains(current.getHopID())) {
        if (opened) {
            if (LOG.isTraceEnabled())
                LOG.trace("Cost vector for fused operator (hop " + current.getHopID() + "): " + costVect);
            // time for output write
            costs += costVect.outSize * 8 / WRITE_BANDWIDTH;
            costs += Math.max(costVect.computeCosts * costVect.getMaxInputSize() / COMPUTE_BANDWIDTH, costVect.getSumInputSizes() * 8 / READ_BANDWIDTH);
        } else if (hasNonPartitionConsumer(current, partition)) {
            // add costs for non-partition read in the middle of a fused operator
            costs += rGetPlanCosts(memo, current, visited, partition, M, plan, computeCosts, null, null);
        }
    }
    // sanity check: the cost estimate must be finite and non-negative
    if (costs < 0 || Double.isNaN(costs) || Double.isInfinite(costs))
        throw new RuntimeException("Wrong cost estimate: " + costs);
    return costs;
}
Also used : TemplateRow(org.apache.sysml.hops.codegen.template.TemplateRow) Arrays(java.util.Arrays) IndexingOp(org.apache.sysml.hops.IndexingOp) HashMap(java.util.HashMap) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) TemplateOuterProduct(org.apache.sysml.hops.codegen.template.TemplateOuterProduct) ParameterizedBuiltinOp(org.apache.sysml.hops.ParameterizedBuiltinOp) AggOp(org.apache.sysml.hops.Hop.AggOp) ArrayList(java.util.ArrayList) LiteralOp(org.apache.sysml.hops.LiteralOp) HashSet(java.util.HashSet) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) Pair(org.apache.commons.lang3.tuple.Pair) ReorgOp(org.apache.sysml.hops.ReorgOp) IDSequence(org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence) CollectionUtils(org.apache.commons.collections.CollectionUtils) InfrastructureAnalyzer(org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer) AggBinaryOp(org.apache.sysml.hops.AggBinaryOp) Statistics(org.apache.sysml.utils.Statistics) TernaryOp(org.apache.sysml.hops.TernaryOp) Iterator(java.util.Iterator) Collection(java.util.Collection) TemplateType(org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType) BinaryOp(org.apache.sysml.hops.BinaryOp) TemplateUtils(org.apache.sysml.hops.codegen.template.TemplateUtils) Collectors(java.util.stream.Collectors) Direction(org.apache.sysml.hops.Hop.Direction) Hop(org.apache.sysml.hops.Hop) List(java.util.List) Entry(java.util.Map.Entry) DMLScript(org.apache.sysml.api.DMLScript) Log(org.apache.commons.logging.Log) LogFactory(org.apache.commons.logging.LogFactory) UtilFunctions(org.apache.sysml.runtime.util.UtilFunctions) Comparator(java.util.Comparator) Collections(java.util.Collections) HopRewriteUtils(org.apache.sysml.hops.rewrite.HopRewriteUtils) UnaryOp(org.apache.sysml.hops.UnaryOp) CPlanMemoTable(org.apache.sysml.hops.codegen.template.CPlanMemoTable)
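
The memoization tag in rGetPlanCosts works because Pair has value-based equals() and hashCode(): two independently constructed pairs with equal contents are equal and hash identically, so a HashSet of pairs deduplicates (hop id, cost-vector id) visits. A minimal illustration with invented ids:

import java.util.HashSet;
import org.apache.commons.lang3.tuple.Pair;

public class PairMemoKey {
    public static void main(String[] args) {
        HashSet<Pair<Long, Long>> visited = new HashSet<>();
        // Pair.of returns an ImmutablePair whose equals()/hashCode() compare
        // both elements, which is exactly what the memoization tag relies on.
        visited.add(Pair.of(42L, 7L));
        System.out.println(visited.contains(Pair.of(42L, 7L))); // true
        System.out.println(visited.contains(Pair.of(42L, 8L))); // false
    }
}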

Aggregations

Pair (org.apache.commons.lang3.tuple.Pair): 685
ArrayList (java.util.ArrayList): 209
List (java.util.List): 154
Test (org.junit.Test): 150
ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 142
HashMap (java.util.HashMap): 123
Collectors (java.util.stream.Collectors): 123
Map (java.util.Map): 112
Message (com.microsoft.azure.sdk.iot.device.Message): 71
IOException (java.io.IOException): 70
MutablePair (org.apache.commons.lang3.tuple.MutablePair): 64
java.util (java.util): 55
IotHubTransportMessage (com.microsoft.azure.sdk.iot.device.transport.IotHubTransportMessage): 52
Set (java.util.Set): 49
StringUtils (org.apache.commons.lang3.StringUtils): 48
File (java.io.File): 46
Optional (java.util.Optional): 45
Arrays (java.util.Arrays): 44
HashSet (java.util.HashSet): 40
Test (org.junit.jupiter.api.Test): 39