
Example 1 with StructuralVariant

Use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.

From the class ClusterVariantLegFactory, the method create:

@NotNull
public static List<ClusterVariantLeg> create(@NotNull final List<StructuralVariant> variants) {
    final List<ClusterVariantLeg> positions = Lists.newArrayList();
    for (StructuralVariant variant : variants) {
        // skip insertions; every other variant contributes both of its breakend legs
        if (variant.type() != StructuralVariantType.INS) {
            final StructuralVariantLeg start = variant.start();
            positions.add(ImmutableClusterVariantLeg.builder().from(start).type(variant.type()).build());
            final StructuralVariantLeg end = variant.end();
            positions.add(ImmutableClusterVariantLeg.builder().from(end).type(variant.type()).build());
        }
    }
    Collections.sort(positions);
    return positions;
}
Also used : StructuralVariantLeg(com.hartwig.hmftools.common.variant.structural.StructuralVariantLeg) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) NotNull(org.jetbrains.annotations.NotNull)
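
As a quick orientation, here is a minimal, hedged usage sketch of the factory above. The surrounding wiring is invented for illustration: the variant list is assumed to come from an existing reader, and the chromosome()/position() accessors on ClusterVariantLeg are assumed from its genome-position style interface rather than confirmed from the project.

static void printClusterLegs(final List<StructuralVariant> variants) {
    // only the call to ClusterVariantLegFactory.create mirrors the example above
    final List<ClusterVariantLeg> legs = ClusterVariantLegFactory.create(variants);
    for (final ClusterVariantLeg leg : legs) {
        // legs come back position-sorted, with INS variants excluded;
        // the accessors below are assumptions, not taken from the source
        System.out.println(leg.chromosome() + ":" + leg.position() + " " + leg.type());
    }
}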

Example 2 with StructuralVariant

Use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.

From the class FilteredSVWriter, the method generateFilteredSVFile:

private void generateFilteredSVFile(final List<StructuralVariant> variants, final String sampleId) {
    try {
        if (mFileWriter == null) {
            String outputFileName = mOutputPath;
            if (!outputFileName.endsWith("/"))
                outputFileName += "/";
            outputFileName += "svs_incl_filtered.csv";
            Path outputFile = Paths.get(outputFileName);
            mFileWriter = Files.newBufferedWriter(outputFile);
            mFileWriter.write("SampleId,SvId,Type,ChrStart,PosStart,OrientStart,ChrEnd,PosEnd,OrientEnd,Filters\n");
        }
        for (final StructuralVariant var : variants) {
            String filtersStr = var.filters();
            if (filtersStr.equals("PASS") || filtersStr.equals("[]") || filtersStr.equals(".") || filtersStr.isEmpty()) {
                LOGGER.debug("var({}) was a PASS", var.id());
                filtersStr = "PASS";
            } else {
                // make tokenisable for further searches
                LOGGER.debug("var({}) has filters: {}", var.id(), var.filters());
                if (filtersStr.charAt(0) == '[')
                    filtersStr = filtersStr.substring(1);
                if (filtersStr.charAt(filtersStr.length() - 1) == ']')
                    filtersStr = filtersStr.substring(0, filtersStr.length() - 1);
                if (!filtersStr.isEmpty())
                    filtersStr = filtersStr.replace(",", ";");
            }
            mFileWriter.write(String.format("%s,%s,%s,%s,%d,%d,%s,%d,%d,%s", sampleId, var.id(), var.type(), var.chromosome(true), var.position(true), var.orientation(true), var.chromosome(false), var.position(false), var.orientation(false), filtersStr));
            mFileWriter.newLine();
        }
    } catch (final IOException e) {
        // include the exception detail rather than a bare message
        LOGGER.error("error writing to output file: {}", e.toString());
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant)
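
The filter handling above is easy to isolate. Below is a hedged sketch that pulls the normalisation into a standalone helper (the method name is invented); it mirrors the logic in generateFilteredSVFile, with an extra empty-string guard added before the trailing-bracket check.

static String normaliseFilters(final String filters) {
    // empty, ".", "[]" and "PASS" all collapse to "PASS"
    if (filters.isEmpty() || filters.equals(".") || filters.equals("[]") || filters.equals("PASS")) {
        return "PASS";
    }
    String result = filters;
    // strip surrounding brackets so the value stays tokenisable
    if (result.charAt(0) == '[') {
        result = result.substring(1);
    }
    if (!result.isEmpty() && result.charAt(result.length() - 1) == ']') {
        result = result.substring(0, result.length() - 1);
    }
    // commas would break the CSV layout, so swap them for semicolons
    return result.replace(",", ";");
}

For example, normaliseFilters("[PON,qual]") yields "PON;qual", while normaliseFilters("[]") and normaliseFilters(".") both yield "PASS".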

Example 3 with StructuralVariant

Use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.

From the class FilteredSVWriter, the method processVcfFiles:

public void processVcfFiles() {
    final List<File> vcfFiles;
    final Path root = Paths.get(mVcfFileLocation);
    try (final Stream<Path> stream = Files.walk(root, 5, FileVisitOption.FOLLOW_LINKS)) {
        vcfFiles = stream.map(Path::toFile)
                .filter(file -> !file.isDirectory())
                .filter(file -> file.getName().endsWith("somaticSV_bpi.vcf"))
                .collect(Collectors.toList());
        LOGGER.debug("found {} BPI VCF files", vcfFiles.size());
        // add the filtered and passed SV entries for each file
        for (final File vcfFile : vcfFiles) {
            if (vcfFile.isDirectory())
                continue;
            if (!vcfFile.getPath().contains("structuralVariants/bpi/"))
                continue;
            if (!vcfFile.getName().endsWith("somaticSV_bpi.vcf"))
                continue;
            LOGGER.debug("BPI VCF path({}) file({})", vcfFile.getPath(), vcfFile.getName());
            // extract sampleId from the directory or file name
            String[] itemsStr = vcfFile.getName().split("_");
            if (itemsStr.length != 4)
                continue;
            String sampleId = itemsStr[1];
            LOGGER.debug("sampleId({})", sampleId);
            List<StructuralVariant> variants = readFromVcf(vcfFile.getPath());
            generateFilteredSVFile(variants, sampleId);
        }
        if (mFileWriter != null)
            mFileWriter.close();
    } catch (final Exception e) {
        // previously swallowed silently; at least record what went wrong
        LOGGER.error("failed processing VCF files: {}", e.toString());
    }
}
Also used : Path(java.nio.file.Path) LineIterator(htsjdk.tribble.readers.LineIterator) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) StandardOpenOption(java.nio.file.StandardOpenOption) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) File(java.io.File) List(java.util.List) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) Stream(java.util.stream.Stream) Logger(org.apache.logging.log4j.Logger) FileVisitOption(java.nio.file.FileVisitOption) Paths(java.nio.file.Paths) StructuralVariantFactory(com.hartwig.hmftools.common.variant.structural.StructuralVariantFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFCodec(htsjdk.variant.vcf.VCFCodec) LogManager(org.apache.logging.log4j.LogManager)
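
A hedged sketch of the directory walk in processVcfFiles, extracted into a reusable helper (the helper name and parameters are invented): it collects regular files under a root directory, up to a given depth and following symlinks, whose names end with a suffix such as "somaticSV_bpi.vcf".

static List<File> findFilesBySuffix(final String rootDir, final String suffix, final int maxDepth) throws IOException {
    // walk the tree the same way as above: bounded depth, following links,
    // keeping only regular files whose name ends with the requested suffix
    try (final Stream<Path> stream = Files.walk(Paths.get(rootDir), maxDepth, FileVisitOption.FOLLOW_LINKS)) {
        return stream.map(Path::toFile)
                .filter(file -> !file.isDirectory())
                .filter(file -> file.getName().endsWith(suffix))
                .collect(Collectors.toList());
    }
}

With that helper, the VCF discovery above reduces to findFilesBySuffix(mVcfFileLocation, "somaticSV_bpi.vcf", 5); the sampleId is then the second underscore-separated token of each file name.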

Example 4 with StructuralVariant

Use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.

From the class LoadStructuralVariants, the method main:

public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;
    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }
    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // NEVA: We read after we write to populate the primaryId field
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);
        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }
        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // KODU: Below assert feels somewhat risky!?
        assert runClustering;
        List<String> samplesList = Lists.newArrayList();
        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }
        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            // LOGGER.info("data loaded", sample, count);
            svClusterer.runClustering();
        // LOGGER.info("clustering complete", sample, count);
        // if(count > 10)
        // break;
        }
    }
    // svClusterer is only created when clustering was requested
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
Also used : Options(org.apache.commons.cli.Options) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnalysis(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis) StructuralVariantClustering(com.hartwig.hmftools.svannotation.analysis.StructuralVariantClustering) StructuralVariantAnalyzer(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) StructuralVariantAnnotationDAO(com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO) CommandLine(org.apache.commons.cli.CommandLine) CosmicFusionModel(com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) SvClusterData(com.hartwig.hmftools.svannotation.analysis.SvClusterData) SvClusteringConfig(com.hartwig.hmftools.svannotation.analysis.SvClusteringConfig)
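
The loadFromDB branch resolves which samples to cluster from the SAMPLE option value. As a hedged sketch (the helper name is invented; getStructuralVariantSamplesList and DatabaseAccess are the project's own and are only referenced here), the selection logic reads as:

static List<String> resolveSamples(final String tumorSample, final DatabaseAccess dbAccess) {
    // "*" or an empty value means every sample with structural variants in the database
    if (tumorSample.isEmpty() || tumorSample.equals("*")) {
        return getStructuralVariantSamplesList(dbAccess);
    }
    // a comma-separated value is split into individual sample ids
    if (tumorSample.contains(",")) {
        return Arrays.stream(tumorSample.split(",")).collect(Collectors.toList());
    }
    // otherwise treat the value as a single sample id
    return Lists.newArrayList(tumorSample);
}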

Example 5 with StructuralVariant

Use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.

From the class StructuralVariantImpliedTest, the method testNonSymmetricMultiPass:

@Test
public void testNonSymmetricMultiPass() {
    final StructuralVariant firstSV = sv(1001, 4001, StructuralVariantType.DEL, 0.25, 0.25);
    final StructuralVariant secondSV = sv(2001, 3001, StructuralVariantType.DEL, 1 / 3d, 1 / 3d);
    final CombinedRegion firstCN = copyNumber(1, 1000, 40, SegmentSupport.NONE);
    final CombinedRegion secondCN = copyNumber(1001, 2000, 0, SegmentSupport.DEL);
    final CombinedRegion thirdCN = copyNumber(2001, 3000, 0, SegmentSupport.DEL);
    final CombinedRegion fourthCN = copyNumber(3001, 4000, 0, SegmentSupport.DEL);
    final CombinedRegion fifthCN = copyNumber(4001, 5000, 10, SegmentSupport.NONE);
    final List<StructuralVariant> svs = Lists.newArrayList(firstSV, secondSV);
    final ListMultimap<String, CombinedRegion> copyNumbers = copyNumbers(firstCN, secondCN, thirdCN, fourthCN, fifthCN);
    final StructuralVariantImplied victim = new StructuralVariantImplied(PURE);
    final List<CombinedRegion> result = victim.svImpliedCopyNumber(svs, copyNumbers).get(CHROMOSOME);
    assertEquals(5, result.size());
    assertEquals(40.00, result.get(0).tumorCopyNumber(), EPSILON);
    assertEquals(33.75, result.get(1).tumorCopyNumber(), EPSILON);
    assertEquals(12.50, result.get(2).tumorCopyNumber(), EPSILON);
    assertEquals(03.75, result.get(3).tumorCopyNumber(), EPSILON);
    assertEquals(10.00, result.get(4).tumorCopyNumber(), EPSILON);
}
Also used : StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) Test(org.junit.Test) PurpleDatamodelTest(com.hartwig.hmftools.common.purple.PurpleDatamodelTest)
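
The copyNumbers(...) fixture used above is not shown on this page. As a hedged sketch only, one plausible shape for it is a Guava ListMultimap keyed by chromosome, assuming CombinedRegion exposes a chromosome() accessor from its genome-region interface; the real fixture lives in the project's test utilities.

static ListMultimap<String, CombinedRegion> copyNumbers(final CombinedRegion... regions) {
    // group the supplied regions per chromosome, preserving insertion order
    final ListMultimap<String, CombinedRegion> result = ArrayListMultimap.create();
    for (final CombinedRegion region : regions) {
        result.put(region.chromosome(), region);
    }
    return result;
}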

Aggregations

StructuralVariant (com.hartwig.hmftools.common.variant.structural.StructuralVariant): 15
PurpleDatamodelTest (com.hartwig.hmftools.common.purple.PurpleDatamodelTest): 7
Test (org.junit.Test): 7
IOException (java.io.IOException): 4
Path (java.nio.file.Path): 4
NotNull (org.jetbrains.annotations.NotNull): 4
StructuralVariantLeg (com.hartwig.hmftools.common.variant.structural.StructuralVariantLeg): 3
List (java.util.List): 3
Collectors (java.util.stream.Collectors): 3
EnrichedStructuralVariant (com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant): 2
Optional (java.util.Optional): 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
ArrayListMultimap (com.google.common.collect.ArrayListMultimap): 1
ListMultimap (com.google.common.collect.ListMultimap): 1
Lists (com.google.common.collect.Lists): 1
Multimap (com.google.common.collect.Multimap): 1
ProductionRunContextFactory (com.hartwig.hmftools.common.context.ProductionRunContextFactory): 1
RunContext (com.hartwig.hmftools.common.context.RunContext): 1
CosmicFusionModel (com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel): 1
TumorLocationDoidMapping (com.hartwig.hmftools.common.ecrf.doid.TumorLocationDoidMapping): 1