use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.
The class ClusterVariantLegFactory, method create.
@NotNull
public static List<ClusterVariantLeg> create(@NotNull final List<StructuralVariant> variants) {
    // Collects the individual breakpoint legs of each structural variant as sortable
    // cluster positions, tagged with the originating variant's type.
    final List<ClusterVariantLeg> legs = Lists.newArrayList();
    for (final StructuralVariant variant : variants) {
        // INS variants are excluded from clustering positions.
        if (variant.type() == StructuralVariantType.INS) {
            continue;
        }
        // Both legs contribute one entry each.
        // NOTE(review): assumes variant.end() is non-null for all non-INS types — confirm for single breakends.
        legs.add(ImmutableClusterVariantLeg.builder().from(variant.start()).type(variant.type()).build());
        legs.add(ImmutableClusterVariantLeg.builder().from(variant.end()).type(variant.type()).build());
    }
    Collections.sort(legs);
    return legs;
}
use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.
The class FilteredSVWriter, method generateFilteredSVFile.
/**
 * Appends one CSV row per structural variant to the shared output file,
 * normalising the variant's filter string into a semicolon-separated token list
 * (or the literal "PASS" for unfiltered variants).
 *
 * The writer is opened lazily on the first call and reused across samples;
 * it is closed by the caller (processVcfFiles), not here.
 */
private void generateFilteredSVFile(final List<StructuralVariant> variants, final String sampleId) {
    try {
        if (mFileWriter == null) {
            // Lazily create the output file and write the header once.
            String outputFileName = mOutputPath;
            if (!outputFileName.endsWith("/"))
                outputFileName += "/";
            outputFileName += "svs_incl_filtered.csv";
            Path outputFile = Paths.get(outputFileName);
            mFileWriter = Files.newBufferedWriter(outputFile);
            mFileWriter.write("SampleId,SvId,Type,ChrStart,PosStart,OrientStart,ChrEnd,PosEnd,OrientEnd,Filters\n");
        }
        for (final StructuralVariant var : variants) {
            String filtersStr = var.filters();
            // "PASS", "[]", "." and "" all denote an unfiltered variant.
            if (filtersStr.equals("PASS") || filtersStr.equals("[]") || filtersStr.equals(".") || filtersStr.isEmpty()) {
                LOGGER.debug("var({}) was a PASS", var.id());
                filtersStr = "PASS";
            } else {
                // Strip surrounding brackets and make the list tokenisable for further searches.
                LOGGER.debug("var({}) has filters: {}", var.id(), var.filters());
                if (filtersStr.charAt(0) == '[')
                    filtersStr = filtersStr.substring(1);
                if (!filtersStr.isEmpty() && filtersStr.charAt(filtersStr.length() - 1) == ']')
                    filtersStr = filtersStr.substring(0, filtersStr.length() - 1);
                if (!filtersStr.isEmpty())
                    filtersStr = filtersStr.replace(",", ";");
            }
            mFileWriter.write(String.format("%s,%s,%s,%s,%d,%d,%s,%d,%d,%s", sampleId, var.id(), var.type(), var.chromosome(true), var.position(true), var.orientation(true), var.chromosome(false), var.position(false), var.orientation(false), filtersStr));
            mFileWriter.newLine();
        }
    } catch (final IOException e) {
        // Include the cause — the original message gave no indication of what failed.
        LOGGER.error("error writing to output file in path {}", mOutputPath, e);
    }
}
use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.
The class FilteredSVWriter, method processVcfFiles.
/**
 * Walks the configured VCF directory (up to 5 levels deep, following links),
 * selects BPI somatic-SV VCF files, extracts the sample id from each file name,
 * and writes the filtered/passed SV entries for every sample into one CSV.
 *
 * Failures are logged (the original code silently swallowed them) and the
 * shared writer is always closed, even when an exception occurs.
 */
public void processVcfFiles() {
    final Path root = Paths.get(mVcfFileLocation);
    try (final Stream<Path> stream = Files.walk(root, 5, FileVisitOption.FOLLOW_LINKS)) {
        final List<File> vcfFiles = stream.map(Path::toFile)
                .filter(f -> !f.isDirectory())
                .filter(f -> f.getName().endsWith("somaticSV_bpi.vcf"))
                .collect(Collectors.toList());
        LOGGER.debug("found {} BPI VCF files", vcfFiles.size());
        // add the filtered and passed SV entries for each file
        for (final File vcfFile : vcfFiles) {
            if (vcfFile.isDirectory())
                continue;
            if (!vcfFile.getPath().contains("structuralVariants/bpi/"))
                continue;
            if (!vcfFile.getName().endsWith("somaticSV_bpi.vcf"))
                continue;
            LOGGER.debug("BPI VCF path({}) file({})", vcfFile.getPath(), vcfFile.getName());
            // Extract the sampleId from the file name; expected shape is 4 underscore-separated
            // tokens with the sample id second — anything else is skipped.
            String[] itemsStr = vcfFile.getName().split("_");
            if (itemsStr.length != 4)
                continue;
            String sampleId = itemsStr[1];
            LOGGER.debug("sampleId({})", sampleId);
            List<StructuralVariant> variants = readFromVcf(vcfFile.getPath());
            generateFilteredSVFile(variants, sampleId);
        }
    } catch (final Exception e) {
        // Previously an empty catch — failures were invisible.
        LOGGER.error("error processing VCF files under {}", mVcfFileLocation, e);
    } finally {
        // Close the shared writer even when an exception interrupted the walk.
        if (mFileWriter != null) {
            try {
                mFileWriter.close();
            } catch (final IOException e) {
                LOGGER.error("error closing output file", e);
            }
        }
    }
}
use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.
The class LoadStructuralVariants, method main.
/**
 * Entry point: depending on the command-line flags, either
 * (a) writes a filtered-SV PON file from VCFs, (b) re-annotates VCFs to file,
 * (c) reads a VCF, enriches/persists variants and annotations (optionally clustering), or
 * (d) loads variants from the database and runs clustering over one or more samples.
 */
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    // NOTE(review): dbAccess is null when DB_URL is absent; the !loadFromDB path below
    // dereferences it unconditionally — confirm DB_URL is mandatory for that mode.
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;
    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }
    if (createFilteredPON) {
        // Stand-alone mode: scan VCFs, write filtered SVs and exit.
        // (A second, identical check further down was unreachable dead code and has been removed.)
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // NEVA: We read after we write to populate the primaryId field
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);
        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }
        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // Load-from-DB mode only makes sense with clustering enabled; fail fast with a clear
        // message instead of the old `assert` (an NPE with assertions disabled).
        if (!runClustering) {
            throw new IllegalStateException("loading from the database requires the clustering option to be set");
        }
        // Resolve the sample list: all samples, a comma-separated list, or a single sample.
        List<String> samplesList = Lists.newArrayList();
        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }
        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }
    // Guard against NPE: svClusterer is only created when clustering is requested.
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
use of com.hartwig.hmftools.common.variant.structural.StructuralVariant in project hmftools by hartwigmedical.
The class StructuralVariantImpliedTest, method testNonSymmetricMultiPass.
@Test
public void testNonSymmetricMultiPass() {
    // Two nested DELs spanning zero-copy-number regions; implied copy numbers must be
    // filled in over multiple passes, producing asymmetric values per segment.
    final List<StructuralVariant> structuralVariants = Lists.newArrayList(
            sv(1001, 4001, StructuralVariantType.DEL, 0.25, 0.25),
            sv(2001, 3001, StructuralVariantType.DEL, 1 / 3d, 1 / 3d));
    final ListMultimap<String, CombinedRegion> regions = copyNumbers(
            copyNumber(1, 1000, 40, SegmentSupport.NONE),
            copyNumber(1001, 2000, 0, SegmentSupport.DEL),
            copyNumber(2001, 3000, 0, SegmentSupport.DEL),
            copyNumber(3001, 4000, 0, SegmentSupport.DEL),
            copyNumber(4001, 5000, 10, SegmentSupport.NONE));
    final StructuralVariantImplied victim = new StructuralVariantImplied(PURE);
    final List<CombinedRegion> result = victim.svImpliedCopyNumber(structuralVariants, regions).get(CHROMOSOME);
    final double[] expectedCopyNumbers = {40.00, 33.75, 12.50, 3.75, 10.00};
    assertEquals(expectedCopyNumbers.length, result.size());
    for (int i = 0; i < expectedCopyNumbers.length; i++) {
        assertEquals(expectedCopyNumbers[i], result.get(i).tumorCopyNumber(), EPSILON);
    }
}
Aggregations