Search in sources :

Example 1 with CVRFusionRecord

use of org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord in project cmo-pipelines by knowledgesystems.

the class CVRFusionDataReader method processFusionsFile.

/**
 * Loads fusion records from the tab-delimited fusion file in the staging directory.
 *
 * <p>If the file does not exist the method logs that fact and returns without loading
 * anything. Otherwise the first line of the file is skipped (presumably the header row)
 * and every remaining line is mapped to a {@link CVRFusionRecord}. A record is kept only
 * when its tumor sample barcode is non-null, is not among the new DMP samples reported by
 * {@code cvrSampleListUtil}, and its "hugo symbol|sample barcode|fusion" key has not
 * already been recorded in {@code fusionsSeen}.
 *
 * @throws ItemStreamException if any line cannot be read or mapped; the underlying
 *         exception is preserved as the cause
 */
private void processFusionsFile() {
    File fusionFile = new File(stagingDirectory, CVRUtilities.FUSION_FILE);
    if (!fusionFile.exists()) {
        LOG.info("File does not exist - skipping data loading from fusion file: " + fusionFile.getName());
        return;
    }
    LOG.info("Loading fusion data from: " + fusionFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<CVRFusionRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRFusionFieldSetMapper());
    FlatFileItemReader<CVRFusionRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(fusionFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1);
    reader.open(new ExecutionContext());
    try {
        CVRFusionRecord to_add;
        while ((to_add = reader.read()) != null) {
            String sampleBarcode = to_add.getTumor_Sample_Barcode();
            // Check for null before the membership test so that a collection which
            // rejects null lookups can never be asked about a null barcode.
            if (sampleBarcode == null || cvrSampleListUtil.getNewDmpSamples().contains(sampleBarcode)) {
                continue;
            }
            String fusion = to_add.getHugo_Symbol() + "|" + sampleBarcode + "|" + to_add.getFusion();
            if (!fusionsSeen.contains(fusion)) {
                fusionRecords.add(to_add);
                fusionsSeen.add(fusion);
            }
        }
    } catch (Exception e) {
        // Logged at error level to match how processJsonFile reports a failed load.
        LOG.error("Error loading data from fusion file: " + fusionFile.getName());
        throw new ItemStreamException(e);
    } finally {
        // Release the underlying file handle on the failure path as well as on success.
        reader.close();
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord)

Example 2 with CVRFusionRecord

use of org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord in project cmo-pipelines by knowledgesystems.

the class CVRFusionFieldSetMapper method mapFieldSet.

/**
 * Builds a {@link CVRFusionRecord} from one tokenized line.
 *
 * <p>The name at position {@code n} of {@code CVRFusionRecord.getFieldNames()} is paired
 * with column {@code n} of the field set: the trimmed column value is passed to the
 * record's {@code set<FieldName>(String)} method, which is looked up reflectively.
 * A missing setter is logged; any other failure while populating a field is ignored
 * and the field is simply left unset.
 *
 * @param fs the tokenized line to read column values from
 * @return the populated record (never {@code null})
 * @throws BindException declared by the mapper contract; not thrown by this body
 */
@Override
public CVRFusionRecord mapFieldSet(FieldSet fs) throws BindException {
    final CVRFusionRecord mapped = new CVRFusionRecord();
    int column = 0;
    for (String fieldName : CVRFusionRecord.getFieldNames()) {
        try {
            // Resolve the setter before touching the column so a missing setter is
            // reported even when the column itself is absent.
            java.lang.reflect.Method setter = mapped.getClass().getMethod("set" + fieldName, String.class);
            String value = fs.readString(column).trim();
            setter.invoke(mapped, value);
        } catch (Exception e) {
            // Best-effort mapping: only a missing setter is worth a log line.
            if (NoSuchMethodException.class.equals(e.getClass())) {
                log.info("No set method exists for " + fieldName);
            }
        }
        column++;
    }
    return mapped;
}
Also used : CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord) BindException(org.springframework.validation.BindException)

Example 3 with CVRFusionRecord

use of org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord in project cmo-pipelines by knowledgesystems.

the class GMLFusionDataReader method processGmlFusionsFile.

/**
 * Loads germline fusion records from the tab-delimited germline fusions file in the
 * private directory.
 *
 * <p>If the file does not exist the method logs that fact and returns without loading
 * anything. Otherwise the first line of the file is skipped (presumably the header row)
 * and every remaining line is mapped to a {@link CVRFusionRecord}. A record is kept only
 * when {@code cvrSampleListUtil} resolves its sample barcode to a non-empty patient id,
 * that patient is not among the new DMP germline patients, and the key produced by
 * {@code getGmlFusionKey} has not been seen before.
 *
 * @throws ItemStreamException if any line cannot be read or mapped; the underlying
 *         exception is preserved as the cause
 */
private void processGmlFusionsFile() {
    File gmlFusionsFile = new File(privateDirectory, CVRUtilities.FUSION_GML_FILE);
    if (!gmlFusionsFile.exists()) {
        LOG.info("File does not exist - skipping data loading from germline fusions file: " + gmlFusionsFile.getName());
        return;
    }
    LOG.info("Loading germline fusions data from: " + gmlFusionsFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<CVRFusionRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRGMLFusionFieldSetMapper());
    FlatFileItemReader<CVRFusionRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(gmlFusionsFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1);
    reader.open(new ExecutionContext());
    try {
        CVRFusionRecord to_add;
        while ((to_add = reader.read()) != null) {
            String patientId = cvrSampleListUtil.getSamplePatientId(to_add.getTumor_Sample_Barcode());
            // Keep the record only if the sample maps to a known patient and that patient
            // is not in the new dmp germline patients (to prevent duplicates).
            if (!Strings.isNullOrEmpty(patientId) && !cvrSampleListUtil.getNewDmpGmlPatients().contains(patientId)) {
                String fusion = getGmlFusionKey(to_add);
                // add() reports whether the key was new, so it doubles as the seen-check.
                if (gmlFusionsSeen.add(fusion)) {
                    gmlFusionRecords.add(to_add);
                }
            }
        }
    } catch (Exception e) {
        // Logged at error level to match how processJsonFile reports a failed load.
        LOG.error("Error loading data from germline fusions file: " + gmlFusionsFile.getName());
        throw new ItemStreamException(e);
    } finally {
        // Release the underlying file handle on the failure path as well as on success.
        reader.close();
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord)

Example 4 with CVRFusionRecord

use of org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord in project cmo-pipelines by knowledgesystems.

the class CVRGMLFusionFieldSetMapper method mapFieldSet.

/**
 * Builds a {@link CVRFusionRecord} from one tokenized line of a germline fusions file.
 *
 * <p>The name at position {@code n} of {@code CVRFusionRecord.getGermlineFieldNames()}
 * is paired with column {@code n} of the field set: the trimmed column value is passed
 * to the record's {@code set<FieldName>(String)} method, which is looked up reflectively.
 * A missing setter is logged; any other failure while populating a field is ignored
 * and the field is simply left unset.
 *
 * @param fs the tokenized line to read column values from
 * @return the populated record (never {@code null})
 * @throws BindException declared by the mapper contract; not thrown by this body
 */
@Override
public CVRFusionRecord mapFieldSet(FieldSet fs) throws BindException {
    final CVRFusionRecord mapped = new CVRFusionRecord();
    int column = 0;
    for (String fieldName : CVRFusionRecord.getGermlineFieldNames()) {
        try {
            // Resolve the setter before touching the column so a missing setter is
            // reported even when the column itself is absent.
            java.lang.reflect.Method setter = mapped.getClass().getMethod("set" + fieldName, String.class);
            String value = fs.readString(column).trim();
            setter.invoke(mapped, value);
        } catch (Exception e) {
            // Best-effort mapping: only a missing setter is worth a log line.
            if (NoSuchMethodException.class.equals(e.getClass())) {
                log.info("No set method exists for " + fieldName);
            }
        }
        column++;
    }
    return mapped;
}
Also used : CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord) BindException(org.springframework.validation.BindException)

Example 5 with CVRFusionRecord

use of org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord in project cmo-pipelines by knowledgesystems.

the class CVRFusionDataReader method processJsonFile.

/**
 * Builds fusion records from the structural variants in the CVR JSON file found in the
 * private directory.
 *
 * <p>For every structural variant of every result:
 * <ul>
 *   <li>variants whose gene 1 and gene 2 are both null or empty are skipped with a warning;</li>
 *   <li>a forward record and a reversed record are created for the sample;</li>
 *   <li>the forward record is added unless its "hugo symbol|sample barcode|fusion" key
 *       was already seen;</li>
 *   <li>the reversed record is added, under the same de-duplication rule, only when the
 *       two genes differ.</li>
 * </ul>
 *
 * @throws ItemStreamException if the JSON file cannot be read; the {@link IOException}
 *         is preserved as the cause
 */
private void processJsonFile() {
    // load cvr data from cvr_data.json file
    File cvrFile = new File(privateDirectory, cvrUtilities.CVR_FILE);
    CVRData cvrData;
    try {
        cvrData = cvrUtilities.readJson(cvrFile);
    } catch (IOException e) {
        LOG.error("Error reading file: " + cvrFile.getName());
        throw new ItemStreamException(e);
    }
    for (CVRMergedResult result : cvrData.getResults()) {
        String sampleId = result.getMetaData().getDmpSampleId();
        List<CVRSvVariant> variants = result.getSvVariants();
        for (CVRSvVariant variant : variants) {
            String site1Gene = variant.getSite1_Gene();
            String site2Gene = variant.getSite2_Gene();
            // skip records where both gene 1 and gene 2 are null or empty
            if (Strings.isNullOrEmpty(site1Gene) && Strings.isNullOrEmpty(site2Gene)) {
                LOG.warn("Skipping fusion record where genes are missing for sample '" + sampleId + "'" + (Strings.isNullOrEmpty(variant.getAnnotation()) ? "" : " - record annotation: " + variant.getAnnotation().replaceAll("[\\t\\n\\r]+", " ")));
                continue;
            }
            CVRFusionRecord record = null;
            try {
                record = new CVRFusionRecord(variant, sampleId, false);
            } catch (NullPointerException e) {
                // log error if both variant gene sites are not null
                if (site1Gene != null && site2Gene != null) {
                    LOG.error("Error creating fusion record for sample, gene1-gene2 event: " + sampleId + ", " + site1Gene + "-" + site2Gene);
                }
                continue;
            }
            String fusion = record.getHugo_Symbol() + "|" + record.getTumor_Sample_Barcode() + "|" + record.getFusion();
            CVRFusionRecord recordReversed = new CVRFusionRecord(variant, sampleId, true);
            if (!fusionsSeen.contains(fusion)) {
                fusionRecords.add(record);
                fusionsSeen.add(fusion);
            }
            // Null-safe comparison: the guard above only rules out BOTH genes being
            // missing, so gene 1 alone may still be null at this point.
            boolean sameGene = (site1Gene == null) ? (site2Gene == null) : site1Gene.equals(site2Gene);
            if (!sameGene) {
                fusion = recordReversed.getHugo_Symbol() + "|" + recordReversed.getTumor_Sample_Barcode() + "|" + recordReversed.getFusion();
                if (!fusionsSeen.contains(fusion)) {
                    fusionRecords.add(recordReversed);
                    fusionsSeen.add(fusion);
                }
            }
        }
    }
}
Also used : CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord)

Aggregations

CVRFusionRecord (org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord): 6, FlatFileItemReader (org.springframework.batch.item.file.FlatFileItemReader): 2, DefaultLineMapper (org.springframework.batch.item.file.mapping.DefaultLineMapper): 2, DelimitedLineTokenizer (org.springframework.batch.item.file.transform.DelimitedLineTokenizer): 2, FileSystemResource (org.springframework.core.io.FileSystemResource): 2, BindException (org.springframework.validation.BindException): 2, GMLCnvIntragenicVariant (org.cbioportal.cmo.pipelines.cvr.model.GMLCnvIntragenicVariant): 1, GMLData (org.cbioportal.cmo.pipelines.cvr.model.GMLData): 1, GMLResult (org.cbioportal.cmo.pipelines.cvr.model.GMLResult): 1