Search in sources :

Example 6 with DefaultLineMapper

use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

Source: the processGmlFusionsFile method of the GMLFusionDataReader class.

private void processGmlFusionsFile() {
    // Loads germline fusion records from the GML fusions file in the private
    // directory. Skips silently if the file does not exist; records are only
    // kept for known patients not already covered by the new DMP germline
    // patient list, and duplicate fusion keys are dropped.
    File gmlFusionsFile = new File(privateDirectory, CVRUtilities.FUSION_GML_FILE);
    if (!gmlFusionsFile.exists()) {
        LOG.info("File does not exist - skipping data loading from germline fusions file: " + gmlFusionsFile.getName());
        return;
    }
    LOG.info("Loading germline fusions data from: " + gmlFusionsFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<CVRFusionRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRGMLFusionFieldSetMapper());
    FlatFileItemReader<CVRFusionRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(gmlFusionsFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1); // skip header row
    reader.open(new ExecutionContext());
    try {
        CVRFusionRecord to_add;
        while ((to_add = reader.read()) != null) {
            String patientId = cvrSampleListUtil.getSamplePatientId(to_add.getTumor_Sample_Barcode());
            // only keep records with a resolvable patient id, and skip patients
            // in the new dmp germline patients list (to prevent duplicates)
            if (!Strings.isNullOrEmpty(patientId) && !cvrSampleListUtil.getNewDmpGmlPatients().contains(patientId)) {
                String fusion = getGmlFusionKey(to_add);
                if (gmlFusionsSeen.add(fusion)) {
                    gmlFusionRecords.add(to_add);
                }
            }
        }
    } catch (Exception e) {
        // log at error level for consistency with the other readers in this project
        LOG.error("Error loading data from germline fusions file: " + gmlFusionsFile.getName());
        throw new ItemStreamException(e);
    } finally {
        // close in finally so the reader is released even when read() throws;
        // previously close() was unreachable on the exception path (resource leak)
        reader.close();
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) CVRFusionRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord)

Example 7 with DefaultLineMapper

use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

Source: the loadClinicalDataGmlPatientSampleMapping method of the GMLClinicalTasklet class.

private void loadClinicalDataGmlPatientSampleMapping(File clinicalFile) throws Exception {
    // Loads the clinical staging file, builds the germline patient-to-sample
    // mapping, and registers every sample id as a portal sample. Afterwards the
    // portal/DMP sample lists are reconciled.
    //
    // @param clinicalFile the tab-delimited clinical staging file (must exist)
    // @throws ItemStreamException if the clinical file is missing
    // @throws Exception propagated from reading/parsing the file
    if (!clinicalFile.exists()) {
        throw new ItemStreamException("Could not find clinical file: " + clinicalFile.getName());
    } else {
        LOG.info("Loading clinical data from: " + clinicalFile.getName());
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        DefaultLineMapper<CVRClinicalRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRClinicalFieldSetMapper());
        FlatFileItemReader<CVRClinicalRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(clinicalFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1); // skip header row
        reader.open(new ExecutionContext());
        try {
            CVRClinicalRecord to_add;
            while ((to_add = reader.read()) != null) {
                cvrSampleListUtil.updateGmlPatientSampleMap(to_add.getPATIENT_ID(), to_add.getSAMPLE_ID());
                clinicalRecords.add(to_add);
                cvrSampleListUtil.addPortalSample(to_add.getSAMPLE_ID());
            }
        } finally {
            // close in finally so the reader is released even when read() throws;
            // previously an exception from read() would leak the open reader
            reader.close();
        }
    }
    // updates portalSamplesNotInDmpList and dmpSamplesNotInPortal sample lists
    // portalSamples list is only updated if threshold check for max num samples to remove passes
    cvrSampleListUtil.updateSampleLists();
    updateSamplesRemovedList();
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) ExecutionContext(org.springframework.batch.item.ExecutionContext) CVRClinicalRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRClinicalRecord) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) ItemStreamException(org.springframework.batch.item.ItemStreamException)

Example 8 with DefaultLineMapper

use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

Source: the loadExistingLinkedIds method of the LinkedMskimpactCaseReader class.

private void loadExistingLinkedIds() {
    // Loads previously-compiled linked ARCHER sample ids from the staging
    // directory so they can be merged with incoming CVR data. Records whose
    // sample id appears in the new DMP sample list are kept only in the
    // backup map, not the compiled map.
    File linkedIdsFile = new File(stagingDirectory, cvrUtilities.CORRESPONDING_ID_FILE);
    if (!linkedIdsFile.exists()) {
        LOG.warn("File does not exist - skipping data loading from linked ARCHER samples file: " + linkedIdsFile.getName());
        return;
    }
    LOG.info("Loading linked ARCHER sample data from: " + linkedIdsFile.getName());
    DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<LinkedMskimpactCaseRecord> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(lineTokenizer);
    lineMapper.setFieldSetMapper(new LinkedImpactCaseFieldSetMapper());
    FlatFileItemReader<LinkedMskimpactCaseRecord> fileReader = new FlatFileItemReader<>();
    fileReader.setResource(new FileSystemResource(linkedIdsFile));
    fileReader.setLineMapper(lineMapper);
    fileReader.setLinesToSkip(1); // first line is the header
    fileReader.open(new ExecutionContext());
    try {
        for (LinkedMskimpactCaseRecord record = fileReader.read(); record != null; record = fileReader.read()) {
            // only add samples that are not in the new dmp sample list
            if (!cvrSampleListUtil.getNewDmpSamples().contains(record.getSAMPLE_ID())) {
                compiledLinkedIdsMap.put(record.getSAMPLE_ID(), record);
            }
            // keep a backup in case JSON returned dropped all "linked_mskimpact_case" data
            existingLinkedIdsMap.put(record.getSAMPLE_ID(), record);
        }
    } catch (Exception e) {
        LOG.error("Error reading linked ARCHER sample data from file: " + linkedIdsFile.getName());
        throw new ItemStreamException(e);
    } finally {
        fileReader.close();
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) LinkedMskimpactCaseRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.LinkedMskimpactCaseRecord) FileSystemResource(org.springframework.core.io.FileSystemResource)

Example 9 with DefaultLineMapper

use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

Source: the loadExistingMutationRecords method of the GMLMutationDataReader class.

private void loadExistingMutationRecords() throws Exception {
    // Reads existing mutation records from the MAF-format mutation file,
    // filters out duplicate records and GERMLINE variants for known germline
    // samples, then submits the remaining records for annotation.
    //
    // @throws Exception propagated from reading the file or from annotation
    log.info("Loading mutation data from: " + mutationFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<MutationRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRMutationFieldSetMapper());
    FlatFileItemReader<MutationRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(mutationFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1);
    // the skipped header line supplies the tokenizer's column names
    reader.setSkippedLinesCallback(new LineCallbackHandler() {

        @Override
        public void handleLine(String line) {
            tokenizer.setNames(line.split("\t"));
        }
    });
    reader.open(new ExecutionContext());
    List<MutationRecord> recordsToAnnotate = new ArrayList<>();
    try {
        MutationRecord to_add;
        while ((to_add = reader.read()) != null && to_add.getTUMOR_SAMPLE_BARCODE() != null) {
            // skip if record already seen or if current record is a germline sample and record is a GERMLINE variant
            if (cvrUtilities.isDuplicateRecord(to_add, mutationMap.get(to_add.getTUMOR_SAMPLE_BARCODE())) || (germlineSamples.contains(to_add.getTUMOR_SAMPLE_BARCODE()) && to_add.getMUTATION_STATUS().equals("GERMLINE"))) {
                continue;
            }
            recordsToAnnotate.add(to_add);
        }
    } finally {
        // close in finally so the reader is released even when read() throws;
        // previously an exception from read() would leak the open reader
        reader.close();
    }
    // String concatenation converts the int automatically; String.valueOf was redundant
    log.info("Loaded " + recordsToAnnotate.size() + " records from MAF");
    annotateRecordsWithPOST(recordsToAnnotate, forceAnnotation);
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource)

Example 10 with DefaultLineMapper

use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

Source: the open method of the CVRSegDataReader class.

@Override
public void open(ExecutionContext ec) throws ItemStreamException {
    // Populates cvrSegRecords from two sources: the existing staging SEG file
    // (excluding samples present in the new DMP sample list, which will be
    // refreshed) and the SEG data embedded in the CVR JSON results.
    //
    // @param ec the step execution context, passed through to the delegate reader
    // @throws ItemStreamException if the CVR JSON or SEG file cannot be read
    CVRData cvrData = new CVRData();
    // load cvr data from cvr_data.json file
    File cvrFile = new File(privateDirectory, cvrUtilities.CVR_FILE);
    try {
        cvrData = cvrUtilities.readJson(cvrFile);
    } catch (IOException e) {
        log.error("Error reading file: " + cvrFile.getName());
        throw new ItemStreamException(e);
    }
    // only read from seg file if exists
    File segFile = new File(stagingDirectory, studyId + cvrUtilities.SEG_FILE);
    if (!segFile.exists()) {
        log.error("File does not exist - skipping data loading from SEG file: " + segFile.getName());
    } else {
        log.info("Loading SEG data from: " + segFile.getName());
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        DefaultLineMapper<CVRSegRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRSegFieldSetMapper());
        FlatFileItemReader<CVRSegRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(segFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1); // skip header row
        reader.open(ec);
        try {
            CVRSegRecord to_add;
            while ((to_add = reader.read()) != null && to_add.getID() != null) {
                // skip samples in the new dmp sample list - they will be rebuilt from CVR
                if (!cvrSampleListUtil.getNewDmpSamples().contains(to_add.getID())) {
                    cvrSegRecords.add(to_add);
                }
            }
        } catch (Exception e) {
            log.error("Error loading data from SEG file: " + segFile.getName());
            throw new ItemStreamException(e);
        } finally {
            // close in finally so the reader is released even when read() throws;
            // previously close() was unreachable on the exception path (resource leak)
            reader.close();
        }
    }
    // merge the existing SEG data loaded above with the new SEG data from the CVR JSON
    for (CVRMergedResult result : cvrData.getResults()) {
        CVRSegData cvrSegData = result.getSegData();
        if (cvrSegData.getSegData() == null) {
            continue;
        }
        // the first embedded row is the header: build a column-index -> column-name map
        HashMap<Integer, String> indexMap = new HashMap<>();
        boolean first = true;
        String id = result.getMetaData().getDmpSampleId();
        for (List<String> segData : cvrSegData.getSegData()) {
            if (first) {
                for (int i = 0; i < segData.size(); i++) {
                    indexMap.put(i, segData.get(i));
                }
                first = false;
            } else {
                CVRSegRecord cvrSegRecord = new CVRSegRecord();
                // hoisted out of the column loop - the id only needs to be set once per record
                cvrSegRecord.setID(id);
                for (int i = 0; i < segData.size(); i++) {
                    // column headers contain dots; setter names use underscores instead
                    String field = indexMap.get(i).replace(".", "_");
                    try {
                        // invoke the matching setter reflectively, e.g. "loc_start" -> setloc_start(...)
                        cvrSegRecord.getClass().getMethod("set" + field, String.class).invoke(cvrSegRecord, segData.get(i));
                    } catch (Exception e) {
                        log.warn("No such method 'set" + field + "' for CVRSegRecord");
                    }
                }
                cvrSegRecords.add(cvrSegRecord);
            }
        }
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) CVRSegRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRSegRecord) FileSystemResource(org.springframework.core.io.FileSystemResource)

Aggregations

DefaultLineMapper (org.springframework.batch.item.file.mapping.DefaultLineMapper)29 DelimitedLineTokenizer (org.springframework.batch.item.file.transform.DelimitedLineTokenizer)29 FlatFileItemReader (org.springframework.batch.item.file.FlatFileItemReader)18 FileSystemResource (org.springframework.core.io.FileSystemResource)13 Bean (org.springframework.context.annotation.Bean)10 ClassPathResource (org.springframework.core.io.ClassPathResource)7 JobRepositoryFactoryBean (org.springframework.batch.core.repository.support.JobRepositoryFactoryBean)5 CVRClinicalRecord (org.cbioportal.cmo.pipelines.cvr.model.staging.CVRClinicalRecord)3 Before (org.junit.Before)3 TaxonService (org.powo.api.TaxonService)3 ConversionService (org.springframework.core.convert.ConversionService)3 HashSet (java.util.HashSet)2 Transaction (org.baeldung.batch.model.Transaction)2 RecordFieldSetMapper (org.baeldung.batch.service.RecordFieldSetMapper)2 CVRFusionRecord (org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord)2 MskimpactSeqDate (org.cbioportal.cmo.pipelines.cvr.model.staging.MskimpactSeqDate)2 StringToIsoDateTimeConverter (org.powo.model.convert.StringToIsoDateTimeConverter)2 StepScope (org.springframework.batch.core.configuration.annotation.StepScope)2 BeanWrapperFieldSetMapper (org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper)2 ConversionServiceFactoryBean (org.springframework.context.support.ConversionServiceFactoryBean)2