Example 21 with DefaultLineMapper

Use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

In class CVRGenePanelReader, method open():

@Override
public void open(ExecutionContext ec) throws ItemStreamException {
    // get genetic profiles for study if known, otherwise use default list
    if (CVRUtilities.GENETIC_PROFILES_BY_STUDY.containsKey(studyId)) {
        this.geneticProfiles = CVRUtilities.GENETIC_PROFILES_BY_STUDY.get(studyId);
    } else {
        this.geneticProfiles = CVRUtilities.DEFAULT_GENETIC_PROFILES;
    }
    CVRData cvrData = new CVRData();
    // load cvr data from cvr_data.json file
    File cvrFile = new File(privateDirectory, cvrUtilities.CVR_FILE);
    try {
        cvrData = cvrUtilities.readJson(cvrFile);
    } catch (IOException e) {
        log.error("Error reading file: " + cvrFile.getName());
        throw new ItemStreamException(e);
    }
    File genePanelFile = new File(stagingDirectory, cvrUtilities.GENE_PANEL_FILE);
    if (!genePanelFile.exists()) {
        log.error("File does not exist - skipping data loading from gene panel file: " + genePanelFile.getName());
    } else {
        log.info("Loading gene panel data from: " + genePanelFile.getName());
        final DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        tokenizer.setNames(getGenePanelMatrixHeader(genePanelFile));
        DefaultLineMapper<CVRGenePanelRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRGenePanelFieldSetMapper());
        FlatFileItemReader<CVRGenePanelRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(genePanelFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1);
        reader.open(ec);
        try {
            CVRGenePanelRecord to_add;
            while ((to_add = reader.read()) != null) {
                if (!cvrSampleListUtil.getNewDmpSamples().contains(to_add.getSAMPLE_ID()) && to_add.getSAMPLE_ID() != null) {
                    genePanelRecords.add(to_add);
                }
            }
        } catch (Exception e) {
            log.error("Error reading data from gene panel file: " + genePanelFile.getName());
            throw new ItemStreamException(e);
        }
        reader.close();
    }
    for (CVRMergedResult result : cvrData.getResults()) {
        CVRGenePanelRecord record = new CVRGenePanelRecord(result.getMetaData(), geneticProfiles);
        genePanelRecords.add(record);
    }
    // only try setting header and genetic profiles if gene panel records list is not empty
    if (!genePanelRecords.isEmpty()) {
        setGenePanelHeader(ec);
        ec.put("geneticProfiles", geneticProfiles);
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) CVRGenePanelRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRGenePanelRecord)
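
The column names come from getGenePanelMatrixHeader(genePanelFile), whose implementation is not part of this excerpt. A minimal sketch of what such a helper could look like, assuming the first line of the gene panel file is a tab-delimited header row (hypothetical, not the project's actual code):

// Hypothetical sketch only - the real getGenePanelMatrixHeader is not shown in this excerpt.
// Assumes the first line of the file is a tab-delimited header row.
private String[] getGenePanelMatrixHeader(File genePanelFile) throws ItemStreamException {
    try (BufferedReader buffer = new BufferedReader(new FileReader(genePanelFile))) {
        String header = buffer.readLine();
        if (header == null) {
            throw new ItemStreamException("Gene panel file is empty: " + genePanelFile.getName());
        }
        return header.split("\t", -1);
    } catch (IOException e) {
        throw new ItemStreamException(e);
    }
}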

Example 22 with DefaultLineMapper

Use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

In class CVRMutationDataReader, method loadExistingMutationRecords():

private void loadExistingMutationRecords() throws Exception {
    log.info("Loading mutation data from: " + mutationFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<MutationRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRMutationFieldSetMapper());
    FlatFileItemReader<MutationRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(mutationFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1);
    reader.setSkippedLinesCallback(new LineCallbackHandler() {

        @Override
        public void handleLine(String line) {
            tokenizer.setNames(line.split("\t"));
        }
    });
    reader.open(new ExecutionContext());
    List<MutationRecord> recordsToAnnotate = new ArrayList<>();
    MutationRecord to_add;
    while ((to_add = reader.read()) != null && to_add.getTUMOR_SAMPLE_BARCODE() != null) {
        // skip if new sample or if mutation record for sample seen already
        if (cvrSampleListUtil.getNewDmpSamples().contains(to_add.getTUMOR_SAMPLE_BARCODE()) || cvrUtilities.isDuplicateRecord(to_add, mutationMap.get(to_add.getTUMOR_SAMPLE_BARCODE()))) {
            continue;
        }
        cvrSampleListUtil.updateSignedoutSampleSnpCounts(to_add.getTUMOR_SAMPLE_BARCODE(), 1);
        recordsToAnnotate.add(to_add);
    }
    reader.close();
    log.info("Loaded " + String.valueOf(recordsToAnnotate.size()) + " records from MAF");
    annotateRecordsWithPOST(recordsToAnnotate, forceAnnotation);
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource)
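
LineCallbackHandler has a single handleLine(String) method, so on Java 8+ the anonymous class passed to setSkippedLinesCallback above can be collapsed into a lambda; a minimal equivalent sketch:

// Equivalent skipped-lines callback as a lambda: the skipped header line supplies the
// column names for the tokenizer before any data lines are tokenized.
reader.setSkippedLinesCallback(line -> tokenizer.setNames(line.split("\t")));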

Example 23 with DefaultLineMapper

Use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines by knowledgesystems.

In class CVRSvDataReader, method open():

@Override
public void open(ExecutionContext ec) throws ItemStreamException {
    CVRData cvrData = new CVRData();
    // load cvr data from cvr_data.json file
    File cvrFile = new File(privateDirectory, cvrUtilities.CVR_FILE);
    try {
        cvrData = cvrUtilities.readJson(cvrFile);
    } catch (IOException e) {
        log.error("Error reading file: " + cvrFile.getName());
        throw new ItemStreamException(e);
    }
    File svFile = new File(stagingDirectory, cvrUtilities.SV_FILE);
    if (!svFile.exists()) {
        log.info("File does not exist - skipping data loading from SV file: " + svFile.getName());
    } else {
        log.info("Loading SV data from: " + svFile.getName());
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        DefaultLineMapper<CVRSvRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRSvFieldSetMapper());
        FlatFileItemReader<CVRSvRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(svFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1);
        reader.open(ec);
        try {
            CVRSvRecord to_add;
            while ((to_add = reader.read()) != null) {
                if (!cvrSampleListUtil.getNewDmpSamples().contains(to_add.getSampleId()) && to_add.getSampleId() != null) {
                    to_add.setSite1_Gene(to_add.getSite1_Gene().trim());
                    to_add.setSite2_Gene(to_add.getSite2_Gene().trim());
                    svRecords.add(to_add);
                }
            }
        } catch (Exception e) {
            log.error("Error loading data from SV file: " + svFile.getName());
            throw new ItemStreamException(e);
        }
        reader.close();
    }
    for (CVRMergedResult result : cvrData.getResults()) {
        String sampleId = result.getMetaData().getDmpSampleId();
        List<CVRSvVariant> variants = result.getSvVariants();
        for (CVRSvVariant variant : variants) {
            CVRSvRecord record = new CVRSvRecord(variant, sampleId);
            svRecords.add(record);
        }
    }
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) FileSystemResource(org.springframework.core.io.FileSystemResource) CVRSvRecord(org.cbioportal.cmo.pipelines.cvr.model.staging.CVRSvRecord)

Example 24 with DefaultLineMapper

Use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project tutorials by csh0034.

In class MigrationUserJobConfig, method defaultLineMapper():

private DefaultLineMapper<User> defaultLineMapper() {
    DefaultLineMapper<User> defaultLineMapper = new DefaultLineMapper<>();
    defaultLineMapper.setLineTokenizer(new DelimitedLineTokenizer());
    defaultLineMapper.setFieldSetMapper(fieldSet -> User.create(fieldSet.readString(0), fieldSet.readString(1), fieldSet.readBoolean(2, "1")));
    return defaultLineMapper;
}
Also used : DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) User(com.ask.springbatch.entity.User) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper)
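
This example only builds the line mapper. A hedged sketch of how it might be wired into a reader with FlatFileItemReaderBuilder; the reader name and the users.csv resource are assumptions for illustration, not taken from the tutorial:

// Hedged sketch: plugging defaultLineMapper() into a reader. The name "userItemReader"
// and the classpath resource "users.csv" are illustrative assumptions.
private FlatFileItemReader<User> userItemReader() {
    return new FlatFileItemReaderBuilder<User>()
            .name("userItemReader")
            .resource(new ClassPathResource("users.csv"))
            .lineMapper(defaultLineMapper())
            .build();
}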

Example 25 with DefaultLineMapper

Use of org.springframework.batch.item.file.mapping.DefaultLineMapper in project spring-batch by roytuts.

In class SpringBatchConfig, method fileItemReader():

@Bean
public FlatFileItemReader<Person> fileItemReader(BeanWrapperFieldSetMapper<Person> beanWrapperFieldSetMapper) {
    FlatFileItemReader<Person> fileItemReader = new FlatFileItemReader<>();
    fileItemReader.setResource(new ClassPathResource("person.csv"));
    DelimitedLineTokenizer delimitedLineTokenizer = new DelimitedLineTokenizer();
    delimitedLineTokenizer.setNames("id", "firstName", "lastName");
    DefaultLineMapper<Person> defaultLineMapper = new DefaultLineMapper<>();
    defaultLineMapper.setLineTokenizer(delimitedLineTokenizer);
    defaultLineMapper.setFieldSetMapper(beanWrapperFieldSetMapper);
    fileItemReader.setLineMapper(defaultLineMapper);
    return fileItemReader;
}
Also used : FlatFileItemReader(org.springframework.batch.item.file.FlatFileItemReader) DelimitedLineTokenizer(org.springframework.batch.item.file.transform.DelimitedLineTokenizer) DefaultLineMapper(org.springframework.batch.item.file.mapping.DefaultLineMapper) Person(com.roytuts.spring.batch.quartz.scheduler.vo.Person) ClassPathResource(org.springframework.core.io.ClassPathResource) JobDetailFactoryBean(org.springframework.scheduling.quartz.JobDetailFactoryBean) JobRepositoryFactoryBean(org.springframework.batch.core.repository.support.JobRepositoryFactoryBean) SchedulerFactoryBean(org.springframework.scheduling.quartz.SchedulerFactoryBean) CronTriggerFactoryBean(org.springframework.scheduling.quartz.CronTriggerFactoryBean) Bean(org.springframework.context.annotation.Bean)
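
The BeanWrapperFieldSetMapper<Person> injected above maps each named column onto the Person property of the same name via reflection. A minimal sketch of how that bean might be declared, assuming Person is a plain bean with id, firstName and lastName setters:

// Minimal sketch of the injected mapper bean - assumes Person has setters matching the
// tokenizer column names ("id", "firstName", "lastName").
@Bean
public BeanWrapperFieldSetMapper<Person> beanWrapperFieldSetMapper() {
    BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
    fieldSetMapper.setTargetType(Person.class);
    return fieldSetMapper;
}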

Aggregations

DefaultLineMapper (org.springframework.batch.item.file.mapping.DefaultLineMapper): 29 usages
DelimitedLineTokenizer (org.springframework.batch.item.file.transform.DelimitedLineTokenizer): 29 usages
FlatFileItemReader (org.springframework.batch.item.file.FlatFileItemReader): 18 usages
FileSystemResource (org.springframework.core.io.FileSystemResource): 13 usages
Bean (org.springframework.context.annotation.Bean): 10 usages
ClassPathResource (org.springframework.core.io.ClassPathResource): 7 usages
JobRepositoryFactoryBean (org.springframework.batch.core.repository.support.JobRepositoryFactoryBean): 5 usages
CVRClinicalRecord (org.cbioportal.cmo.pipelines.cvr.model.staging.CVRClinicalRecord): 3 usages
Before (org.junit.Before): 3 usages
TaxonService (org.powo.api.TaxonService): 3 usages
ConversionService (org.springframework.core.convert.ConversionService): 3 usages
HashSet (java.util.HashSet): 2 usages
Transaction (org.baeldung.batch.model.Transaction): 2 usages
RecordFieldSetMapper (org.baeldung.batch.service.RecordFieldSetMapper): 2 usages
CVRFusionRecord (org.cbioportal.cmo.pipelines.cvr.model.staging.CVRFusionRecord): 2 usages
MskimpactSeqDate (org.cbioportal.cmo.pipelines.cvr.model.staging.MskimpactSeqDate): 2 usages
StringToIsoDateTimeConverter (org.powo.model.convert.StringToIsoDateTimeConverter): 2 usages
StepScope (org.springframework.batch.core.configuration.annotation.StepScope): 2 usages
BeanWrapperFieldSetMapper (org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper): 2 usages
ConversionServiceFactoryBean (org.springframework.context.support.ConversionServiceFactoryBean): 2 usages
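
The recurring combination across these usages is a DelimitedLineTokenizer feeding a DefaultLineMapper inside a FlatFileItemReader. A generic, self-contained sketch of that pattern follows; PassThroughFieldSetMapper is used so no domain class is required, and the file name and column names are placeholders only:

// Generic sketch of the common pattern above. "people.csv" and the column names are
// placeholders; PassThroughFieldSetMapper simply returns the tokenized FieldSet.
DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
tokenizer.setNames("id", "firstName", "lastName");
DefaultLineMapper<FieldSet> lineMapper = new DefaultLineMapper<>();
lineMapper.setLineTokenizer(tokenizer);
lineMapper.setFieldSetMapper(new PassThroughFieldSetMapper());
FlatFileItemReader<FieldSet> reader = new FlatFileItemReader<>();
reader.setResource(new FileSystemResource("people.csv"));
reader.setLineMapper(lineMapper);
reader.setLinesToSkip(1);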