Usage of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines (by knowledgesystems):
class GMLFusionDataReader, method processGmlFusionsFile.
/**
 * Loads germline fusion records from the germline fusions file in the private directory.
 *
 * Skips silently (with an info log) when the file does not exist. Each record is kept only
 * when its sample maps to a known patient and that patient is not already in the new DMP
 * germline patient set (prevents duplicate germline fusions); records are further de-duplicated
 * by fusion key via {@code gmlFusionsSeen}.
 *
 * @throws ItemStreamException if reading the file fails
 */
private void processGmlFusionsFile() {
    File gmlFusionsFile = new File(privateDirectory, CVRUtilities.FUSION_GML_FILE);
    if (!gmlFusionsFile.exists()) {
        LOG.info("File does not exist - skipping data loading from germline fusions file: " + gmlFusionsFile.getName());
        return;
    }
    LOG.info("Loading germline fusions data from: " + gmlFusionsFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<CVRFusionRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRGMLFusionFieldSetMapper());
    FlatFileItemReader<CVRFusionRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(gmlFusionsFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1); // skip header row
    reader.open(new ExecutionContext());
    try {
        CVRFusionRecord to_add;
        while ((to_add = reader.read()) != null) {
            String patientId = cvrSampleListUtil.getSamplePatientId(to_add.getTumor_Sample_Barcode());
            // and whether patient is in new dmp germline patients (to prevent duplicates)
            if (!Strings.isNullOrEmpty(patientId) && !cvrSampleListUtil.getNewDmpGmlPatients().contains(patientId)) {
                String fusion = getGmlFusionKey(to_add);
                if (gmlFusionsSeen.add(fusion)) {
                    gmlFusionRecords.add(to_add);
                }
            }
        }
    } catch (Exception e) {
        // log at error level (consistent with sibling readers) before rethrowing
        LOG.error("Error loading data from germline fusions file: " + gmlFusionsFile.getName());
        throw new ItemStreamException(e);
    } finally {
        // close in finally so the reader is released even when a read fails
        reader.close();
    }
}
Usage of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines (by knowledgesystems):
class GMLClinicalTasklet, method loadClinicalDataGmlPatientSampleMapping.
/**
 * Loads the clinical file and builds the germline patient-to-sample mapping.
 *
 * For every record: registers the patient/sample pair with {@code cvrSampleListUtil},
 * accumulates the record in {@code clinicalRecords}, and adds the sample to the portal
 * sample set. Afterwards triggers the sample-list reconciliation (portal-vs-DMP diffs).
 *
 * @param clinicalFile tab-delimited clinical staging file (first line is the header)
 * @throws ItemStreamException if the clinical file does not exist
 * @throws Exception if reading any record fails
 */
private void loadClinicalDataGmlPatientSampleMapping(File clinicalFile) throws Exception {
    // load clinical file and create patient-sample mapping
    if (!clinicalFile.exists()) {
        throw new ItemStreamException("Could not find clinical file: " + clinicalFile.getName());
    } else {
        LOG.info("Loading clinical data from: " + clinicalFile.getName());
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        DefaultLineMapper<CVRClinicalRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRClinicalFieldSetMapper());
        FlatFileItemReader<CVRClinicalRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(clinicalFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1); // skip header row
        reader.open(new ExecutionContext());
        try {
            CVRClinicalRecord to_add;
            while ((to_add = reader.read()) != null) {
                cvrSampleListUtil.updateGmlPatientSampleMap(to_add.getPATIENT_ID(), to_add.getSAMPLE_ID());
                clinicalRecords.add(to_add);
                cvrSampleListUtil.addPortalSample(to_add.getSAMPLE_ID());
            }
        } finally {
            // close in finally so the reader is released even when a read fails
            reader.close();
        }
    }
    // updates portalSamplesNotInDmpList and dmpSamplesNotInPortal sample lists
    // portalSamples list is only updated if threshold check for max num samples to remove passes
    cvrSampleListUtil.updateSampleLists();
    updateSamplesRemovedList();
}
Usage of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines (by knowledgesystems):
class LinkedMskimpactCaseReader, method loadExistingLinkedIds.
/**
 * Reads previously generated linked ARCHER sample records from the staging directory.
 *
 * Every record goes into {@code existingLinkedIdsMap} as a backup (in case the JSON
 * response dropped all "linked_mskimpact_case" data); records whose sample is not in the
 * new DMP sample set also go into {@code compiledLinkedIdsMap}. Missing file is not an
 * error - the method logs a warning and returns.
 *
 * @throws ItemStreamException if reading the staging file fails
 */
private void loadExistingLinkedIds() {
    File linkedIdsFile = new File(stagingDirectory, cvrUtilities.CORRESPONDING_ID_FILE);
    if (!linkedIdsFile.exists()) {
        LOG.warn("File does not exist - skipping data loading from linked ARCHER samples file: " + linkedIdsFile.getName());
        return;
    }
    LOG.info("Loading linked ARCHER sample data from: " + linkedIdsFile.getName());
    DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<LinkedMskimpactCaseRecord> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(lineTokenizer);
    lineMapper.setFieldSetMapper(new LinkedImpactCaseFieldSetMapper());
    FlatFileItemReader<LinkedMskimpactCaseRecord> linkedIdsReader = new FlatFileItemReader<>();
    linkedIdsReader.setResource(new FileSystemResource(linkedIdsFile));
    linkedIdsReader.setLineMapper(lineMapper);
    linkedIdsReader.setLinesToSkip(1); // header row
    linkedIdsReader.open(new ExecutionContext());
    try {
        for (LinkedMskimpactCaseRecord record = linkedIdsReader.read(); record != null; record = linkedIdsReader.read()) {
            String sampleId = record.getSAMPLE_ID();
            // only add samples that are not in the new dmp sample list
            if (!cvrSampleListUtil.getNewDmpSamples().contains(sampleId)) {
                compiledLinkedIdsMap.put(sampleId, record);
            }
            // keep a backup in case JSON returned dropped all "linked_mskimpact_case" data
            existingLinkedIdsMap.put(sampleId, record);
        }
    } catch (Exception e) {
        LOG.error("Error reading linked ARCHER sample data from file: " + linkedIdsFile.getName());
        throw new ItemStreamException(e);
    } finally {
        linkedIdsReader.close();
    }
}
Usage of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines (by knowledgesystems):
class GMLMutationDataReader, method loadExistingMutationRecords.
/**
 * Loads existing mutation records from the MAF file and submits them for annotation.
 *
 * Column names are taken from the skipped header line (via the skipped-lines callback,
 * which feeds the header into the tokenizer). Records are skipped when they are duplicates
 * of already-seen records for the same sample, or when they are GERMLINE variants for a
 * sample already in the germline sample set. Remaining records are batch-annotated.
 *
 * @throws Exception if reading the MAF file or annotating the records fails
 */
private void loadExistingMutationRecords() throws Exception {
    log.info("Loading mutation data from: " + mutationFile.getName());
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
    DefaultLineMapper<MutationRecord> mapper = new DefaultLineMapper<>();
    mapper.setLineTokenizer(tokenizer);
    mapper.setFieldSetMapper(new CVRMutationFieldSetMapper());
    FlatFileItemReader<MutationRecord> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(mutationFile));
    reader.setLineMapper(mapper);
    reader.setLinesToSkip(1);
    // the skipped header line supplies the tokenizer's column names
    reader.setSkippedLinesCallback(new LineCallbackHandler() {
        @Override
        public void handleLine(String line) {
            tokenizer.setNames(line.split("\t"));
        }
    });
    reader.open(new ExecutionContext());
    List<MutationRecord> recordsToAnnotate = new ArrayList<>();
    try {
        MutationRecord to_add;
        while ((to_add = reader.read()) != null && to_add.getTUMOR_SAMPLE_BARCODE() != null) {
            // skip if record already seen or if current record is a germline sample and record is a GERMLINE variant
            if (cvrUtilities.isDuplicateRecord(to_add, mutationMap.get(to_add.getTUMOR_SAMPLE_BARCODE())) || (germlineSamples.contains(to_add.getTUMOR_SAMPLE_BARCODE()) && to_add.getMUTATION_STATUS().equals("GERMLINE"))) {
                continue;
            }
            recordsToAnnotate.add(to_add);
        }
    } finally {
        // close in finally so the reader is released even when a read fails
        reader.close();
    }
    log.info("Loaded " + recordsToAnnotate.size() + " records from MAF");
    annotateRecordsWithPOST(recordsToAnnotate, forceAnnotation);
}
Usage of org.springframework.batch.item.file.mapping.DefaultLineMapper in project cmo-pipelines (by knowledgesystems):
class CVRSegDataReader, method open.
/**
 * Opens the reader: loads CVR JSON data, reads the existing SEG staging file (if present),
 * and merges in SEG data from the CVR JSON results.
 *
 * Existing SEG records are kept only for samples NOT in the new DMP sample set (new data
 * replaces them). The CVR JSON SEG data is row-oriented with the first row as the header;
 * each subsequent row is mapped onto a CVRSegRecord via reflective setters derived from
 * the header names (dots replaced with underscores to form valid method names).
 *
 * @param ec the execution context used to open the flat-file reader
 * @throws ItemStreamException if the CVR JSON file or the SEG file cannot be read
 */
@Override
public void open(ExecutionContext ec) throws ItemStreamException {
    CVRData cvrData = new CVRData();
    // load cvr data from cvr_data.json file
    File cvrFile = new File(privateDirectory, cvrUtilities.CVR_FILE);
    try {
        cvrData = cvrUtilities.readJson(cvrFile);
    } catch (IOException e) {
        log.error("Error reading file: " + cvrFile.getName());
        throw new ItemStreamException(e);
    }
    // only read from seg file if exists
    File segFile = new File(stagingDirectory, studyId + cvrUtilities.SEG_FILE);
    if (!segFile.exists()) {
        log.error("File does not exist - skipping data loading from SEG file: " + segFile.getName());
    } else {
        log.info("Loading SEG data from: " + segFile.getName());
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(DelimitedLineTokenizer.DELIMITER_TAB);
        DefaultLineMapper<CVRSegRecord> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CVRSegFieldSetMapper());
        FlatFileItemReader<CVRSegRecord> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(segFile));
        reader.setLineMapper(mapper);
        reader.setLinesToSkip(1); // skip header row
        reader.open(ec);
        try {
            CVRSegRecord to_add;
            while ((to_add = reader.read()) != null && to_add.getID() != null) {
                // samples in the new dmp sample set will be replaced by the fresh CVR data below
                if (!cvrSampleListUtil.getNewDmpSamples().contains(to_add.getID())) {
                    cvrSegRecords.add(to_add);
                }
            }
        } catch (Exception e) {
            log.error("Error loading data from SEG file: " + segFile.getName());
            throw new ItemStreamException(e);
        } finally {
            // close in finally so the reader is released even when a read fails
            reader.close();
        }
    }
    // merge cvr SEG data existing SEG data and new data from CVR
    for (CVRMergedResult result : cvrData.getResults()) {
        CVRSegData cvrSegData = result.getSegData();
        if (cvrSegData.getSegData() == null) {
            continue;
        }
        HashMap<Integer, String> indexMap = new HashMap<>();
        boolean first = true;
        String id = result.getMetaData().getDmpSampleId();
        for (List<String> segData : cvrSegData.getSegData()) {
            if (first) {
                // first row is the header: remember column name for each index
                for (int i = 0; i < segData.size(); i++) {
                    indexMap.put(i, segData.get(i));
                }
                first = false;
            } else {
                CVRSegRecord cvrSegRecord = new CVRSegRecord();
                cvrSegRecord.setID(id); // set once per record, not per column
                for (int i = 0; i < segData.size(); i++) {
                    String columnName = indexMap.get(i);
                    if (columnName == null) {
                        // data row has more columns than the header - nothing to map this value to
                        log.warn("SEG data row has more columns than header for sample: " + id);
                        continue;
                    }
                    // dots in source; replaced for method
                    String field = columnName.replace(".", "_");
                    try {
                        cvrSegRecord.getClass().getMethod("set" + field, String.class).invoke(cvrSegRecord, segData.get(i));
                    } catch (Exception e) {
                        log.warn("No such method 'set" + field + "' for CVRSegRecord");
                    }
                }
                cvrSegRecords.add(cvrSegRecord);
            }
        }
    }
}
Aggregations