use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method loadNMDPLinkageReferenceData.
/**
 * Loads linkage reference data from a spreadsheet workbook.
 *
 * <p>Only the first sheet is read. The sheet's first row is parsed as the
 * header row with {@code readHeaderElementsByRace}; every other row is turned
 * into a {@link DisequilibriumElement} with
 * {@code readDiseqilibriumElementsByRace}.
 *
 * @param inStream stream the workbook is created from
 * @param locusPositions handed unchanged to the row reader
 * @return one element per non-header row, in sheet iteration order
 * @throws IOException if the workbook cannot be read or closed
 * @throws InvalidFormatException if raised while opening the workbook
 */
public static List<DisequilibriumElement> loadNMDPLinkageReferenceData(InputStream inStream, Locus[] locusPositions) throws IOException, InvalidFormatException {
    List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
    Workbook workbook = WorkbookFactory.create(inStream);
    try {
        // Only the first sheet of the workbook carries the reference data.
        Sheet mySheet = workbook.getSheetAt(0);
        int firstRow = mySheet.getFirstRowNum();
        List<String> raceHeaders = null;
        for (Row row : mySheet) {
            if (row.getRowNum() == firstRow) {
                raceHeaders = readHeaderElementsByRace(row);
            } else {
                disequilibriumElements.add(readDiseqilibriumElementsByRace(row, raceHeaders, locusPositions));
            }
        }
    } finally {
        // Release the workbook even when a row fails to parse.
        workbook.close();
    }
    return disequilibriumElements;
}
use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method loadStandardReferenceData.
/**
 * Reads delimited reference data and groups the per-race frequencies by
 * haplotype.
 *
 * <p>Each line is split on {@code GLStringConstants.COMMA} and read as
 * {@code race, haplotype, frequency, rank}. Rows that share a haplotype are
 * collected into one {@link DisequilibriumElementByRace}. Each haplotype is
 * split on {@code GLStringConstants.GENE_PHASE_DELIMITER}, and the text before
 * the asterisk in each part is used to look up its {@link Locus}.
 *
 * <p>The supplied reader is always closed before this method returns,
 * whether it completes normally or throws.
 *
 * @param reader source of the delimited rows; closed by this method
 * @return one element per distinct haplotype
 * @throws IOException if reading from {@code reader} fails
 */
public static List<DisequilibriumElement> loadStandardReferenceData(BufferedReader reader) throws IOException {
    try {
        HashMap<String, List<FrequencyByRace>> frequencyMap = new HashMap<String, List<FrequencyByRace>>();
        String row;
        while ((row = reader.readLine()) != null) {
            String[] columns = row.split(GLStringConstants.COMMA);
            String race = columns[0];
            String haplotype = columns[1];
            Double frequency = Double.valueOf(columns[2]);
            String rank = columns[3];

            List<FrequencyByRace> freqList = frequencyMap.get(haplotype);
            if (freqList == null) {
                // First row for this haplotype: register its list once.
                freqList = new ArrayList<FrequencyByRace>();
                frequencyMap.put(haplotype, freqList);
            }
            freqList.add(new FrequencyByRace(frequency, rank, race));
        }

        List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
        // Cache of locus-name -> normalized Locus so each name is resolved once.
        HashMap<String, Locus> locusMap = new HashMap<String, Locus>();
        for (String haplotype : frequencyMap.keySet()) {
            String[] locusHaplotypes = haplotype.split(GLStringConstants.GENE_PHASE_DELIMITER);
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            for (String locusHaplotype : locusHaplotypes) {
                String[] parts = locusHaplotype.split(GLStringUtilities.ESCAPED_ASTERISK);
                List<String> val = new ArrayList<String>();
                val.add(locusHaplotype);

                Locus locus;
                if (locusMap.containsKey(parts[0])) {
                    locus = locusMap.get(parts[0]);
                } else {
                    locus = Locus.normalizeLocus(Locus.lookup(parts[0]));
                    locusMap.put(parts[0], locus);
                }
                hlaElementMap.put(locus, val);
            }
            disequilibriumElements.add(new DisequilibriumElementByRace(hlaElementMap, frequencyMap.get(haplotype)));
        }
        return disequilibriumElements;
    } finally {
        // Close the reader on both the success and the failure path.
        reader.close();
    }
}
use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method init.
/**
 * Loads every frequency file, registers its elements under the loci of the
 * file's first element, initializes {@code LinkagesLoader} with the linkages
 * found, and optionally loads individual locus frequencies.
 *
 * <p>A frequency file that yields no elements is skipped with a warning. On
 * an {@link IOException} the failure is logged and the JVM exits with
 * status {@code -1}.
 *
 * @param frequencyFiles files to read with {@code loadStandardReferenceData}
 * @param allelesFile file passed to {@code loadIndividualLocusFrequencies};
 *        ignored when {@code null}
 */
private void init(Set<File> frequencyFiles, File allelesFile) {
    Set<Linkages> linkages = new HashSet<Linkages>();
    try {
        for (File frequencyFile : frequencyFiles) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(frequencyFile)));
            List<DisequilibriumElement> elements;
            try {
                elements = loadStandardReferenceData(reader);
            } finally {
                // Closing twice is harmless; this guarantees the file handle is released.
                reader.close();
            }
            if (elements.isEmpty()) {
                // Nothing to derive the loci from; avoid NoSuchElementException below.
                LOGGER.warning("No disequilibrium elements found in " + frequencyFile + "; skipping.");
                continue;
            }
            EnumSet<Locus> loci = Locus.lookup(elements.get(0).getLoci());
            linkages.addAll(Linkages.lookup(loci));
            this.disequilibriumElementsMap.put(loci, elements);
        }
        LinkagesLoader.getInstance(linkages);
        if (allelesFile != null) {
            loadIndividualLocusFrequencies(allelesFile);
        }
    } catch (IOException e) {
        LOGGER.log(java.util.logging.Level.SEVERE, "Couldn't load disequilibrium element reference file.", e);
        System.exit(-1);
    }
}
use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method loadLinkageReferenceData.
/**
 * Loads tab-delimited linkage reference data from a classpath resource.
 *
 * <p>For each line, the first {@code locusPositions.length} columns are mapped
 * to the corresponding locus. A column equal to {@code GLStringConstants.DASH}
 * is stored as {@code GLStringConstants.NNNN}. The two columns that follow
 * are passed to the {@link BaseDisequilibriumElement} constructor.
 *
 * @param filename classpath resource name, resolved through this class's
 *        class loader
 * @param locusPositions locus for each leading column, in column order
 * @return one element per line of the resource
 * @throws FileNotFoundException if the resource is not on the classpath
 * @throws IOException if reading the resource fails
 */
public List<DisequilibriumElement> loadLinkageReferenceData(String filename, Locus[] locusPositions) throws FileNotFoundException, IOException {
    InputStream resource = HLAFrequenciesLoader.class.getClassLoader().getResourceAsStream(filename);
    if (resource == null) {
        // getResourceAsStream signals a missing resource with null, not an exception.
        throw new FileNotFoundException("Linkage reference resource not found on classpath: " + filename);
    }

    List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
    BufferedReader reader = new BufferedReader(new InputStreamReader(resource));
    try {
        String row;
        while ((row = reader.readLine()) != null) {
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            String[] columns = row.split(GLStringConstants.TAB);
            for (int i = 0; i < locusPositions.length; i++) {
                List<String> val = new ArrayList<String>();
                val.add(GLStringConstants.DASH.equals(columns[i]) ? GLStringConstants.NNNN : columns[i]);
                hlaElementMap.put(locusPositions[i], val);
            }
            disequilibriumElements.add(new BaseDisequilibriumElement(hlaElementMap, columns[locusPositions.length], columns[locusPositions.length + 1]));
        }
    } finally {
        // Release the resource stream even when a row is malformed.
        reader.close();
    }
    return disequilibriumElements;
}
use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class NormalizeFrequencyFile method call.
/**
 * Converts the input frequency file into normalized text written to
 * {@code outputFile}.
 *
 * <p>When {@code frequencies} equals {@code SINGLE}, each entry returned by
 * {@code HLAFrequenciesLoader.loadIndividualLocusFrequency} is written on its
 * own line. Otherwise the input is loaded with
 * {@code HLAFrequenciesLoader.loadNMDPLinkageReferenceData}, and one line is
 * written per element and per-race frequency, in the form
 * {@code race, haplotype, frequency, rank}. The haplotype is the first HLA
 * element of each locus, joined with
 * {@code GLStringConstants.GENE_PHASE_DELIMITER}.
 *
 * <p>The output writer and the input stream are closed on both the success
 * and the failure path.
 *
 * @return {@code 0} on completion
 * @throws Exception if the files cannot be opened or the input cannot be loaded
 */
@Override
public Integer call() throws Exception {
    PrintWriter writer = new PrintWriter(outputFile);
    try {
        if (SINGLE.equals(frequencies)) {
            List<String> singleLocusFrequencies;
            FileInputStream singleInput = new FileInputStream(inputFile);
            try {
                singleLocusFrequencies = HLAFrequenciesLoader.loadIndividualLocusFrequency(singleInput);
            } finally {
                singleInput.close();
            }
            for (String allele : singleLocusFrequencies) {
                writer.write(allele + GLStringConstants.NEWLINE);
            }
        } else {
            HashSet<String> linkageNames = new HashSet<String>();
            linkageNames.add(frequencies);
            Set<Linkages> linkagesSet = Linkages.lookup(linkageNames);
            LinkagesLoader.getInstance(linkagesSet);

            List<DisequilibriumElement> disequilibriumElements;
            FileInputStream linkageInput = new FileInputStream(inputFile);
            try {
                disequilibriumElements = HLAFrequenciesLoader.loadNMDPLinkageReferenceData(linkageInput, LOCUS_POSITION_MAP.get(Linkages.lookup(frequencies).getLoci()));
            } finally {
                linkageInput.close();
            }

            for (DisequilibriumElement element : disequilibriumElements) {
                // Build the haplotype string from the first HLA element of each locus.
                StringBuilder haplotype = new StringBuilder();
                int locusCounter = 0;
                for (Locus locus : Locus.lookup(element.getLoci())) {
                    if (locusCounter > 0) {
                        haplotype.append(GLStringConstants.GENE_PHASE_DELIMITER);
                    }
                    haplotype.append(element.getHlaElement(locus).get(0));
                    locusCounter++;
                }

                // Named to avoid shadowing the 'frequencies' field tested above.
                List<FrequencyByRace> frequenciesByRace = ((DisequilibriumElementByRace) element).getFrequenciesByRace();
                for (FrequencyByRace frequency : frequenciesByRace) {
                    writer.write(frequency.getRace() + GLStringConstants.COMMA + haplotype + GLStringConstants.COMMA + frequency.getFrequency() + GLStringConstants.COMMA + frequency.getRank() + GLStringConstants.NEWLINE);
                }
            }
        }
    } finally {
        // Flush and release the output file even if loading or writing fails.
        writer.close();
    }
    return 0;
}
Aggregations