use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method loadStandardReferenceData.
/**
 * Reads haplotype frequency rows from {@code reader} and builds one
 * {@link DisequilibriumElementByRace} per distinct haplotype.
 *
 * <p>Each row is split on {@code GLStringConstants.COMMA} and its first four
 * columns are read, in order, as race, haplotype, frequency and rank. Rows that
 * share a haplotype are collected into a single list of {@link FrequencyByRace}.
 * Each haplotype is then split on {@code GLStringConstants.GENE_PHASE_DELIMITER};
 * the text before the asterisk of every part is resolved to a {@link Locus}.
 *
 * @param reader source of the rows; it is always closed by this method, whether
 *               reading succeeds or fails
 * @return one element per distinct haplotype found in the input
 * @throws IOException if reading from {@code reader} fails
 */
public static List<DisequilibriumElement> loadStandardReferenceData(BufferedReader reader) throws IOException {
    HashMap<String, List<FrequencyByRace>> frequencyMap = new HashMap<String, List<FrequencyByRace>>();

    // Close the reader even when a row is malformed or the read fails part-way through.
    try {
        String row;
        while ((row = reader.readLine()) != null) {
            String[] columns = row.split(GLStringConstants.COMMA);
            String race = columns[0];
            String haplotype = columns[1];
            Double frequency = Double.valueOf(columns[2]);
            String rank = columns[3];

            List<FrequencyByRace> freqList = frequencyMap.get(haplotype);
            if (freqList == null) {
                freqList = new ArrayList<FrequencyByRace>();
                frequencyMap.put(haplotype, freqList);
            }
            freqList.add(new FrequencyByRace(frequency, rank, race));
        }
    } finally {
        reader.close();
    }

    List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
    // Caches the resolved Locus per locus name so each name is looked up only once.
    HashMap<String, Locus> locusMap = new HashMap<String, Locus>();
    for (String haplotype : frequencyMap.keySet()) {
        String[] locusHaplotypes = haplotype.split(GLStringConstants.GENE_PHASE_DELIMITER);
        HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
        for (String locusHaplotype : locusHaplotypes) {
            String[] parts = locusHaplotype.split(GLStringUtilities.ESCAPED_ASTERISK);
            List<String> val = new ArrayList<String>();
            val.add(locusHaplotype);

            Locus locus;
            if (locusMap.containsKey(parts[0])) {
                locus = locusMap.get(parts[0]);
            } else {
                locus = Locus.normalizeLocus(Locus.lookup(parts[0]));
                locusMap.put(parts[0], locus);
            }
            hlaElementMap.put(locus, val);
        }
        disequilibriumElements.add(new DisequilibriumElementByRace(hlaElementMap, frequencyMap.get(haplotype)));
    }
    return disequilibriumElements;
}
use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method init.
/**
 * Loads every frequency file into {@code disequilibriumElementsMap}, initialises
 * the {@link LinkagesLoader} with the linkages found, and optionally loads
 * individual locus frequencies.
 *
 * <p>The loci of the first element of each file are used as the map key for that
 * file's elements. A file that yields no elements is skipped with a warning. On an
 * {@link IOException} the error is logged, the stack trace printed, and the JVM
 * exits with status {@code -1}.
 *
 * @param frequencyFiles files to read with {@code loadStandardReferenceData}
 * @param allelesFile    file passed to {@code loadIndividualLocusFrequencies};
 *                       ignored when {@code null}
 */
private void init(Set<File> frequencyFiles, File allelesFile) {
    Set<Linkages> linkages = new HashSet<Linkages>();
    try {
        for (File frequencyFile : frequencyFiles) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(frequencyFile)));
            List<DisequilibriumElement> elements;
            try {
                elements = loadStandardReferenceData(reader);
            } finally {
                // Guarantees the file handle is released even if loading fails;
                // closing an already-closed reader has no effect.
                reader.close();
            }

            if (elements.isEmpty()) {
                // Nothing to key the map on; avoid NoSuchElementException on an empty file.
                LOGGER.warning("No disequilibrium elements found in " + frequencyFile + "; skipping.");
                continue;
            }

            EnumSet<Locus> loci = Locus.lookup(elements.get(0).getLoci());
            linkages.addAll(Linkages.lookup(loci));
            this.disequilibriumElementsMap.put(loci, elements);
        }
        LinkagesLoader.getInstance(linkages);
        if (allelesFile != null) {
            loadIndividualLocusFrequencies(allelesFile);
        }
    } catch (IOException e) {
        LOGGER.severe("Couldn't load disequilibrium element reference file.");
        e.printStackTrace();
        System.exit(-1);
    }
}
use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class GLStringUtilities method buildHaplotypes.
/**
 * Builds the list of haplotypes described by the GL string of {@code linkedGlString}.
 *
 * <p>The GL string is only processed when it contains more than one
 * {@code GENE_PHASE_DELIMITER} and exactly one {@code GENE_COPY_DELIMITER};
 * otherwise an empty list is returned. For a qualifying string, it is split
 * successively by gene, genotype ambiguity, gene copy and gene phase. Every gene
 * copy yields one {@link MultiLocusHaplotype} whose sequence is the 1-based
 * position of the copy within its genotype ambiguity.
 *
 * @param linkedGlString genotype list supplying the GL string and the
 *                       {@code hasHomozygous} answer for {@code Locus.HLA_DRB345}
 * @return the haplotypes found, in encounter order; possibly empty
 */
public static List<Haplotype> buildHaplotypes(LinkageDisequilibriumGenotypeList linkedGlString) {
    final String glString = linkedGlString.getGLString();
    final List<Haplotype> result = new CopyOnWriteArrayList<Haplotype>();

    // Guard: anything other than ">1 phase delimiter and exactly 1 copy delimiter" yields no haplotypes.
    if (StringUtils.countMatches(glString, GLStringConstants.GENE_PHASE_DELIMITER) <= 1
            || StringUtils.countMatches(glString, GLStringConstants.GENE_COPY_DELIMITER) != 1) {
        return result;
    }

    // Remembers the Locus resolved for each locus name seen so far.
    final HashMap<String, Locus> lociByName = new HashMap<String, Locus>();

    for (String gene : GLStringUtilities.parse(glString, GLStringConstants.GENE_DELIMITER)) {
        for (String ambiguity : GLStringUtilities.parse(gene, GLStringConstants.GENOTYPE_AMBIGUITY_DELIMITER)) {
            // Copy index restarts for every genotype ambiguity.
            int copyIndex = 0;
            for (String copy : GLStringUtilities.parse(ambiguity, GLStringConstants.GENE_COPY_DELIMITER)) {
                HashMap<Locus, SingleLocusHaplotype> perLocus = new HashMap<Locus, SingleLocusHaplotype>();

                for (String phase : GLStringUtilities.parse(copy, GLStringConstants.GENE_PHASE_DELIMITER)) {
                    // The locus name is the text preceding the asterisk.
                    String locusName = phase.split(GLStringUtilities.ESCAPED_ASTERISK)[0];
                    List<String> alleles = GLStringUtilities.parse(phase, GLStringConstants.ALLELE_AMBIGUITY_DELIMITER);

                    Locus resolved;
                    if (!lociByName.containsKey(locusName)) {
                        resolved = Locus.normalizeLocus(Locus.lookup(locusName));
                        lociByName.put(locusName, resolved);
                    } else {
                        resolved = lociByName.get(locusName);
                    }

                    perLocus.put(resolved, new SingleLocusHaplotype(resolved, alleles, copyIndex));
                }

                MultiLocusHaplotype combined =
                        new MultiLocusHaplotype(perLocus, linkedGlString.hasHomozygous(Locus.HLA_DRB345));
                combined.setSequence(copyIndex + 1);
                result.add(combined);
                copyIndex++;
            }
        }
    }
    return result;
}
use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class HLAFrequenciesLoader method loadLinkageReferenceData.
/**
 * Loads tab-separated linkage reference rows from a classpath resource.
 *
 * <p>For each row, column {@code i} is stored under {@code locusPositions[i]};
 * a column equal to {@code GLStringConstants.DASH} is replaced by
 * {@code GLStringConstants.NNNN}. The two columns following the locus columns
 * are passed as the remaining constructor arguments of
 * {@link BaseDisequilibriumElement}.
 *
 * @param filename       classpath resource name, resolved through this class's class loader
 * @param locusPositions locus assigned to each leading column, in column order
 * @return one element per row of the resource
 * @throws FileNotFoundException if the resource cannot be found on the classpath
 * @throws IOException           if reading the resource fails
 */
public List<DisequilibriumElement> loadLinkageReferenceData(String filename, Locus[] locusPositions) throws FileNotFoundException, IOException {
    InputStream resource = HLAFrequenciesLoader.class.getClassLoader().getResourceAsStream(filename);
    if (resource == null) {
        // getResourceAsStream signals a missing resource with null rather than an exception.
        throw new FileNotFoundException("Linkage reference resource not found on classpath: " + filename);
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(resource));
    List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
    // Close the reader even when a row is shorter than expected or the read fails.
    try {
        String row;
        while ((row = reader.readLine()) != null) {
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            String[] columns = row.split(GLStringConstants.TAB);
            for (int i = 0; i < locusPositions.length; i++) {
                List<String> val = new ArrayList<String>();
                val.add(GLStringConstants.DASH.equals(columns[i]) ? GLStringConstants.NNNN : columns[i]);
                hlaElementMap.put(locusPositions[i], val);
            }
            disequilibriumElements.add(new BaseDisequilibriumElement(hlaElementMap, columns[locusPositions.length], columns[locusPositions.length + 1]));
        }
    } finally {
        reader.close();
    }
    return disequilibriumElements;
}
use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.
the class NormalizeFrequencyFile method call.
/**
 * Rewrites the frequencies in {@code inputFile} to {@code outputFile}.
 *
 * <p>When {@code frequencies} equals {@code SINGLE}, every string returned by
 * {@code HLAFrequenciesLoader.loadIndividualLocusFrequency} is written on its own
 * line. Otherwise {@code frequencies} is treated as a linkage name: the elements
 * are loaded with {@code loadNMDPLinkageReferenceData}, and for each element one
 * line per {@link FrequencyByRace} is written as
 * {@code race,haplotype,frequency,rank}, where the haplotype joins the first HLA
 * element of each locus with {@code GLStringConstants.GENE_PHASE_DELIMITER}.
 *
 * <p>NOTE(review): the {@code FileInputStream} is handed to the loader and is not
 * closed here; presumably the loader closes it — confirm.
 *
 * @return always {@code 0}
 * @throws Exception if the output file cannot be opened or loading the input fails
 */
@Override
public Integer call() throws Exception {
    PrintWriter writer = new PrintWriter(outputFile);
    // Close (and thereby flush) the writer even when loading or writing fails.
    try {
        if (SINGLE.equals(frequencies)) {
            List<String> singleLocusFrequencies = HLAFrequenciesLoader.loadIndividualLocusFrequency(new FileInputStream(inputFile));
            for (String allele : singleLocusFrequencies) {
                writer.write(allele + GLStringConstants.NEWLINE);
            }
        } else {
            HashSet<String> linkageNames = new HashSet<String>();
            linkageNames.add(frequencies);
            Set<Linkages> linkagesSet = Linkages.lookup(linkageNames);
            LinkagesLoader.getInstance(linkagesSet);

            List<DisequilibriumElement> disequilibriumElements = HLAFrequenciesLoader.loadNMDPLinkageReferenceData(new FileInputStream(inputFile), LOCUS_POSITION_MAP.get(Linkages.lookup(frequencies).getLoci()));
            for (DisequilibriumElement element : disequilibriumElements) {
                // Join the first HLA element of every locus into one haplotype string.
                StringBuilder sb = new StringBuilder();
                int locusCounter = 0;
                for (Locus locus : Locus.lookup(element.getLoci())) {
                    if (locusCounter > 0) {
                        sb.append(GLStringConstants.GENE_PHASE_DELIMITER);
                    }
                    sb.append(element.getHlaElement(locus).get(0));
                    locusCounter++;
                }

                // Named distinctly so it does not shadow the 'frequencies' field used above.
                List<FrequencyByRace> frequenciesByRace = ((DisequilibriumElementByRace) element).getFrequenciesByRace();
                for (FrequencyByRace frequency : frequenciesByRace) {
                    writer.write(frequency.getRace() + GLStringConstants.COMMA + sb + GLStringConstants.COMMA + frequency.getFrequency() + GLStringConstants.COMMA + frequency.getRank() + GLStringConstants.NEWLINE);
                }
            }
        }
    } finally {
        writer.close();
    }
    return 0;
}
Aggregations