Search in sources :

Example 6 with Locus

use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method loadStandardReferenceData.

/**
 * Loads race-specific haplotype frequencies from delimited reference data.
 *
 * <p>Every row is split on {@code GLStringConstants.COMMA} and read as
 * {@code race, haplotype, frequency, rank}. Rows that share the same haplotype string are
 * grouped together, and one {@link DisequilibriumElementByRace} is created per distinct
 * haplotype, carrying every frequency read for it. Each haplotype string is further split on
 * {@code GLStringConstants.GENE_PHASE_DELIMITER}; the text before the asterisk of each part
 * is resolved to a normalized {@link Locus}.
 *
 * <p>The supplied reader is always closed before this method returns, including when a row
 * cannot be parsed.
 *
 * @param reader source of the reference rows; closed by this method
 * @return one element per distinct haplotype, in the iteration order of the internal
 *         {@code HashMap}
 * @throws IOException if reading from or closing the reader fails
 * @throws NumberFormatException if the third column of a row is not a parsable number
 * @throws ArrayIndexOutOfBoundsException if a row has fewer than four columns
 */
public static List<DisequilibriumElement> loadStandardReferenceData(BufferedReader reader) throws IOException {
    // try/finally so the reader is released even when a malformed row aborts the load.
    try {
        HashMap<String, List<FrequencyByRace>> frequencyMap = new HashMap<String, List<FrequencyByRace>>();
        String row;
        while ((row = reader.readLine()) != null) {
            String[] columns = row.split(GLStringConstants.COMMA);
            String race = columns[0];
            String haplotype = columns[1];
            // Double.valueOf replaces the deprecated Double(String) constructor.
            Double frequency = Double.valueOf(columns[2]);
            String rank = columns[3];
            List<FrequencyByRace> freqList = frequencyMap.get(haplotype);
            if (freqList == null) {
                // First row for this haplotype: register its list once.
                freqList = new ArrayList<FrequencyByRace>();
                frequencyMap.put(haplotype, freqList);
            }
            freqList.add(new FrequencyByRace(frequency, rank, race));
        }

        List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
        // Cache of locus name -> normalized Locus so each name is resolved only once.
        HashMap<String, Locus> locusMap = new HashMap<String, Locus>();
        for (String haplotype : frequencyMap.keySet()) {
            String[] locusHaplotypes = haplotype.split(GLStringConstants.GENE_PHASE_DELIMITER);
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            for (String locusHaplotype : locusHaplotypes) {
                // parts[0] is the locus name preceding the asterisk.
                String[] parts = locusHaplotype.split(GLStringUtilities.ESCAPED_ASTERISK);
                List<String> val = new ArrayList<String>();
                val.add(locusHaplotype);
                Locus locus;
                if (locusMap.containsKey(parts[0])) {
                    locus = locusMap.get(parts[0]);
                } else {
                    locus = Locus.normalizeLocus(Locus.lookup(parts[0]));
                    locusMap.put(parts[0], locus);
                }
                hlaElementMap.put(locus, val);
            }
            disequilibriumElements.add(new DisequilibriumElementByRace(hlaElementMap, frequencyMap.get(haplotype)));
        }
        return disequilibriumElements;
    } finally {
        reader.close();
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DisequilibriumElementByRace(org.dash.valid.race.DisequilibriumElementByRace) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) ArrayList(java.util.ArrayList) List(java.util.List) Locus(org.dash.valid.Locus) FrequencyByRace(org.dash.valid.race.FrequencyByRace)

Example 7 with Locus

use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method init.

/**
 * Populates this loader from the given frequency files and, optionally, an alleles file.
 *
 * <p>Each frequency file is parsed with {@code loadStandardReferenceData}. The loci of the
 * first element returned for a file are used as the key under which that file's elements
 * are stored in {@code disequilibriumElementsMap}, and the linkages matching those loci are
 * collected. Once every file is read, {@code LinkagesLoader.getInstance} is called with the
 * collected linkages, and the individual locus frequencies are loaded when an alleles file
 * was supplied.
 *
 * <p>An {@link IOException} is logged, its stack trace printed, and the JVM is terminated
 * with exit status {@code -1}.
 *
 * @param frequencyFiles files holding the haplotype frequency reference data
 * @param allelesFile    file of individual locus frequencies; skipped when {@code null}
 */
private void init(Set<File> frequencyFiles, File allelesFile) {
    Set<Linkages> collectedLinkages = new HashSet<Linkages>();
    try {
        for (File source : frequencyFiles) {
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(source)));
            List<DisequilibriumElement> loaded = loadStandardReferenceData(in);
            // The first element's loci stand in for the whole file.
            EnumSet<Locus> fileLoci = Locus.lookup(loaded.iterator().next().getLoci());
            collectedLinkages.addAll(Linkages.lookup(fileLoci));
            this.disequilibriumElementsMap.put(fileLoci, loaded);
        }

        LinkagesLoader.getInstance(collectedLinkages);

        if (allelesFile != null) {
            loadIndividualLocusFrequencies(allelesFile);
        }
    } catch (IOException ioFailure) {
        LOGGER.severe("Couldn't load disequilibrium element reference file.");
        ioFailure.printStackTrace();
        System.exit(-1);
    }
}
Also used : Linkages(org.dash.valid.Linkages) InputStreamReader(java.io.InputStreamReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) BufferedReader(java.io.BufferedReader) Locus(org.dash.valid.Locus) File(java.io.File) HashSet(java.util.HashSet)

Example 8 with Locus

use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class GLStringUtilities method buildHaplotypes.

/**
 * Builds the haplotypes that are spelled out in a linked GL string.
 *
 * <p>Haplotypes are only built when the GL string contains more than one
 * {@code GLStringConstants.GENE_PHASE_DELIMITER} and exactly one
 * {@code GLStringConstants.GENE_COPY_DELIMITER}; otherwise the returned list is empty.
 * The string is parsed by gene, then genotype ambiguity, then gene copy. Each gene copy
 * yields one {@link MultiLocusHaplotype} made of a {@link SingleLocusHaplotype} per gene
 * phase, with its sequence set to the one-based position of the copy within its genotype
 * ambiguity.
 *
 * @param linkedGlString genotype list supplying the GL string and the homozygosity check
 * @return the haplotypes found, in parse order; never {@code null}
 */
public static List<Haplotype> buildHaplotypes(LinkageDisequilibriumGenotypeList linkedGlString) {
    String glString = linkedGlString.getGLString();
    List<Haplotype> knownHaplotypes = new CopyOnWriteArrayList<Haplotype>();

    // Guard: nothing to build unless the string is phased across loci with a single copy split.
    if (StringUtils.countMatches(glString, GLStringConstants.GENE_PHASE_DELIMITER) <= 1
            || StringUtils.countMatches(glString, GLStringConstants.GENE_COPY_DELIMITER) != 1) {
        return knownHaplotypes;
    }

    // Locus names already resolved, so each name is looked up and normalized only once.
    HashMap<String, Locus> resolvedLoci = new HashMap<String, Locus>();

    for (String gene : GLStringUtilities.parse(glString, GLStringConstants.GENE_DELIMITER)) {
        for (String ambiguity : GLStringUtilities.parse(gene, GLStringConstants.GENOTYPE_AMBIGUITY_DELIMITER)) {
            int copyIndex = 0;
            for (String copy : GLStringUtilities.parse(ambiguity, GLStringConstants.GENE_COPY_DELIMITER)) {
                HashMap<Locus, SingleLocusHaplotype> byLocus = new HashMap<Locus, SingleLocusHaplotype>();
                for (String phase : GLStringUtilities.parse(copy, GLStringConstants.GENE_PHASE_DELIMITER)) {
                    // The locus name is the text before the asterisk.
                    String locusName = phase.split(GLStringUtilities.ESCAPED_ASTERISK)[0];
                    List<String> alleles = GLStringUtilities.parse(phase, GLStringConstants.ALLELE_AMBIGUITY_DELIMITER);
                    if (!resolvedLoci.containsKey(locusName)) {
                        resolvedLoci.put(locusName, Locus.normalizeLocus(Locus.lookup(locusName)));
                    }
                    Locus resolved = resolvedLoci.get(locusName);
                    byLocus.put(resolved, new SingleLocusHaplotype(resolved, alleles, copyIndex));
                }
                MultiLocusHaplotype combined = new MultiLocusHaplotype(byLocus, linkedGlString.hasHomozygous(Locus.HLA_DRB345));
                combined.setSequence(copyIndex + 1);
                knownHaplotypes.add(combined);
                copyIndex++;
            }
        }
    }
    return knownHaplotypes;
}
Also used : HashMap(java.util.HashMap) SingleLocusHaplotype(org.dash.valid.gl.haplo.SingleLocusHaplotype) Locus(org.dash.valid.Locus) MultiLocusHaplotype(org.dash.valid.gl.haplo.MultiLocusHaplotype) Haplotype(org.dash.valid.gl.haplo.Haplotype) MultiLocusHaplotype(org.dash.valid.gl.haplo.MultiLocusHaplotype) SingleLocusHaplotype(org.dash.valid.gl.haplo.SingleLocusHaplotype) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 9 with Locus

use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method loadLinkageReferenceData.

/**
 * Loads linkage reference data from a tab-delimited classpath resource.
 *
 * <p>Each row is split on {@code GLStringConstants.TAB}. The first
 * {@code locusPositions.length} columns hold one value per locus, in the order given by
 * {@code locusPositions}; a value equal to {@code GLStringConstants.DASH} is replaced by
 * {@code GLStringConstants.NNNN}. The two columns that follow are passed unchanged to the
 * {@link BaseDisequilibriumElement} constructor.
 *
 * <p>The resource stream is always closed before this method returns, including when a row
 * cannot be parsed.
 *
 * @param filename       classpath-relative name of the resource to read
 * @param locusPositions locus for each leading column, in column order
 * @return one element per row, in file order
 * @throws FileNotFoundException if no resource with the given name is found on the classpath
 * @throws IOException if reading from or closing the resource fails
 * @throws ArrayIndexOutOfBoundsException if a row has fewer than
 *         {@code locusPositions.length + 2} columns
 */
public List<DisequilibriumElement> loadLinkageReferenceData(String filename, Locus[] locusPositions) throws FileNotFoundException, IOException {
    InputStream resource = HLAFrequenciesLoader.class.getClassLoader().getResourceAsStream(filename);
    if (resource == null) {
        // getResourceAsStream signals a missing resource with null; report it as the
        // declared FileNotFoundException rather than an opaque NullPointerException.
        throw new FileNotFoundException("Linkage reference resource not found on classpath: " + filename);
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(resource));
    // try/finally so the stream is released even when a malformed row aborts the load.
    try {
        List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
        String row;
        while ((row = reader.readLine()) != null) {
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            String[] columns = row.split(GLStringConstants.TAB);
            for (int i = 0; i < locusPositions.length; i++) {
                List<String> val = new ArrayList<String>();
                val.add(GLStringConstants.DASH.equals(columns[i]) ? GLStringConstants.NNNN : columns[i]);
                hlaElementMap.put(locusPositions[i], val);
            }
            // The two columns after the locus columns complete the element.
            disequilibriumElements.add(new BaseDisequilibriumElement(hlaElementMap, columns[locusPositions.length], columns[locusPositions.length + 1]));
        }
        return disequilibriumElements;
    } finally {
        reader.close();
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) BufferedReader(java.io.BufferedReader) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) ArrayList(java.util.ArrayList) List(java.util.List) Locus(org.dash.valid.Locus)

Example 10 with Locus

use of org.dash.valid.Locus in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class NormalizeFrequencyFile method call.

/**
 * Rewrites the input frequency file into the normalized output format.
 *
 * <p>When {@code frequencies} equals {@code SINGLE}, the input is read with
 * {@code HLAFrequenciesLoader.loadIndividualLocusFrequency} and each returned entry is
 * written on its own line. Otherwise {@code frequencies} is treated as a linkage name: the
 * linkage is registered through {@code LinkagesLoader.getInstance}, the input is read with
 * {@code HLAFrequenciesLoader.loadNMDPLinkageReferenceData}, and for every element one line
 * per race is written as {@code race, haplotype, frequency, rank} separated by
 * {@code GLStringConstants.COMMA}. The haplotype is the first value of each locus of the
 * element, joined with {@code GLStringConstants.GENE_PHASE_DELIMITER}.
 *
 * <p>The output writer is always closed, including when loading or writing fails.
 * NOTE(review): the {@code FileInputStream}s handed to the loaders are not closed here;
 * presumably the loaders close them, which should be confirmed.
 *
 * @return {@code 0} on completion
 * @throws Exception if the output file cannot be opened or the input cannot be loaded
 * @throws ClassCastException if a loaded element is not a {@link DisequilibriumElementByRace}
 */
@Override
public Integer call() throws Exception {
    PrintWriter writer = new PrintWriter(outputFile);
    // try/finally so buffered output is flushed and the file handle released on failure too.
    try {
        if (SINGLE.equals(frequencies)) {
            List<String> singleLocusFrequencies = HLAFrequenciesLoader.loadIndividualLocusFrequency(new FileInputStream(inputFile));
            for (String allele : singleLocusFrequencies) {
                writer.write(allele + GLStringConstants.NEWLINE);
            }
        } else {
            HashSet<String> linkageNames = new HashSet<String>();
            linkageNames.add(frequencies);
            Set<Linkages> linkagesSet = Linkages.lookup(linkageNames);
            LinkagesLoader.getInstance(linkagesSet);
            List<DisequilibriumElement> disequilibriumElements = HLAFrequenciesLoader.loadNMDPLinkageReferenceData(new FileInputStream(inputFile), LOCUS_POSITION_MAP.get(Linkages.lookup(frequencies).getLoci()));
            for (DisequilibriumElement element : disequilibriumElements) {
                // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
                StringBuilder sb = new StringBuilder();
                int locusCounter = 0;
                for (Locus locus : Locus.lookup(element.getLoci())) {
                    if (locusCounter > 0) {
                        sb.append(GLStringConstants.GENE_PHASE_DELIMITER);
                    }
                    sb.append(element.getHlaElement(locus).get(0));
                    locusCounter++;
                }
                // Named distinctly so it no longer shadows the 'frequencies' field read above.
                List<FrequencyByRace> frequenciesByRace = ((DisequilibriumElementByRace) element).getFrequenciesByRace();
                for (FrequencyByRace frequency : frequenciesByRace) {
                    writer.write(frequency.getRace() + GLStringConstants.COMMA + sb + GLStringConstants.COMMA + frequency.getFrequency() + GLStringConstants.COMMA + frequency.getRank() + GLStringConstants.NEWLINE);
                }
            }
        }
    } finally {
        writer.close();
    }
    return 0;
}
Also used : Linkages(org.dash.valid.Linkages) FileInputStream(java.io.FileInputStream) DisequilibriumElementByRace(org.dash.valid.race.DisequilibriumElementByRace) DisequilibriumElement(org.dash.valid.DisequilibriumElement) Locus(org.dash.valid.Locus) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) FrequencyByRace(org.dash.valid.race.FrequencyByRace)

Aggregations

Locus (org.dash.valid.Locus)18 HashMap (java.util.HashMap)10 ArrayList (java.util.ArrayList)9 List (java.util.List)8 HashSet (java.util.HashSet)5 DisequilibriumElement (org.dash.valid.DisequilibriumElement)4 BaseDisequilibriumElement (org.dash.valid.base.BaseDisequilibriumElement)4 MultiLocusHaplotype (org.dash.valid.gl.haplo.MultiLocusHaplotype)4 DisequilibriumElementByRace (org.dash.valid.race.DisequilibriumElementByRace)4 FrequencyByRace (org.dash.valid.race.FrequencyByRace)4 Test (org.junit.Test)4 BufferedReader (java.io.BufferedReader)3 FileInputStream (java.io.FileInputStream)3 InputStreamReader (java.io.InputStreamReader)3 DisequilibriumElementComparator (org.dash.valid.DisequilibriumElementComparator)3 HaplotypePair (org.dash.valid.gl.haplo.HaplotypePair)3 SingleLocusHaplotype (org.dash.valid.gl.haplo.SingleLocusHaplotype)3 DetectedDisequilibriumElement (org.dash.valid.report.DetectedDisequilibriumElement)3 AlleleList (org.nmdp.gl.AlleleList)3 GenotypeList (org.nmdp.gl.GenotypeList)3