Search in sources :

Example 1 with DisequilibriumElement

use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method loadNMDPLinkageReferenceData.

/**
 * Reads disequilibrium elements from the first sheet of a spreadsheet workbook.
 *
 * <p>The sheet's first row is handed to {@code readHeaderElementsByRace} to obtain the
 * headers; every other row is converted by {@code readDiseqilibriumElementsByRace} using
 * those headers and {@code locusPositions}.
 *
 * <p>The workbook is closed before this method returns, whether it completes normally or
 * a row fails to parse.
 *
 * @param inStream stream the workbook is created from
 * @param locusPositions loci passed through to the per-row reader
 * @return one element per non-header row, in sheet iteration order
 * @throws IOException if the workbook cannot be read or closed
 * @throws InvalidFormatException declared for workbook creation
 */
public static List<DisequilibriumElement> loadNMDPLinkageReferenceData(InputStream inStream, Locus[] locusPositions) throws IOException, InvalidFormatException {
    List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
    Workbook workbook = WorkbookFactory.create(inStream);
    try {
        // Only the first sheet of the workbook is consulted.
        Sheet mySheet = workbook.getSheetAt(0);
        int firstRow = mySheet.getFirstRowNum();
        List<String> raceHeaders = null;
        for (Row row : mySheet) {
            if (row.getRowNum() == firstRow) {
                raceHeaders = readHeaderElementsByRace(row);
            } else {
                disequilibriumElements.add(readDiseqilibriumElementsByRace(row, raceHeaders, locusPositions));
            }
        }
    } finally {
        // Release the workbook even when a row reader throws.
        workbook.close();
    }
    return disequilibriumElements;
}
Also used : BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) ArrayList(java.util.ArrayList) Row(org.apache.poi.ss.usermodel.Row) Sheet(org.apache.poi.ss.usermodel.Sheet) Workbook(org.apache.poi.ss.usermodel.Workbook)

Example 2 with DisequilibriumElement

use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method loadStandardReferenceData.

/**
 * Loads disequilibrium elements from delimited reference data.
 *
 * <p>Each line is split on {@code GLStringConstants.COMMA} and read positionally as
 * race (column 0), haplotype (column 1), frequency (column 2) and rank (column 3).
 * Lines sharing the same haplotype string are grouped, and one
 * {@code DisequilibriumElementByRace} is produced per distinct haplotype.
 *
 * <p>The reader is closed before this method returns, including when a line cannot be
 * parsed.
 *
 * @param reader source of the delimited lines; closed by this method
 * @return one element per distinct haplotype (order follows the backing {@code HashMap})
 * @throws IOException if reading from or closing {@code reader} fails
 */
public static List<DisequilibriumElement> loadStandardReferenceData(BufferedReader reader) throws IOException {
    try {
        HashMap<String, List<FrequencyByRace>> frequencyMap = new HashMap<String, List<FrequencyByRace>>();
        String row;
        while ((row = reader.readLine()) != null) {
            String[] columns = row.split(GLStringConstants.COMMA);
            String race = columns[0];
            String haplotype = columns[1];
            // Double.valueOf replaces the deprecated Double(String) constructor.
            Double frequency = Double.valueOf(columns[2]);
            String rank = columns[3];
            List<FrequencyByRace> freqList = frequencyMap.get(haplotype);
            if (freqList == null) {
                // Register the list once; later rows for this haplotype append to it.
                freqList = new ArrayList<FrequencyByRace>();
                frequencyMap.put(haplotype, freqList);
            }
            freqList.add(new FrequencyByRace(frequency, rank, race));
        }

        List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
        // Caches the locus resolved for each locus-name prefix so each is looked up once.
        HashMap<String, Locus> locusMap = new HashMap<String, Locus>();
        for (String haplotype : frequencyMap.keySet()) {
            String[] locusHaplotypes = haplotype.split(GLStringConstants.GENE_PHASE_DELIMITER);
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            for (String locusHaplotype : locusHaplotypes) {
                // The text before the asterisk names the locus.
                String[] parts = locusHaplotype.split(GLStringUtilities.ESCAPED_ASTERISK);
                List<String> val = new ArrayList<String>();
                val.add(locusHaplotype);
                Locus locus;
                if (locusMap.containsKey(parts[0])) {
                    locus = locusMap.get(parts[0]);
                } else {
                    locus = Locus.normalizeLocus(Locus.lookup(parts[0]));
                    locusMap.put(parts[0], locus);
                }
                hlaElementMap.put(locus, val);
            }
            disequilibriumElements.add(new DisequilibriumElementByRace(hlaElementMap, frequencyMap.get(haplotype)));
        }
        return disequilibriumElements;
    } finally {
        // Close on every path so a malformed line does not leak the reader.
        reader.close();
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DisequilibriumElementByRace(org.dash.valid.race.DisequilibriumElementByRace) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) ArrayList(java.util.ArrayList) List(java.util.List) Locus(org.dash.valid.Locus) FrequencyByRace(org.dash.valid.race.FrequencyByRace)

Example 3 with DisequilibriumElement

use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method init.

/**
 * Loads every frequency file into {@code disequilibriumElementsMap}, registers the
 * linkages implied by the loaded loci, and optionally loads individual locus frequencies.
 *
 * <p>For each file, the loci are taken from the first loaded element and used both as the
 * map key and to look up the matching linkages. An {@link IOException} is logged and then
 * terminates the JVM with exit status -1.
 *
 * @param frequencyFiles files read through {@code loadStandardReferenceData}
 * @param allelesFile file passed to {@code loadIndividualLocusFrequencies}; skipped when null
 */
private void init(Set<File> frequencyFiles, File allelesFile) {
    Set<Linkages> linkages = new HashSet<Linkages>();
    try {
        for (File frequencyFile : frequencyFiles) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(frequencyFile)));
            List<DisequilibriumElement> elements;
            try {
                elements = loadStandardReferenceData(reader);
            } finally {
                // Guarantees the underlying file handle is released even if loading fails;
                // closing an already-closed reader has no effect.
                reader.close();
            }
            // NOTE(review): a file that yields no elements makes next() throw
            // NoSuchElementException, which is not caught below.
            EnumSet<Locus> loci = Locus.lookup(elements.iterator().next().getLoci());
            linkages.addAll(Linkages.lookup(loci));
            this.disequilibriumElementsMap.put(loci, elements);
        }
        LinkagesLoader.getInstance(linkages);
        if (allelesFile != null) {
            loadIndividualLocusFrequencies(allelesFile);
        }
    } catch (IOException e) {
        LOGGER.severe("Couldn't load disequilibrium element reference file.");
        e.printStackTrace();
        System.exit(-1);
    }
}
Also used : Linkages(org.dash.valid.Linkages) InputStreamReader(java.io.InputStreamReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) BufferedReader(java.io.BufferedReader) Locus(org.dash.valid.Locus) File(java.io.File) HashSet(java.util.HashSet)

Example 4 with DisequilibriumElement

use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class HLAFrequenciesLoader method loadLinkageReferenceData.

/**
 * Loads disequilibrium elements from a tab-delimited classpath resource.
 *
 * <p>Each line is split on {@code GLStringConstants.TAB}. The first
 * {@code locusPositions.length} columns are mapped, in order, to the given loci; a column
 * equal to {@code GLStringConstants.DASH} is stored as {@code GLStringConstants.NNNN}.
 * The two columns that follow are passed to the {@code BaseDisequilibriumElement}
 * constructor.
 *
 * @param filename resource name resolved through this class's class loader
 * @param locusPositions locus for each leading column, in column order
 * @return one element per line of the resource, in file order
 * @throws FileNotFoundException if no resource named {@code filename} is found
 * @throws IOException if reading or closing the resource fails
 */
public List<DisequilibriumElement> loadLinkageReferenceData(String filename, Locus[] locusPositions) throws FileNotFoundException, IOException {
    InputStream resource = HLAFrequenciesLoader.class.getClassLoader().getResourceAsStream(filename);
    if (resource == null) {
        // getResourceAsStream signals a missing resource with null; surface it as the
        // declared exception instead of a NullPointerException from the reader.
        throw new FileNotFoundException("Resource not found on classpath: " + filename);
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(resource));
    try {
        List<DisequilibriumElement> disequilibriumElements = new ArrayList<DisequilibriumElement>();
        String row;
        while ((row = reader.readLine()) != null) {
            HashMap<Locus, List<String>> hlaElementMap = new HashMap<Locus, List<String>>();
            String[] columns = row.split(GLStringConstants.TAB);
            for (int i = 0; i < locusPositions.length; i++) {
                List<String> val = new ArrayList<String>();
                val.add(GLStringConstants.DASH.equals(columns[i]) ? GLStringConstants.NNNN : columns[i]);
                hlaElementMap.put(locusPositions[i], val);
            }
            disequilibriumElements.add(new BaseDisequilibriumElement(hlaElementMap, columns[locusPositions.length], columns[locusPositions.length + 1]));
        }
        return disequilibriumElements;
    } finally {
        // Close on every path so a malformed line does not leak the stream.
        reader.close();
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) DisequilibriumElement(org.dash.valid.DisequilibriumElement) BufferedReader(java.io.BufferedReader) BaseDisequilibriumElement(org.dash.valid.base.BaseDisequilibriumElement) ArrayList(java.util.ArrayList) List(java.util.List) Locus(org.dash.valid.Locus)

Example 5 with DisequilibriumElement

use of org.dash.valid.DisequilibriumElement in project ImmunogeneticDataTools by nmdp-bioinformatics.

the class NormalizeFrequencyFile method call.

/**
 * Converts {@code inputFile} into a normalized text file written to {@code outputFile}.
 *
 * <p>When {@code frequencies} equals {@code SINGLE}, each string returned by
 * {@code HLAFrequenciesLoader.loadIndividualLocusFrequency} is written on its own line.
 * Otherwise the input is loaded with {@code loadNMDPLinkageReferenceData} and, for every
 * element and every frequency-by-race entry, one line is written containing race,
 * haplotype, frequency and rank separated by {@code GLStringConstants.COMMA}. The
 * haplotype is the first value of each locus joined by
 * {@code GLStringConstants.GENE_PHASE_DELIMITER}.
 *
 * <p>The output writer and the input stream are closed on every path.
 *
 * @return always 0 on normal completion
 * @throws Exception if the input cannot be opened or loaded, or the output cannot be created
 */
@Override
public Integer call() throws Exception {
    PrintWriter writer = new PrintWriter(outputFile);
    try {
        if (SINGLE.equals(frequencies)) {
            List<String> singleLocusFrequencies;
            FileInputStream input = new FileInputStream(inputFile);
            try {
                singleLocusFrequencies = HLAFrequenciesLoader.loadIndividualLocusFrequency(input);
            } finally {
                input.close();
            }
            for (String allele : singleLocusFrequencies) {
                writer.write(allele + GLStringConstants.NEWLINE);
            }
        } else {
            HashSet<String> linkageNames = new HashSet<String>();
            linkageNames.add(frequencies);
            Set<Linkages> linkagesSet = Linkages.lookup(linkageNames);
            LinkagesLoader.getInstance(linkagesSet);
            List<DisequilibriumElement> disequilibriumElements;
            FileInputStream input = new FileInputStream(inputFile);
            try {
                disequilibriumElements = HLAFrequenciesLoader.loadNMDPLinkageReferenceData(input, LOCUS_POSITION_MAP.get(Linkages.lookup(frequencies).getLoci()));
            } finally {
                input.close();
            }
            for (DisequilibriumElement element : disequilibriumElements) {
                // Build the haplotype string from the first value stored for each locus.
                StringBuilder haplotype = new StringBuilder();
                int locusCounter = 0;
                for (Locus locus : Locus.lookup(element.getLoci())) {
                    if (locusCounter > 0) {
                        haplotype.append(GLStringConstants.GENE_PHASE_DELIMITER);
                    }
                    haplotype.append(element.getHlaElement(locus).get(0));
                    locusCounter++;
                }
                // Named so it does not shadow the 'frequencies' field used above.
                List<FrequencyByRace> frequenciesByRace = ((DisequilibriumElementByRace) element).getFrequenciesByRace();
                for (FrequencyByRace frequency : frequenciesByRace) {
                    writer.write(frequency.getRace() + GLStringConstants.COMMA + haplotype + GLStringConstants.COMMA + frequency.getFrequency() + GLStringConstants.COMMA + frequency.getRank() + GLStringConstants.NEWLINE);
                }
            }
        }
    } finally {
        // Flushes and releases the output file even when loading or formatting fails.
        writer.close();
    }
    return 0;
}
Also used : Linkages(org.dash.valid.Linkages) FileInputStream(java.io.FileInputStream) DisequilibriumElementByRace(org.dash.valid.race.DisequilibriumElementByRace) DisequilibriumElement(org.dash.valid.DisequilibriumElement) Locus(org.dash.valid.Locus) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) FrequencyByRace(org.dash.valid.race.FrequencyByRace)

Aggregations

DisequilibriumElement (org.dash.valid.DisequilibriumElement)5 Locus (org.dash.valid.Locus)4 BaseDisequilibriumElement (org.dash.valid.base.BaseDisequilibriumElement)4 ArrayList (java.util.ArrayList)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 InputStreamReader (java.io.InputStreamReader)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Linkages (org.dash.valid.Linkages)2 DisequilibriumElementByRace (org.dash.valid.race.DisequilibriumElementByRace)2 FrequencyByRace (org.dash.valid.race.FrequencyByRace)2 File (java.io.File)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 PrintWriter (java.io.PrintWriter)1 HashMap (java.util.HashMap)1 Row (org.apache.poi.ss.usermodel.Row)1 Sheet (org.apache.poi.ss.usermodel.Sheet)1 Workbook (org.apache.poi.ss.usermodel.Workbook)1