Examples with DataSet - uk.ac.babraham.SeqMonk.DataTypes.DataSet

Example 1 with DataSet

use of uk.ac.babraham.SeqMonk.DataTypes.DataSet in project SeqMonk by s-andrews.

the class DataCollectionTreeModel method dataSetsRemoved.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.DataTypes.DataChangeListener#dataSetsRemoved(uk.ac.babraham.SeqMonk.DataTypes.DataSet[])
	 */
public void dataSetsRemoved(DataSet[] d) {
    // Key every removed data set by its child index under dataSetNode so
    // the removal event can list indices and children in ascending order.
    Hashtable<Integer, DataSet> setsByIndex = new Hashtable<Integer, DataSet>();
    for (DataSet removed : d) {
        setsByIndex.put(getIndexOfChild(dataSetNode, removed), removed);
    }

    // The keys come back boxed; sort them first, then unbox them into the
    // primitive array the event needs while collecting the matching sets.
    Integer[] sortedIndices = setsByIndex.keySet().toArray(new Integer[0]);
    Arrays.sort(sortedIndices);

    int[] childIndices = new int[sortedIndices.length];
    DataSet[] children = new DataSet[sortedIndices.length];
    for (int pos = 0; pos < sortedIndices.length; pos++) {
        Integer index = sortedIndices[pos];
        childIndices[pos] = index;
        children[pos] = setsByIndex.get(index);
    }

    // Tell every registered listener about the removal
    TreeModelEvent removalEvent = new TreeModelEvent(d, getPathToRoot(dataSetNode), childIndices, children);
    for (Enumeration<TreeModelListener> en = listeners.elements(); en.hasMoreElements();) {
        en.nextElement().treeNodesRemoved(removalEvent);
    }
}

Also used : TreeModelEvent(javax.swing.event.TreeModelEvent) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Hashtable(java.util.Hashtable) TreeModelListener(javax.swing.event.TreeModelListener)

Example 2 with DataSet

use of uk.ac.babraham.SeqMonk.DataTypes.DataSet in project SeqMonk by s-andrews.

the class ActiveProbeListParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Build one data set from the active list and pass it on wrapped in a
    // single-element array.
    DataSet[] results = { processNormalDataStore(activeList) };
    processingFinished(results);
}

Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet)

Example 3 with DataSet

use of uk.ac.babraham.SeqMonk.DataTypes.DataSet in project SeqMonk by s-andrews.

the class BedFileParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Reads every file returned by getFiles() as tab-delimited BED data
    // (gzip compressed when the name ends in .gz) into a new DataSet, or a
    // PairedDataSet when prefs.isHiC() is set. Lines which can't be used are
    // reported through progressWarningReceived and skipped; any other
    // failure is passed to progressExceptionReceived and stops the import.
    int extendBy = prefs.extendReads();
    try {
        File[] probeFiles = getFiles();
        DataSet[] newData = new DataSet[probeFiles.length];
        for (int f = 0; f < probeFiles.length; f++) {
            BufferedReader br;
            if (probeFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(probeFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(probeFiles[f]));
            }
            // The reader is closed in the finally block so that the file
            // handle is released on every exit path (normal completion,
            // cancellation, or an exception part way through the file).
            try {
                String line;
                if (prefs.isHiC()) {
                    newData[f] = new PairedDataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
                } else {
                    newData[f] = new DataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                }
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0) {
                        continue;
                    }
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + probeFiles[f].getName(), f, probeFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 3) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int strand;
                    int start;
                    int end;
                    try {
                        // The start is zero indexed so we need to add 1 to get genomic positions
                        start = Integer.parseInt(sections[1]) + 1;
                        // The end is zero indexed, but not included in the feature position so
                        // the +1 for genomic coordinates and the -1 for excluding the final
                        // base cancel out and the value is used as it is.
                        end = Integer.parseInt(sections[2]);
                        // End must always be later than start
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                        if (sections.length >= 6) {
                            if (sections[5].equals("+")) {
                                strand = Location.FORWARD;
                            } else if (sections[5].equals("-")) {
                                strand = Location.REVERSE;
                            } else {
                                progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[5] + "' marked as unknown strand"));
                                strand = Location.UNKNOWN;
                            }
                            // Reads are only extended when we know which way they point
                            if (extendBy > 0) {
                                if (strand == Location.REVERSE) {
                                    start -= extendBy;
                                } else if (strand == Location.FORWARD) {
                                    end += extendBy;
                                }
                            }
                        } else {
                            strand = Location.UNKNOWN;
                        }
                    } catch (NumberFormatException e) {
                        // Report the start and end fields, which are the values we tried to parse
                        progressWarningReceived(new SeqMonkException("Location " + sections[1] + "-" + sections[2] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start = c.position(start);
                        end = c.position(end);
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reading
                        long read = SequenceRead.packPosition(start, end, strand);
                        newData[f].addData(c.chromosome(), read);
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file, however we got here.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + probeFiles[f].getName(), f, probeFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}

Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File)

Example 4 with DataSet

use of uk.ac.babraham.SeqMonk.DataTypes.DataSet in project SeqMonk by s-andrews.

the class GenericSeqReadParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Reads every file returned by getFiles() as delimited text (gzip
    // compressed when the name ends in .gz), using the configured column
    // indices to pull out chromosome, start, end and optionally strand and
    // count. Each file becomes a DataSet, or a PairedDataSet when the options
    // panel says the data is HiC. Unusable lines are reported through
    // progressWarningReceived and skipped; any other failure is passed to
    // progressExceptionReceived and stops the import.
    try {
        // This call just makes sure that the options panel exists if
        // it's never been called for before.
        getOptionsPanel();
        int removeDuplicates = optionsPanel.removeDuplicates();
        int extendBy = optionsPanel.extendBy();
        File[] probeFiles = getFiles();
        DataSet[] newData = new DataSet[probeFiles.length];
        for (int f = 0; f < probeFiles.length; f++) {
            BufferedReader br;
            if (probeFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(probeFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(probeFiles[f]));
            }
            // The reader is closed in the finally block so that the file
            // handle is released on every exit path (normal completion,
            // cancellation, or an exception part way through the file).
            try {
                String line;
                // First skip the header lines
                for (int i = 0; i < startRowValue; i++) {
                    line = br.readLine();
                    if (line == null) {
                        throw new Exception("Ran out of file before skipping all of the header lines");
                    }
                }
                // Work out the highest column index we'll need so that short
                // lines can be rejected before we index into them.
                int maxIndexValue = 0;
                if (chrColValue > maxIndexValue) {
                    maxIndexValue = chrColValue;
                }
                if (startColValue > maxIndexValue) {
                    maxIndexValue = startColValue;
                }
                if (endColValue > maxIndexValue) {
                    maxIndexValue = endColValue;
                }
                if (strandColValue > maxIndexValue) {
                    maxIndexValue = strandColValue;
                }
                if (countColValue > maxIndexValue) {
                    maxIndexValue = countColValue;
                }
                if (optionsPanel.isHiC()) {
                    int distance = 0;
                    if (optionsPanel.hiCDistance.getText().length() > 0) {
                        distance = Integer.parseInt(optionsPanel.hiCDistance.getText());
                    }
                    // TODO: Add an option to remove trans hits when importing from the generic parser.
                    newData[f] = new PairedDataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), removeDuplicates, distance, false);
                } else {
                    newData[f] = new DataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), removeDuplicates);
                }
                int lineCount = 0;
                // Now process the rest of the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + probeFiles[f].getName(), f, probeFiles.length);
                    }
                    String[] sections = line.split(delimitersValue);
                    // Check to see if we've got enough data to work with
                    if (maxIndexValue >= sections.length) {
                        progressWarningReceived(new SeqMonkException("Not enough data (" + sections.length + ") to get a probe name on line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int strand;
                    int start;
                    int end;
                    int count = 1;
                    try {
                        start = Integer.parseInt(sections[startColValue].replaceAll(" ", ""));
                        end = Integer.parseInt(sections[endColValue].replaceAll(" ", ""));
                        // End must always be later than start
                        if (end < start) {
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                        // A count column index of -1 means no count column is in use
                        if (countColValue != -1 && sections[countColValue].length() > 0) {
                            try {
                                count = Integer.parseInt(sections[countColValue].replaceAll(" ", ""));
                            } catch (NumberFormatException e) {
                                progressWarningReceived(new SeqMonkException("Count value " + sections[countColValue] + " was not an integer"));
                                continue;
                            }
                        }
                        if (useStrand) {
                            sections[strandColValue] = sections[strandColValue].replaceAll(" ", "");
                            if (sections[strandColValue].equals("+") || sections[strandColValue].equals("1") || sections[strandColValue].equals("FF") || sections[strandColValue].equals("F")) {
                                strand = Location.FORWARD;
                            } else if (sections[strandColValue].equals("-") || sections[strandColValue].equals("-1") || sections[strandColValue].equals("RF") || sections[strandColValue].equals("R")) {
                                strand = Location.REVERSE;
                            } else {
                                progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[strandColValue] + "' marked as unknown strand"));
                                strand = Location.UNKNOWN;
                            }
                        } else {
                            strand = Location.UNKNOWN;
                        }
                        // Reverse reads grow at the start, everything else
                        // (including unknown strand) grows at the end.
                        if (extendBy > 0) {
                            if (strand == Location.REVERSE) {
                                start -= extendBy;
                            } else {
                                end += extendBy;
                            }
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location '" + sections[startColValue] + "'-'" + sections[endColValue] + "' was not an integer"));
                        continue;
                    }
                    ChromosomeWithOffset c;
                    try {
                        c = dataCollection().genome().getChromosome(sections[chrColValue]);
                    } catch (IllegalArgumentException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                    start = c.position(start);
                    end = c.position(end);
                    // We also don't allow readings which are beyond the end of the chromosome
                    if (end > c.chromosome().length()) {
                        int overrun = end - c.chromosome().length();
                        progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                        continue;
                    }
                    if (start < 1) {
                        progressWarningReceived(new SeqMonkException("Reading start position " + start + " was less than 1"));
                        continue;
                    }
                    // We can now make the new reading, added once per count
                    try {
                        long read = SequenceRead.packPosition(start, end, strand);
                        for (int i = 0; i < count; i++) {
                            newData[f].addData(c.chromosome(), read);
                        }
                    } catch (SeqMonkException e) {
                        progressWarningReceived(e);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file, however we got here.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + probeFiles[f].getName(), f, probeFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}

Example 5 with DataSet

use of uk.ac.babraham.SeqMonk.DataTypes.DataSet in project SeqMonk by s-andrews.

the class MethylKitFileParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Reads every file returned by getFiles() as tab-delimited methylKit
    // data (gzip compressed when the name ends in .gz) into a new DataSet.
    // For each position the two percentage columns are turned back into
    // counts, which are stored as that many single-base forward reads
    // (column 5) and reverse reads (column 6). Unusable lines are reported
    // through progressWarningReceived and skipped; any other failure is
    // passed to progressExceptionReceived and stops the import.
    try {
        File[] methylKitFiles = getFiles();
        DataSet[] newData = new DataSet[methylKitFiles.length];
        for (int f = 0; f < methylKitFiles.length; f++) {
            BufferedReader br;
            if (methylKitFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(methylKitFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(methylKitFiles[f]));
            }
            // The reader is closed in the finally block so that the file
            // handle is released on every exit path (normal completion,
            // cancellation, or an exception part way through the file).
            try {
                String line;
                newData[f] = new DataSet(methylKitFiles[f].getName(), methylKitFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0) {
                        continue;
                    }
                    // In case it has comments
                    if (line.startsWith("#")) {
                        continue;
                    }
                    // This is the start of the header
                    if (line.startsWith("chrBase")) {
                        continue;
                    }
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + methylKitFiles[f].getName(), f, methylKitFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with. The
                    // highest column read below is index 6, so we need at
                    // least seven fields; a shorter line would otherwise throw
                    // an ArrayIndexOutOfBoundsException and abort the import.
                    if (sections.length < 7) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int position;
                    int totalCount;
                    int methCount;
                    int unmethCount;
                    try {
                        position = Integer.parseInt(sections[2]);
                        totalCount = Integer.parseInt(sections[4]);
                        // Columns 5 and 6 are percentages of the total count
                        methCount = Math.round((Float.parseFloat(sections[5]) / 100) * totalCount);
                        unmethCount = Math.round((Float.parseFloat(sections[6]) / 100) * totalCount);
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Failed to parse position and counts from " + line));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[1]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        if (position > c.chromosome().length()) {
                            int overrun = position - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + position + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reads
                        long methRead = SequenceRead.packPosition(position, position, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(position, position, Location.REVERSE);
                        for (int i = 0; i < methCount; i++) {
                            newData[f].addData(c.chromosome(), methRead);
                        }
                        for (int i = 0; i < unmethCount; i++) {
                            newData[f].addData(c.chromosome(), unmethRead);
                        }
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file, however we got here.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + methylKitFiles[f].getName(), f, methylKitFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}

Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File)

Aggregations

DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet) 36, SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException) 22, DataGroup (uk.ac.babraham.SeqMonk.DataTypes.DataGroup) 16, PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) 14, File (java.io.File) 12, Vector (java.util.Vector) 11, DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore) 11, BufferedReader (java.io.BufferedReader) 10, FileReader (java.io.FileReader) 10, FileInputStream (java.io.FileInputStream) 9, InputStreamReader (java.io.InputStreamReader) 9, GZIPInputStream (java.util.zip.GZIPInputStream) 9, Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) 8, ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) 7, Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) 6, JLabel (javax.swing.JLabel) 5, ReplicateSet (uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet) 5, IOException (java.io.IOException) 4, GridBagConstraints (java.awt.GridBagConstraints) 3, GridBagLayout (java.awt.GridBagLayout) 3