Search in sources :

Example 1 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class HeatmapProbeListPanel method paint.

/**
 * Paints the probe list heatmap.
 *
 * The method draws, in order: a label summarising the active filters, the
 * white plot background, one filled cell per filtered interaction (each
 * interaction is drawn twice, once in each orientation), the probe list or
 * cluster boundary lines, the two axes and, while a selection is being made,
 * the selection rectangle.
 *
 * @param g the graphics context to draw into
 */
public void paint(Graphics g) {
    super.paint(g);
    // Reset the record of which pixels have been painted. The column check is
    // guarded so a zero-width panel doesn't index into an empty array.
    if (drawnPixels.length != getWidth() || (drawnPixels.length > 0 && drawnPixels[0].length != getHeight())) {
        drawnPixels = new boolean[getWidth()][getHeight()];
    } else {
        for (int i = 0; i < getWidth(); i++) {
            for (int j = 0; j < getHeight(); j++) {
                drawnPixels[i][j] = false;
            }
        }
    }
    g.setColor(Color.WHITE);
    g.fillRect(0, 0, getWidth(), getHeight());
    g.setColor(Color.BLACK);
    // Add a label at the top to signify current filters
    StringBuilder topLabel = new StringBuilder();
    // Now append any current limits
    if (matrix.currentMinStrength() > 0) {
        topLabel.append("Strength > ");
        topLabel.append(df.format(matrix.currentMinStrength()));
        topLabel.append(" ");
    }
    if (matrix.minDifference() > 0) {
        topLabel.append("Difference > ");
        topLabel.append(df.format(matrix.minDifference()));
        topLabel.append(" ");
    }
    if (matrix.currentMaxSignficance() < 1) {
        topLabel.append("P-value < ");
        topLabel.append(df.format(matrix.currentMaxSignficance()));
        topLabel.append(" ");
    }
    if (topLabel.length() == 0) {
        topLabel.append("No filters");
    }
    // Centre the label horizontally in the panel
    g.drawString(topLabel.toString(), getWidth() / 2 - (g.getFontMetrics().stringWidth(topLabel.toString()) / 2), 15 + (g.getFontMetrics().getAscent() / 2));
    // Find the max height and width of the probe list names. This is only
    // done while maxNameWidth is still 0, and also records the index offset
    // at which each probe list starts.
    if (maxNameWidth == 0) {
        nameHeight = g.getFontMetrics().getHeight();
        long runningBaseOffset = 0;
        for (int l = 0; l < probeLists.length; l++) {
            probeListIndexOffsets.put(probeLists[l], runningBaseOffset);
            int thisWidth = g.getFontMetrics().stringWidth(probeLists[l].name());
            if (thisWidth > maxNameWidth)
                maxNameWidth = thisWidth;
            runningBaseOffset += probeLists[l].getAllProbes().length;
        }
        // Give both the width and height a bit of breathing space
        nameHeight += 6;
        maxNameWidth += 6;
    }
    // Make the background of the plot white
    g.setColor(Color.WHITE);
    g.fillRect(maxNameWidth, 30, getWidth() - (maxNameWidth + 10), getHeight() - (nameHeight + 30));
    // Draw the actual data
    InteractionProbePair[] interactions = matrix.filteredInteractions();
    // Cache some values for use with the quantitation colouring
    double minQuantitatedValue;
    double maxQuantitatedValue;
    if (DisplayPreferences.getInstance().getScaleType() == DisplayPreferences.SCALE_TYPE_POSITIVE) {
        minQuantitatedValue = 0;
        maxQuantitatedValue = DisplayPreferences.getInstance().getMaxDataValue();
    } else {
        // Any other scale type is drawn symmetrically around zero
        maxQuantitatedValue = DisplayPreferences.getInstance().getMaxDataValue();
        minQuantitatedValue = 0 - maxQuantitatedValue;
    }
    for (int i = 0; i < interactions.length; i++) {
        // Each interaction is drawn twice with the probes swapped between the
        // x and y axes
        for (int forRev = 0; forRev <= 1; forRev++) {
            int probe1Index;
            int probe2Index;
            if (forRev == 0) {
                probe1Index = interactions[i].probe1Index();
                probe2Index = interactions[i].probe2Index();
            } else {
                probe2Index = interactions[i].probe1Index();
                probe1Index = interactions[i].probe2Index();
            }
            int xIndex = probe1Index;
            if (probeSortingOrder != null) {
                xIndex = probeSortingOrder[xIndex];
            }
            // Skip anything which falls outside the currently visible region
            if (xIndex < currentXStartIndex)
                continue;
            if (xIndex > currentXEndIndex)
                continue;
            int xStart = getXForPosition(xIndex);
            if (xStart < maxNameWidth)
                continue;
            int xEnd = getXForPosition(xIndex + 1);
            if (xEnd > getWidth() - 10)
                continue;
            int yIndex = probe2Index;
            if (probeSortingOrder != null) {
                yIndex = probeSortingOrder[yIndex];
            }
            if (yIndex < currentYStartIndex)
                continue;
            if (yIndex > currentYEndIndex)
                continue;
            int yStart = getYForPosition(yIndex);
            if (yStart > getHeight() - nameHeight)
                continue;
            int yEnd = getYForPosition(yIndex + 1);
            if (yEnd < 30)
                continue;
            // Make sure very small cells are still visible
            if (xEnd - xStart < 3) {
                xEnd += 1;
                xStart -= 1;
            }
            // The y axis is inverted (yStart is the larger pixel value - see
            // the fillRect call below) so the drawn height is yStart - yEnd.
            // Testing yEnd - yStart would always be true and enlarge every cell.
            if (yStart - yEnd < 3) {
                yEnd -= 1;
                yStart += 1;
            }
            // See if we can skip drawing this because something else is already there
            if (drawnPixels[xStart][yEnd] && drawnPixels[xEnd][yStart]) {
                continue;
            }
            switch(matrix.currentColourSetting()) {
                case HeatmapMatrix.COLOUR_BY_OBS_EXP:
                    g.setColor(matrix.colourGradient().getColor(Math.log10(interactions[i].strength() - matrix.initialMinStrength()), Math.log10(matrix.initialMinStrength()), Math.log10(matrix.maxValue() - matrix.initialMinStrength())));
                    break;
                case HeatmapMatrix.COLOUR_BY_INTERACTIONS:
                    g.setColor(matrix.colourGradient().getColor(interactions[i].absolute(), matrix.initialMinAbsolute(), 50));
                    break;
                case HeatmapMatrix.COLOUR_BY_P_VALUE:
                    g.setColor(matrix.colourGradient().getColor(Math.log10(interactions[i].signficance()) * -10, Math.log10(matrix.initialMaxSignificance()) * -10, 50));
                    break;
                case HeatmapMatrix.COLOUR_BY_QUANTITATION:
                    Probe probeForQuantitation;
                    if (forRev == 0) {
                        probeForQuantitation = interactions[i].lowestProbe();
                    } else {
                        probeForQuantitation = interactions[i].highestProbe();
                    }
                    try {
                        g.setColor(matrix.colourGradient().getColor(((DataStore) dataSet).getValueForProbe(probeForQuantitation), minQuantitatedValue, maxQuantitatedValue));
                    } catch (SeqMonkException e) {
                        // We couldn't get a value for this probe, so skip the
                        // cell rather than filling it with whatever colour
                        // happened to be left on the graphics context.
                        continue;
                    }
                    break;
            }
            g.fillRect(xStart, yEnd, xEnd - xStart, yStart - yEnd);
            // Record the pixels we've just covered so that later interactions
            // which land on the same spot can be skipped
            for (int x = Math.min(xStart, xEnd); x <= Math.min(xStart, xEnd) + Math.abs(xStart - xEnd); x++) {
                for (int y = Math.min(yStart, yEnd); y <= Math.min(yStart, yEnd) + Math.abs(yStart - yEnd); y++) {
                    drawnPixels[x][y] = true;
                }
            }
        }
    }
    // Draw the probe list lines
    g.setColor(Color.GRAY);
    // If there is no current cluster we draw a line between each probe list
    // and label it. Otherwise we draw lines at the cluster interval boundaries.
    if (currentCluster == null) {
        // Draw Probe List Lines on X axis
        int runningGenomeLength = 0;
        for (int l = 0; l < probeLists.length; l++) {
            int startPos = getXForPosition(runningGenomeLength);
            int endPos = getXForPosition(runningGenomeLength + probeLists[l].getAllProbes().length);
            if (l > 0) {
                if (startPos >= maxNameWidth && startPos <= getWidth() - 10) {
                    g.drawLine(startPos, 30, startPos, getHeight() - nameHeight);
                }
            }
            // The last list also gets a closing line at its end
            if (l + 1 == probeLists.length) {
                if (endPos >= maxNameWidth && endPos <= getWidth() - 10) {
                    g.drawLine(endPos, 30, endPos, getHeight() - nameHeight);
                }
            }
            int nameWidth = g.getFontMetrics().stringWidth(probeLists[l].name());
            g.drawString(probeLists[l].name(), (startPos + ((endPos - startPos) / 2)) - (nameWidth / 2), getHeight() - 3);
            runningGenomeLength += probeLists[l].getAllProbes().length;
        }
        // Draw Probe List Lines on Y axis
        runningGenomeLength = 0;
        for (int l = 0; l < probeLists.length; l++) {
            int startPos = getYForPosition(runningGenomeLength);
            int endPos = getYForPosition(runningGenomeLength + probeLists[l].getAllProbes().length);
            if (l > 0) {
                if (startPos <= getHeight() - nameHeight && startPos >= 30) {
                    g.drawLine(maxNameWidth, startPos, getWidth() - 10, startPos);
                }
            }
            if (l + 1 == probeLists.length) {
                if (endPos <= getHeight() - nameHeight && endPos >= 30) {
                    g.drawLine(maxNameWidth, endPos, getWidth() - 10, endPos);
                }
            }
            int nameWidth = g.getFontMetrics().stringWidth(probeLists[l].name());
            g.drawString(probeLists[l].name(), (maxNameWidth / 2) - (nameWidth / 2), (endPos + ((startPos - endPos) / 2)) + (g.getFontMetrics().getAscent() / 2));
            runningGenomeLength += probeLists[l].getAllProbes().length;
        }
    } else // If we are clustered then we draw lines at the cluster interval boundaries
    {
        // Draw Cluster Lines on X axis
        int runningListPosition = 0;
        for (int l = 0; l < clusterIntervals.length; l++) {
            runningListPosition += clusterIntervals[l];
            if (runningListPosition < currentXStartIndex)
                continue;
            if (runningListPosition > currentXEndIndex)
                break;
            int pos = getXForPosition(runningListPosition);
            g.drawLine(pos, 30, pos, getHeight() - nameHeight);
        }
        // Draw Cluster Lines on Y axis
        runningListPosition = 0;
        for (int l = 0; l < clusterIntervals.length; l++) {
            runningListPosition += clusterIntervals[l];
            if (runningListPosition < currentYStartIndex)
                continue;
            if (runningListPosition > currentYEndIndex)
                break;
            int pos = getYForPosition(runningListPosition);
            g.drawLine(maxNameWidth, pos, getWidth() - 10, pos);
        }
    }
    // Draw the axes
    g.drawLine(maxNameWidth, getHeight() - nameHeight, getWidth() - 10, getHeight() - nameHeight);
    g.drawLine(maxNameWidth, getHeight() - nameHeight, maxNameWidth, 30);
    // Draw a selection if we're making one
    if (makingSelection) {
        g.setColor(ColourScheme.DRAGGED_SELECTION);
        g.drawRect(Math.min(selectionEndX, selectionStartX), Math.min(selectionEndY, selectionStartY), Math.abs(selectionEndX - selectionStartX), Math.abs(selectionEndY - selectionStartY));
    }
}
Also used : InteractionProbePair(uk.ac.babraham.SeqMonk.DataTypes.Interaction.InteractionProbePair) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 2 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class MAPlotPanel method getFilteredProbes.

/**
 * Gets the probes whose difference between the x store and y store values
 * falls within the currently selected range.
 *
 * The range runs between ySelectionStart and ySelectionEnd, taken in
 * whichever order makes a valid minimum and maximum. If no selection has
 * been made then no probes are added to the returned list.
 *
 * @param probeset the probeset (not used by this method)
 * @return a new probe list, named after the selected range, holding the
 *         probes which fell inside it
 */
public ProbeList getFilteredProbes(ProbeSet probeset) {
    double minY = Math.min(ySelectionStart, ySelectionEnd);
    double maxY = Math.max(ySelectionStart, ySelectionEnd);
    ProbeList list = new ProbeList(probeList, "Difference between " + df.format(minY) + " and " + df.format(maxY), "Difference between " + xStore.name() + " and " + yStore.name() + " was between " + df.format(minY) + " and " + df.format(maxY), null);
    if (!madeSelection) {
        return list;
    }
    Probe[] probes = probeList.getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        try {
            double diff = xStore.getValueForProbe(probes[p]) - yStore.getValueForProbe(probes[p]);
            // This is written as a positive range test so that a NaN
            // difference is rejected. NaN compares false against everything,
            // so separate "diff < minY" / "diff > maxY" rejections would let
            // it through.
            if (!(diff >= minY && diff <= maxY))
                continue;
            list.addProbe(probes[p], null);
        } catch (SeqMonkException e) {
            // A probe we can't get a value for is reported and left out of
            // the list, the rest of the probes are still processed
            e.printStackTrace();
        }
    }
    return list;
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 3 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class MAPlotPanel method calculateNonredundantSet.

/**
 * This collapses individual points which are over the same
 * pixel when redrawing the plot at a different scale.
 *
 * Every probe is placed on a pixel grid the size of the panel, with x being
 * the mean of the two store values and y being their difference. Each
 * occupied pixel becomes one ProbePairValue. The collapsed values are
 * coloured and stored in nonRedundantValues, and the panel size they were
 * calculated for is recorded in lastNonredWidth and lastNonredHeight.
 *
 * @throws IllegalStateException if a value can't be retrieved for a probe
 * @throws IllegalArgumentException if a point ends up with a sublist index
 *         higher than the number of sublists allows
 */
private synchronized void calculateNonredundantSet() {
    closestPoint = null;
    ProbePairValue[][] grid = new ProbePairValue[getWidth()][getHeight()];
    Probe[] probes = probeList.getAllProbes();
    try {
        for (int p = 0; p < probes.length; p++) {
            float xStoreValue = xStore.getValueForProbe(probes[p]);
            float yStoreValue = yStore.getValueForProbe(probes[p]);
            float xValue = (xStoreValue + yStoreValue) / 2;
            float yValue = xStoreValue - yStoreValue;
            // Points without a usable position can't be plotted
            if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
                continue;
            }
            int x = getX(xValue);
            int y = getY(yValue);
            if (grid[x][y] == null) {
                grid[x][y] = new ProbePairValue(xValue, yValue, x, y);
                grid[x][y].setProbe(probes[p]);
            } else {
                // We only count the overlapping probes if we're not showing
                // sublists. When sublists are shown the count is used
                // instead to hold the index of the sublist the point belongs
                // to (assigned below).
                if (subLists == null)
                    grid[x][y].count++;
                // As we have multiple probes at this point we remove the
                // specific probe annotation.
                grid[x][y].setProbe(null);
            }
        }
        if (subLists != null) {
            for (int s = 0; s < subLists.length; s++) {
                Probe[] subListProbes = subLists[s].getAllProbes();
                for (int p = 0; p < subListProbes.length; p++) {
                    float xStoreValue = xStore.getValueForProbe(subListProbes[p]);
                    float yStoreValue = yStore.getValueForProbe(subListProbes[p]);
                    float xValue = (xStoreValue + yStoreValue) / 2;
                    float yValue = xStoreValue - yStoreValue;
                    // Apply the same filter as the main list above. A point
                    // with no usable position was never put on the grid, and
                    // converting it to a pixel could mark an unrelated point
                    // as belonging to this sublist.
                    if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
                        continue;
                    }
                    int x = getX(xValue);
                    int y = getY(yValue);
                    if (grid[x][y] == null) {
                        // This messes up where we catch it in the middle of a redraw
                        // so we skip the point instead of throwing an exception
                        continue;
                    }
                    // 1 = no list so 2 is the lowest sublist index
                    grid[x][y].count = s + 2;
                }
            }
        }
    } catch (SeqMonkException e) {
        throw new IllegalStateException(e);
    }
    // Now we need to put all of the ProbePairValues into
    // a single array;
    int count = 0;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null)
                count++;
        }
    }
    ProbePairValue[] nonred = new ProbePairValue[count];
    // The array is filled from the end backwards
    count--;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null) {
                nonred[count] = grid[x][y];
                count--;
            }
        }
    }
    Arrays.sort(nonred);
    // Work out the 95th percentile count. The first and the 95th percentile
    // entries of the sorted array set the ends of the colour scale.
    int minCount = 1;
    int maxCount = 2;
    if (nonred.length > 0) {
        minCount = nonred[0].count;
        maxCount = nonred[((nonred.length - 1) * 95) / 100].count;
    }
    // Go through every nonred assigning a suitable colour
    ColourGradient gradient = new HotColdColourGradient();
    for (int i = 0; i < nonred.length; i++) {
        if (subLists == null) {
            nonred[i].color = gradient.getColor(nonred[i].count, minCount, maxCount);
        } else {
            if (nonred[i].count > subLists.length + 1) {
                throw new IllegalArgumentException("Count above threshold when showing sublists");
            }
            // A count of 1 means the point wasn't in any sublist
            if (nonred[i].count == 1) {
                nonred[i].color = VERY_LIGHT_GREY;
            } else {
                nonred[i].color = ColourIndexSet.getColour(nonred[i].count - 2);
            }
        }
    }
    nonRedundantValues = nonred;
    lastNonredWidth = getWidth();
    lastNonredHeight = getHeight();
}
Also used : HotColdColourGradient(uk.ac.babraham.SeqMonk.Gradients.HotColdColourGradient) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) ColourGradient(uk.ac.babraham.SeqMonk.Gradients.ColourGradient) HotColdColourGradient(uk.ac.babraham.SeqMonk.Gradients.HotColdColourGradient)

Example 4 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class HierarchicalClusterPanel method paint.

/**
 * Paints the clustered heatmap.
 *
 * One row of coloured cells is drawn per probe which isn't flagged as
 * skippable, with the probe name beside it. The store names are then drawn
 * along the top, followed by the replicate set markers, the cluster
 * boundary lines and, while a selection is being made, the selection box.
 *
 * @param g the graphics context to draw into
 */
public void paint(Graphics g) {
    super.paint(g);
    // The skippable rows are recalculated when the panel height no longer
    // matches the recorded one
    if (lastHeight != getHeight()) {
        calculateSkippablePositions();
    }
    // Probe names use a small fixed font, everything else uses the font
    // which was set when we were called
    final Font probeNameFont = new Font("sans", Font.PLAIN, 10);
    final Font defaultFont = g.getFont();
    for (int probeIndex = 0; probeIndex < probes.length; probeIndex++) {
        final int row = clusterPostions[probeIndex];
        if (skippablePositions[row]) {
            continue;
        }
        // Collect the value for this probe from every store
        float[] rowValues = new float[stores.length];
        for (int s = 0; s < stores.length; s++) {
            try {
                rowValues[s] = stores[s].getValueForProbe(probes[probeIndex]);
            } catch (SeqMonkException e) {
                // Deliberately ignored - the value for this store stays at 0
            }
        }
        // When normalising, the values are centred on the mean of the row
        if (normalise) {
            float rowMean = SimpleStats.mean(rowValues);
            for (int s = 0; s < rowValues.length; s++) {
                rowValues[s] -= rowMean;
            }
        }
        int bottomY = getYForPosition(row);
        int topY = getYForPosition(row + 1);
        // Every row is at least one pixel high
        int cellHeight = Math.max(1, (bottomY - topY) + 1);
        // Label the row with the probe name, to the right of the last store
        g.setFont(probeNameFont);
        g.setColor(Color.BLACK);
        g.drawString(probes[probeIndex].name(), getXForPosition(stores.length) + 1, bottomY - (cellHeight / 2));
        g.setFont(defaultFont);
        for (int s = 0; s < stores.length; s++) {
            int leftX = getXForPosition(s);
            int rightX = getXForPosition(s + 1);
            // Cells with no valid value are left undrawn
            if (Float.isNaN(rowValues[s])) {
                continue;
            }
            if (!negativeScale) {
                g.setColor(gradient.getColor(rowValues[s], 0, maxValue));
            } else {
                g.setColor(gradient.getColor(rowValues[s], 0 - maxValue, maxValue));
            }
            g.fillRect(leftX, topY, rightX - leftX, cellHeight);
        }
        g.setColor(Color.DARK_GRAY);
    }
    // Put the store names along the top. Each name gets the width of one column.
    int columnWidth = getXForPosition(1) - getXForPosition(0);
    g.setColor(Color.DARK_GRAY);
    for (int s = 0; s < stores.length; s++) {
        String label = stores[s].name();
        if (g.getFontMetrics().stringWidth(label) >= columnWidth) {
            // The name is too wide, so drop characters from the end until
            // the name plus a ".." marker fits, or there is nothing left
            while (label.length() > 0 && g.getFontMetrics().stringWidth(label + "..") > columnWidth) {
                label = label.substring(0, label.length() - 1);
            }
            g.drawString(label + "..", getXForPosition(s), TOP_NAME_HEIGHT - 3);
        } else {
            // The full name fits so centre it over the column
            int labelX = getXForPosition(s);
            labelX += (columnWidth - g.getFontMetrics().stringWidth(label)) / 2;
            g.drawString(label, labelX, TOP_NAME_HEIGHT - 3);
        }
    }
    // Each replicate set gets its own colour, a name to the right of the
    // stores, and a filled bar over every store it contains
    for (int r = 0; r < repSets.length; r++) {
        g.setColor(ColourIndexSet.getColour(r));
        g.drawString(repSets[r].name(), getXForPosition(stores.length), 30 + (10 * r));
        for (int s = 0; s < stores.length; s++) {
            if (!repSets[r].containsDataStore(stores[s])) {
                continue;
            }
            int barX = getXForPosition(s);
            g.fillRect(barX, 30 + (10 * (r - 1)), getXForPosition(s + 1) - barX, 10);
        }
    }
    // Mark the cluster boundaries with horizontal lines, but only those
    // which fall inside the visible range of rows
    int boundary = 0;
    g.setColor(Color.BLACK);
    for (int c = 0; c < clusterIntervals.length; c++) {
        boundary += clusterIntervals[c];
        if (boundary < currentYStartIndex) {
            continue;
        }
        if (boundary > currentYEndIndex) {
            break;
        }
        int lineY = getYForPosition(boundary);
        g.drawLine(1, lineY, (getWidth() - NAME_SPACE), lineY);
    }
    // Outline the selection if one is being made. It always spans all stores.
    if (makingSelection) {
        int boxLeft = getXForPosition(0);
        int boxTop = Math.min(selectionEndY, selectionStartY);
        g.setColor(Color.GREEN);
        g.drawRect(boxLeft, boxTop, getXForPosition(stores.length) - getXForPosition(0), Math.abs(selectionStartY - selectionEndY));
    }
}
Also used : SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Font(java.awt.Font)

Example 5 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class GFF3AnnotationParser method parseAnnotation.

/**
 * Parses a tab delimited annotation file into one or more annotation sets.
 *
 * Files whose name ends in .gz are read through a GZIP stream. Lines are
 * split on tabs and need at least 7 fields. Features which share a Parent
 * or transcript_id attribute are collected into a single grouped feature,
 * everything else becomes a feature of its own. Lines which can't be used
 * are reported through progressWarningReceived and skipped.
 *
 * @param file the file to read
 * @param genome the genome used to look up chromosomes
 * @param prefix text to put in front of every feature type. If this is null
 *        the user is asked for a prefix with a dialog.
 * @return the annotation sets which were created, or null if parsing was
 *         cancelled
 * @throws Exception if the file can't be opened or read
 */
public AnnotationSet[] parseAnnotation(File file, Genome genome, String prefix) throws Exception {
    System.err.println("Parsing " + file);
    if (prefix == null) {
        featurePrefix = JOptionPane.showInputDialog(SeqMonkApplication.getInstance(), "Feature prefix", "GFFv3/GTP Options", JOptionPane.QUESTION_MESSAGE);
    } else {
        featurePrefix = prefix;
    }
    // The dialog returns null if it was dismissed
    if (featurePrefix == null)
        featurePrefix = "";
    Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
    AnnotationSet currentAnnotation = new AnnotationSet(genome, file.getName());
    annotationSets.add(currentAnnotation);
    Hashtable<String, FeatureGroup> groupedFeatures = new Hashtable<String, FeatureGroup>();
    BufferedReader br;
    if (file.getName().toLowerCase().endsWith(".gz")) {
        br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file))));
    } else {
        br = new BufferedReader(new FileReader(file));
    }
    // The reader is closed in the finally block so that it isn't leaked if
    // we're cancelled or if anything fails part way through the file
    try {
        String line;
        int count = 0;
        while ((line = br.readLine()) != null) {
            if (cancel) {
                progressCancelled();
                return null;
            }
            if (count % 1000 == 0) {
                progressUpdated("Read " + count + " lines from " + file.getName(), 0, 1);
            }
            // Start a new annotation set at every million lines beyond the first million
            if (count > 1000000 && count % 1000000 == 0) {
                progressUpdated("Caching...", 0, 1);
                currentAnnotation.finalise();
                currentAnnotation = new AnnotationSet(genome, file.getName() + "[" + annotationSets.size() + "]");
                annotationSets.add(currentAnnotation);
            }
            ++count;
            // Ignore blank lines
            if (line.trim().length() == 0)
                continue;
            // Skip comments
            if (line.startsWith("#"))
                continue;
            String[] sections = line.split("\t");
            // Check to see if we've got enough data to work with
            if (sections.length < 7) {
                progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                continue;
            }
            int start;
            int end;
            try {
                start = Integer.parseInt(sections[3]);
                end = Integer.parseInt(sections[4]);
            } catch (NumberFormatException e) {
                progressWarningReceived(new SeqMonkException("Location " + sections[3] + "-" + sections[4] + " was not an integer"));
                continue;
            }
            // End must always be later than start
            if (end < start) {
                int temp = start;
                start = end;
                end = temp;
            }
            // We know the strand field is present as we checked for 7 fields above
            int strand;
            if (sections[6].equals("+")) {
                strand = Location.FORWARD;
            } else if (sections[6].equals("-")) {
                strand = Location.REVERSE;
            } else {
                strand = Location.UNKNOWN;
            }
            ChromosomeWithOffset c;
            try {
                c = genome.getChromosome(sections[0]);
            } catch (IllegalArgumentException e) {
                progressWarningReceived(new SeqMonkException("Couldn't find a chromosome called " + sections[0]));
                continue;
            }
            start = c.position(start);
            end = c.position(end);
            // We also don't allow readings which are beyond the end of the chromosome
            if (end > c.chromosome().length()) {
                int overrun = end - c.chromosome().length();
                progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                continue;
            }
            if (sections.length > 8 && sections[8].trim().length() > 0) {
                // Should check for escaped semicolons
                String[] attributes = sections[8].split(" *; *");
                // Make up a data structure of the attributes we have
                Hashtable<String, Vector<String>> keyValuePairs = new Hashtable<String, Vector<String>>();
                for (int a = 0; a < attributes.length; a++) {
                    // Should check for escaped equals
                    String[] keyValue = attributes[a].split("=", 2);
                    // See if we didn't get split
                    if (keyValue.length == 1) {
                        // This could be a GTF file which uses quoted values in space delimited fields
                        keyValue = attributes[a].split(" \"");
                        if (keyValue.length == 2) {
                            // We need to remove the quote from the end of the value
                            keyValue[1] = keyValue[1].substring(0, keyValue[1].length() - 1);
                        }
                    }
                    if (keyValue.length == 2) {
                        // A key can appear more than once so we keep all of its values
                        if (keyValuePairs.containsKey(keyValue[0])) {
                            keyValuePairs.get(keyValue[0]).add(keyValue[1]);
                        } else {
                            Vector<String> newVector = new Vector<String>();
                            newVector.add(keyValue[1]);
                            keyValuePairs.put(keyValue[0], newVector);
                        }
                    } else {
                        progressWarningReceived(new SeqMonkException("No key value delimiter in " + attributes[a]));
                    }
                }
                if (keyValuePairs.containsKey("Parent") && !sections[2].equals("mRNA")) {
                    // We change exons to mRNA so we don't end up with spliced exon objects
                    if (sections[2].equals("exon"))
                        sections[2] = "mRNA";
                    // Only the first Parent value is used, but it can hold
                    // several comma separated parents
                    String[] parents = keyValuePairs.get("Parent").elementAt(0).split(",");
                    for (int p = 0; p < parents.length; p++) {
                        String groupKey = sections[2] + "_" + parents[p];
                        if (!groupedFeatures.containsKey(groupKey)) {
                            // Make a new feature to which we can add this
                            Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                            groupedFeatures.put(groupKey, new FeatureGroup(feature, strand, feature.location()));
                            copyAttributesToFeature(keyValuePairs, feature);
                        }
                        groupedFeatures.get(groupKey).addSublocation(new Location(start, end, strand));
                    }
                } else if (keyValuePairs.containsKey("transcript_id")) {
                    // We can't group this by Parent, so we group by the first
                    // transcript_id value instead
                    if (sections[2].equals("exon"))
                        sections[2] = "mRNA";
                    String groupKey = sections[2] + "_" + keyValuePairs.get("transcript_id").elementAt(0);
                    if (!groupedFeatures.containsKey(groupKey)) {
                        Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                        copyAttributesToFeature(keyValuePairs, feature);
                        groupedFeatures.put(groupKey, new FeatureGroup(feature, strand, feature.location()));
                    }
                    groupedFeatures.get(groupKey).addSublocation(new Location(start, end, strand));
                } else {
                    // If we get here we're making a feature with attributes
                    Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                    feature.setLocation(new Location(start, end, strand));
                    copyAttributesToFeature(keyValuePairs, feature);
                    if (keyValuePairs.containsKey("ID")) {
                        // This is a feature which may end up having subfeatures
                        groupedFeatures.put(sections[2] + "_" + keyValuePairs.get("ID").elementAt(0), new FeatureGroup(feature, strand, feature.location()));
                    } else {
                        // We can just add this to the annotation collection
                        currentAnnotation.addFeature(feature);
                    }
                }
            } else {
                // No group parameter to worry about
                Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                feature.setLocation(new Location(start, end, strand));
                currentAnnotation.addFeature(feature);
            }
        }
    } finally {
        br.close();
    }
    // Now go through the grouped features adding them to the annotation set
    Iterator<FeatureGroup> i = groupedFeatures.values().iterator();
    while (i.hasNext()) {
        Feature f = i.next().feature();
        currentAnnotation.addFeature(f);
    }
    return annotationSets.toArray(new AnnotationSet[0]);
}

/**
 * Adds every value of every parsed attribute to a feature.
 *
 * @param keyValuePairs the parsed attributes, keyed by attribute name
 * @param feature the feature to add the attributes to
 */
private static void copyAttributesToFeature(Hashtable<String, Vector<String>> keyValuePairs, Feature feature) {
    Enumeration<String> en = keyValuePairs.keys();
    while (en.hasMoreElements()) {
        String key = en.nextElement();
        String[] values = keyValuePairs.get(key).toArray(new String[0]);
        for (int v = 0; v < values.length; v++) {
            feature.addAttribute(key, values[v]);
        }
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) GZIPInputStream(java.util.zip.GZIPInputStream) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) Enumeration(java.util.Enumeration) InputStreamReader(java.io.InputStreamReader) Hashtable(java.util.Hashtable) FileInputStream(java.io.FileInputStream) BufferedReader(java.io.BufferedReader) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7