Example 11 with PeakIdentity

Use of net.sf.mzmine.datamodel.PeakIdentity in project mzmine2 by mzmine.

The class CameraSearchTask, method groupPeaksByIsotope.

/**
 * Uses Isotope-field in PeakIdentity to group isotopes and build spectrum
 *
 * @param peakList PeakList object
 * @return new PeakList object
 */
private PeakList groupPeaksByIsotope(PeakList peakList) {
    // Create new feature list.
    final PeakList combinedPeakList = new SimplePeakList(peakList.getName() + " " + parameters.getParameter(CameraSearchParameters.SUFFIX).getValue(), peakList.getRawDataFiles());
    // Load previous applied methods.
    for (final PeakList.PeakListAppliedMethod method : peakList.getAppliedMethods()) {
        combinedPeakList.addDescriptionOfAppliedTask(method);
    }
    // Add task description to feature list.
    combinedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod("Bioconductor CAMERA", parameters));
    // ------------------------------------------------
    // Find unique isotopes belonging to the same group
    // ------------------------------------------------
    Set<String> isotopeGroups = new HashSet<>();
    for (PeakListRow row : peakList.getRows()) {
        PeakIdentity identity = row.getPreferredPeakIdentity();
        if (identity == null)
            continue;
        String isotope = identity.getPropertyValue("Isotope");
        if (isotope == null)
            continue;
        // Guard against malformed annotations before calling substring
        int bracket = isotope.indexOf("]");
        if (bracket <= 1)
            continue;
        // The group id sits between the leading '[' and the first ']'
        String isotopeGroup = isotope.substring(1, bracket);
        isotopeGroups.add(isotopeGroup);
    }
    List<PeakListRow> groupRows = new ArrayList<>();
    Set<String> groupNames = new HashSet<>();
    Map<Double, Double> spectrum = new HashMap<>();
    List<PeakListRow> newPeakListRows = new ArrayList<>();
    for (String isotopeGroup : isotopeGroups) {
        // -----------------------------------------
        // Find all peaks belonging to isotopeGroups
        // -----------------------------------------
        groupRows.clear();
        groupNames.clear();
        spectrum.clear();
        int minLength = Integer.MAX_VALUE;
        PeakListRow groupRow = null;
        for (PeakListRow row : peakList.getRows()) {
            PeakIdentity identity = row.getPreferredPeakIdentity();
            if (identity == null)
                continue;
            String isotope = identity.getPropertyValue("Isotope");
            if (isotope == null)
                continue;
            // Same guard as above: skip annotations without a valid "[group]" prefix
            int bracket = isotope.indexOf("]");
            if (bracket <= 1)
                continue;
            String isoGroup = isotope.substring(1, bracket);
            if (isoGroup.equals(isotopeGroup)) {
                groupRows.add(row);
                groupNames.add(identity.getName());
                spectrum.put(row.getAverageMZ(), row.getAverageHeight());
                if (isoGroup.length() < minLength) {
                    minLength = isoGroup.length();
                    groupRow = row;
                }
            }
        }
        // Skip groups whose peaks carry different identity names (they belong to different pcgroups)
        if (groupRow == null || groupNames.size() != 1)
            continue;
        PeakIdentity identity = groupRow.getPreferredPeakIdentity();
        if (identity == null)
            continue;
        DataPoint[] dataPoints = new DataPoint[spectrum.size()];
        int count = 0;
        for (Entry<Double, Double> e : spectrum.entrySet()) {
            dataPoints[count++] = new SimpleDataPoint(e.getKey(), e.getValue());
        }
        IsotopePattern pattern = new SimpleIsotopePattern(dataPoints, IsotopePatternStatus.PREDICTED, "Spectrum");
        groupRow.getBestPeak().setIsotopePattern(pattern);
        // combinedPeakList.addRow(groupRow);
        newPeakListRows.add(groupRow);
    }
    if (includeSingletons) {
        for (PeakListRow row : peakList.getRows()) {
            PeakIdentity identity = row.getPreferredPeakIdentity();
            if (identity == null)
                continue;
            String isotope = identity.getPropertyValue("Isotope");
            if (isotope == null || isotope.length() == 0) {
                DataPoint[] dataPoints = new DataPoint[1];
                dataPoints[0] = new SimpleDataPoint(row.getAverageMZ(), row.getAverageHeight());
                IsotopePattern pattern = new SimpleIsotopePattern(dataPoints, IsotopePatternStatus.PREDICTED, "Spectrum");
                row.getBestPeak().setIsotopePattern(pattern);
                newPeakListRows.add(row);
            }
        }
    }
    // ------------------------------------
    // Sort new peak rows by retention time
    // ------------------------------------
    newPeakListRows.sort((row1, row2) -> Double.compare(row1.getAverageRT(), row2.getAverageRT()));
    for (PeakListRow row : newPeakListRows) {
        combinedPeakList.addRow(row);
    }
    return combinedPeakList;
}
Also used : HashMap(java.util.HashMap) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) DataPoint(net.sf.mzmine.datamodel.DataPoint) IsotopePattern(net.sf.mzmine.datamodel.IsotopePattern) PeakIdentity(net.sf.mzmine.datamodel.PeakIdentity) PeakList(net.sf.mzmine.datamodel.PeakList) PeakListRow(net.sf.mzmine.datamodel.PeakListRow) SimpleDataPoint(net.sf.mzmine.datamodel.impl.SimpleDataPoint) SimpleIsotopePattern(net.sf.mzmine.datamodel.impl.SimpleIsotopePattern) SimplePeakIdentity(net.sf.mzmine.datamodel.impl.SimplePeakIdentity) SimplePeakList(net.sf.mzmine.datamodel.impl.SimplePeakList) SimplePeakListAppliedMethod(net.sf.mzmine.datamodel.impl.SimplePeakListAppliedMethod)
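
A note on the "Isotope" property parsed above: CAMERA typically writes annotations such as "[13][M+1]+", with the isotope-group id between the first pair of brackets. The standalone sketch below isolates that parsing step; the class name and the annotation strings are illustrative assumptions, not part of mzmine2.

public class IsotopeGroupParseDemo {

    /** Returns the group id between the leading '[' and the first ']', or null if absent. */
    static String parseIsotopeGroup(String isotope) {
        if (isotope == null)
            return null;
        int bracket = isotope.indexOf("]");
        // Need at least one character between '[' and ']'
        return (bracket > 1) ? isotope.substring(1, bracket) : null;
    }

    public static void main(String[] args) {
        // Hypothetical CAMERA-style annotations
        System.out.println(parseIsotopeGroup("[13][M+1]+")); // prints 13
        System.out.println(parseIsotopeGroup("[13][M]+"));   // prints 13
        System.out.println(parseIsotopeGroup("no brackets")); // prints null
    }
}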

Example 12 with PeakIdentity

Use of net.sf.mzmine.datamodel.PeakIdentity in project mzmine2 by mzmine.

The class CSVExportTask, method exportPeakList.

private void exportPeakList(PeakList peakList, FileWriter writer, File fileName) {
    NumberFormat mzForm = MZmineCore.getConfiguration().getMZFormat();
    RawDataFile[] rawDataFiles = peakList.getRawDataFiles();
    // Buffer for writing
    StringBuilder line = new StringBuilder();
    // Write column headers
    // Common elements
    int length = commonElements.length;
    String name;
    for (int i = 0; i < length; i++) {
        name = commonElements[i].toString();
        name = name.replace("Export ", "");
        name = escapeStringForCSV(name);
        line.append(name + fieldSeparator);
    }
    // peak Information
    Set<String> peakInformationFields = new HashSet<>();
    for (PeakListRow row : peakList.getRows()) {
        if (!filter.filter(row))
            continue;
        if (row.getPeakInformation() != null) {
            for (String key : row.getPeakInformation().getAllProperties().keySet()) {
                peakInformationFields.add(key);
            }
        }
    }
    if (exportAllPeakInfo) {
        for (String field : peakInformationFields) {
            line.append(field + fieldSeparator);
        }
    }
    // Data file elements
    length = dataFileElements.length;
    for (int df = 0; df < peakList.getNumberOfRawDataFiles(); df++) {
        for (int i = 0; i < length; i++) {
            name = rawDataFiles[df].getName();
            name = name + " " + dataFileElements[i].toString();
            name = escapeStringForCSV(name);
            line.append(name + fieldSeparator);
        }
    }
    line.append("\n");
    try {
        writer.write(line.toString());
    } catch (Exception e) {
        setStatus(TaskStatus.ERROR);
        setErrorMessage("Could not write to file " + fileName);
        return;
    }
    // Write data rows
    for (PeakListRow peakListRow : peakList.getRows()) {
        if (!filter.filter(peakListRow)) {
            processedRows++;
            continue;
        }
        // Cancel?
        if (isCanceled()) {
            return;
        }
        // Reset the buffer
        line.setLength(0);
        // Common elements
        length = commonElements.length;
        for (int i = 0; i < length; i++) {
            switch(commonElements[i]) {
                case ROW_ID:
                    line.append(peakListRow.getID() + fieldSeparator);
                    break;
                case ROW_MZ:
                    line.append(peakListRow.getAverageMZ() + fieldSeparator);
                    break;
                case ROW_RT:
                    line.append(peakListRow.getAverageRT() + fieldSeparator);
                    break;
                case ROW_IDENTITY:
                    // Identity elements
                    PeakIdentity peakId = peakListRow.getPreferredPeakIdentity();
                    if (peakId == null) {
                        line.append(fieldSeparator);
                        break;
                    }
                    String propertyValue = peakId.toString();
                    propertyValue = escapeStringForCSV(propertyValue);
                    line.append(propertyValue + fieldSeparator);
                    break;
                case ROW_IDENTITY_ALL:
                    // Identity elements
                    PeakIdentity[] peakIdentities = peakListRow.getPeakIdentities();
                    propertyValue = "";
                    for (int x = 0; x < peakIdentities.length; x++) {
                        if (x > 0)
                            propertyValue += idSeparator;
                        propertyValue += peakIdentities[x].toString();
                    }
                    propertyValue = escapeStringForCSV(propertyValue);
                    line.append(propertyValue + fieldSeparator);
                    break;
                case ROW_IDENTITY_DETAILS:
                    peakId = peakListRow.getPreferredPeakIdentity();
                    if (peakId == null) {
                        line.append(fieldSeparator);
                        break;
                    }
                    propertyValue = peakId.getDescription();
                    if (propertyValue != null)
                        propertyValue = propertyValue.replaceAll("\\n", ";");
                    propertyValue = escapeStringForCSV(propertyValue);
                    line.append(propertyValue + fieldSeparator);
                    break;
                case ROW_COMMENT:
                    String comment = escapeStringForCSV(peakListRow.getComment());
                    line.append(comment + fieldSeparator);
                    break;
                case ROW_PEAK_NUMBER:
                    int numDetected = 0;
                    for (Feature p : peakListRow.getPeaks()) {
                        if (p.getFeatureStatus() == FeatureStatus.DETECTED) {
                            numDetected++;
                        }
                    }
                    line.append(numDetected + fieldSeparator);
                    break;
            }
        }
        // peak Information
        if (exportAllPeakInfo) {
            if (peakListRow.getPeakInformation() != null) {
                Map<String, String> allPropertiesMap = peakListRow.getPeakInformation().getAllProperties();
                for (String key : peakInformationFields) {
                    String value = allPropertiesMap.get(key);
                    if (value == null)
                        value = "";
                    line.append(value + fieldSeparator);
                }
            }
        }
        // Data file elements
        length = dataFileElements.length;
        for (RawDataFile dataFile : rawDataFiles) {
            for (int i = 0; i < length; i++) {
                Feature peak = peakListRow.getPeak(dataFile);
                if (peak != null) {
                    switch(dataFileElements[i]) {
                        case PEAK_STATUS:
                            line.append(peak.getFeatureStatus() + fieldSeparator);
                            break;
                        case PEAK_NAME:
                            line.append(PeakUtils.peakToString(peak) + fieldSeparator);
                            break;
                        case PEAK_MZ:
                            line.append(peak.getMZ() + fieldSeparator);
                            break;
                        case PEAK_RT:
                            line.append(peak.getRT() + fieldSeparator);
                            break;
                        case PEAK_RT_START:
                            line.append(peak.getRawDataPointsRTRange().lowerEndpoint() + fieldSeparator);
                            break;
                        case PEAK_RT_END:
                            line.append(peak.getRawDataPointsRTRange().upperEndpoint() + fieldSeparator);
                            break;
                        case PEAK_DURATION:
                            line.append(RangeUtils.rangeLength(peak.getRawDataPointsRTRange()) + fieldSeparator);
                            break;
                        case PEAK_HEIGHT:
                            line.append(peak.getHeight() + fieldSeparator);
                            break;
                        case PEAK_AREA:
                            line.append(peak.getArea() + fieldSeparator);
                            break;
                        case PEAK_CHARGE:
                            line.append(peak.getCharge() + fieldSeparator);
                            break;
                        case PEAK_DATAPOINTS:
                            line.append(peak.getScanNumbers().length + fieldSeparator);
                            break;
                        case PEAK_FWHM:
                            line.append(peak.getFWHM() + fieldSeparator);
                            break;
                        case PEAK_TAILINGFACTOR:
                            line.append(peak.getTailingFactor() + fieldSeparator);
                            break;
                        case PEAK_ASYMMETRYFACTOR:
                            line.append(peak.getAsymmetryFactor() + fieldSeparator);
                            break;
                        case PEAK_MZMIN:
                            line.append(peak.getRawDataPointsMZRange().lowerEndpoint() + fieldSeparator);
                            break;
                        case PEAK_MZMAX:
                            line.append(peak.getRawDataPointsMZRange().upperEndpoint() + fieldSeparator);
                            break;
                    }
                } else {
                    switch(dataFileElements[i]) {
                        case PEAK_STATUS:
                            line.append(FeatureStatus.UNKNOWN + fieldSeparator);
                            break;
                        default:
                            line.append("0" + fieldSeparator);
                            break;
                    }
                }
            }
        }
        line.append("\n");
        try {
            writer.write(line.toString());
        } catch (Exception e) {
            setStatus(TaskStatus.ERROR);
            setErrorMessage("Could not write to file " + fileName);
            return;
        }
        processedRows++;
    }
}
Also used : Feature(net.sf.mzmine.datamodel.Feature) PeakIdentity(net.sf.mzmine.datamodel.PeakIdentity) PeakListRow(net.sf.mzmine.datamodel.PeakListRow) RawDataFile(net.sf.mzmine.datamodel.RawDataFile) NumberFormat(java.text.NumberFormat) HashSet(java.util.HashSet)
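
The helper escapeStringForCSV is called throughout exportPeakList but its body is not part of this excerpt. Below is a minimal sketch of what such a CSV escaper might look like, assuming fieldSeparator is a String field of the task; it is a plausible reconstruction, not the actual mzmine2 implementation.

// Hypothetical sketch; the real CSVExportTask.escapeStringForCSV may differ.
private String escapeStringForCSV(final String input) {
    if (input == null)
        return "";
    // Embedded line breaks would corrupt the CSV row structure
    String escaped = input.replaceAll("[\\r\\n]+", " ");
    // Quote the field and double any embedded quotes if it contains the separator or a quote
    if (escaped.contains(fieldSeparator) || escaped.contains("\"")) {
        escaped = "\"" + escaped.replace("\"", "\"\"") + "\"";
    }
    return escaped;
}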

Example 13 with PeakIdentity

Use of net.sf.mzmine.datamodel.PeakIdentity in project mzmine2 by mzmine.

The class SortSpectralDBIdentitiesTask, method sortIdentities.

/**
 * Sort a row's spectral database matches by descending similarity score.
 *
 * @param row the feature list row whose identities are re-ordered
 * @param filterMinSimilarity whether to discard matches scoring below minScore
 * @param minScore minimum similarity score kept when filtering is enabled
 */
public static void sortIdentities(PeakListRow row, boolean filterMinSimilarity, double minScore) {
    // get all row identities
    PeakIdentity[] identities = row.getPeakIdentities();
    if (identities == null || identities.length == 0)
        return;
    // filter for SpectralDBPeakIdentity and write to map
    List<SpectralDBPeakIdentity> match = new ArrayList<>();
    for (PeakIdentity identity : identities) {
        if (identity instanceof SpectralDBPeakIdentity) {
            row.removePeakIdentity(identity);
            if (!filterMinSimilarity || ((SpectralDBPeakIdentity) identity).getSimilarity().getScore() >= minScore)
                match.add((SpectralDBPeakIdentity) identity);
        }
    }
    if (match.isEmpty())
        return;
    // Sort by descending similarity score, best match first
    match.sort((a, b) -> Double.compare(b.getSimilarity().getScore(), a.getSimilarity().getScore()));
    for (SpectralDBPeakIdentity entry : match) {
        row.addPeakIdentity(entry, false);
    }
    row.setPreferredPeakIdentity(match.get(0));
    // Notify the GUI about the change in the project
    MZmineCore.getProjectManager().getCurrentProject().notifyObjectChanged(row, false);
}
Also used : ArrayList(java.util.ArrayList) PeakIdentity(net.sf.mzmine.datamodel.PeakIdentity) SpectralDBPeakIdentity(net.sf.mzmine.util.spectraldb.entry.SpectralDBPeakIdentity)
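
The one subtle point above is the descending comparator: swapping the arguments of Double.compare reverses the natural ascending order. A self-contained illustration with plain scores (the class name and values are made up for the demo):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DescendingScoreSortDemo {
    public static void main(String[] args) {
        List<Double> scores = new ArrayList<>(Arrays.asList(0.71, 0.93, 0.88));
        // b before a: descending order, best score first
        scores.sort((a, b) -> Double.compare(b, a));
        System.out.println(scores); // prints [0.93, 0.88, 0.71]
    }
}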

Example 14 with PeakIdentity

Use of net.sf.mzmine.datamodel.PeakIdentity in project mzmine2 by mzmine.

The class HierarAlignerGCTask, method run.

/**
 * @see Runnable#run()
 */
public void run() {
    // Check options validity
    if ((Math.abs(mzWeight) < EPSILON) && (Math.abs(rtWeight) < EPSILON)) {
        setStatus(TaskStatus.ERROR);
        setErrorMessage("Cannot run alignment, all the weight parameters are zero!");
        return;
    }
    setStatus(TaskStatus.PROCESSING);
    logger.info("Running join aligner");
    // Timing (debug only); startTime is initialized to avoid a definite-assignment compile error
    long startTime = 0;
    long endTime;
    float ms;
    if (DEBUG)
        startTime = System.currentTimeMillis();
    // Memory diagnostics (debug only)
    Runtime run_time = Runtime.getRuntime();
    Long prevTotal = 0L;
    Long prevFree = run_time.freeMemory();
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "START TASK...");
    // Progress total: each row is counted three times (distances, clustering, actual alignment)
    for (int i = 0; i < peakLists.length; i++) {
        totalRows += peakLists[i].getNumberOfRows() * 3;
    }
    // Collect all data files
    Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
    for (PeakList peakList : peakLists) {
        for (RawDataFile dataFile : peakList.getRawDataFiles()) {
            // Each data file can only have one column in aligned feature list
            if (allDataFiles.contains(dataFile)) {
                setStatus(TaskStatus.ERROR);
                setErrorMessage("Cannot run alignment, because file " + dataFile + " is present in multiple feature lists");
                return;
            }
            allDataFiles.add(dataFile);
        }
    }
    // Create a new aligned feature list
    alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "COMPOUND DETECTED");
    // Alignment mapping
    // Iterate source feature lists
    Hashtable<SimpleFeature, Double> rtPeaksBackup = new Hashtable<SimpleFeature, Double>();
    Hashtable<PeakListRow, Object[]> infoRowsBackup = new Hashtable<PeakListRow, Object[]>();
    // Since clustering is now order independent, option removed!
    // Build comparison order
    ArrayList<Integer> orderIds = new ArrayList<Integer>();
    for (int i = 0; i < peakLists.length; ++i) {
        orderIds.add(i);
    }
    Integer[] newIds = orderIds.toArray(new Integer[orderIds.size()]);
    // 
    // TriangularMatrix distances = null;
    DistanceMatrix distancesGNF_Tri = null;
    DistanceMatrix distancesGNF_Tri_Bkp = null;
    int nbPeaks = 0;
    for (int i = 0; i < newIds.length; ++i) {
        PeakList peakList = peakLists[newIds[i]];
        nbPeaks += peakList.getNumberOfRows();
    }
    // For the 'Hybrid' clusterer, or when CPU time is preferred over RAM, precompute the triangular distance matrix
    if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
        int nRowCount = nbPeaks;
        distancesGNF_Tri = new DistanceMatrixTriangular1D2D(nRowCount);
    }
    full_rows_list = new ArrayList<>();
    for (int i = 0; i < newIds.length; ++i) {
        PeakList peakList = peakLists[newIds[i]];
        PeakListRow[] allRows = peakList.getRows();
        for (int j = 0; j < allRows.length; ++j) {
            PeakListRow row = allRows[j];
            full_rows_list.add(row);
        }
    }
    // (commented-out parameters in the original: rtAdjustementMapping, rtToleranceAfter)
    RowVsRowDistanceProvider distProvider = new RowVsRowDistanceProvider(project, full_rows_list, mzWeight, rtWeight, maximumScore);
    // Same condition as above: fill the precomputed triangular distance matrix
    if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
        for (int x = 0; x < nbPeaks; ++x) {
            for (int y = x; y < nbPeaks; ++y) {
                float dist = (float) distProvider.getRankedDistance(x, y, mzTolerance.getMzTolerance(), rtTolerance.getTolerance(), minScore);
                distancesGNF_Tri.setValue(x, y, dist);
            }
            processedRows++;
            if (DEBUG)
                logger.info("Treating lists: " + (Math.round(100 * processedRows / (double) nbPeaks)) + " %");
        }
    }
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "DISTANCES COMPUTED");
    double max_dist = maximumScore;
    List<List<Integer>> gnfClusters = null;
    boolean do_verbose = true;
    boolean do_cluster = true;
    boolean do_print = (exportDendrogramAsTxt);
    boolean do_data = false;
    org.gnf.clustering.Node[] arNodes = null;
    int nRowCount = full_rows_list.size();
    String[] rowNames = null;
    if (do_print) {
        rowNames = new String[nRowCount];
        for (int i = 0; i < nRowCount; i++) {
            // rowNames[i] = "ID_" + i + "_" +
            // full_rows_list.get(i).getID();
            Feature peak = full_rows_list.get(i).getBestPeak();
            double rt = peak.getRT();
            int end = peak.getDataFile().getName().indexOf(" ");
            String short_fname = peak.getDataFile().getName().substring(0, end);
            rowNames[i] = "@" + rtFormat.format(rt) + "^[" + short_fname + "]";
        }
    }
    String outputPrefix = null;
    if (CLUSTERER_TYPE == ClustererType.CLASSIC) {
        // Pure Hierar!
        outputPrefix = "hierar_0";
        throw new IllegalStateException("'" + ClustererType.CLASSIC.toString() + "' algorithm not yet implemented!");
    } else if (CLUSTERER_TYPE == ClustererType.CACHED) {
        // TODO: ...!
        if (DEBUG_2)
            logger.info(distancesGNF_Tri.toString());
        if (saveRAMratherThanCPU_2) {
            // Requires: distances values will be
            // recomputed on demand during
            // "getValidatedClusters_3()"
            // No duplicate backup storage!
            distancesGNF_Tri_Bkp = null;
        } else {
            // Otherwise, backing up the distance matrix (matrix being
            // deeply changed during "clusterDM()", then no more
            // exploitable)
            distancesGNF_Tri_Bkp = new DistanceMatrixTriangular1D2D(distancesGNF_Tri);
            if (DEBUG)
                printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER BACKUP MATRIX");
        }
        if (DEBUG)
            logger.info("Clustering...");
        if (distancesGNF_Tri != null)
            arNodes = org.gnf.clustering.sequentialcache.SequentialCacheClustering.clusterDM(distancesGNF_Tri, linkageStartegyType, null, nRowCount);
        // Release the distance matrix consumed by clusterDM() and hint a GC pass
        distancesGNF_Tri = null;
        System.gc();
        if (DEBUG)
            printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER DONE");
        if (DEBUG_2)
            for (int i = 0; i < arNodes.length; i++) {
                logger.info("Node " + i + ": " + arNodes[i]);
            }
        // TODO: Use usual interfacing ...
        // ClusteringResult<org.gnf.clustering.Node> clust_res = new
        // ClusteringResult<>(
        // Arrays.asList(arNodes), null, 0, null);
        outputPrefix = "hierar_1";
    } else if (CLUSTERER_TYPE == ClustererType.HYBRID) {
        throw new IllegalStateException("'" + ClustererType.HYBRID.toString() + "' algorithm not yet implemented!");
    }
    // Sort Nodes by correlation score (Required in
    // 'getValidatedClusters_3')
    int[] rowOrder = new int[nRowCount];
    if (DEBUG)
        logger.info("Sorting tree nodes...");
    org.gnf.clustering.Utils.NodeSort(arNodes, nRowCount - 2, 0, rowOrder);
    if (do_cluster) {
        gnfClusters = getValidatedClusters_3(arNodes, 0.0f, newIds.length, max_dist, distancesGNF_Tri_Bkp, distProvider);
        // -- Print
        if (DEBUG_2 && do_verbose)
            for (int i = 0; i < gnfClusters.size(); i++) {
                List<Integer> cl = gnfClusters.get(i);
                String str = "";
                for (int j = 0; j < cl.size(); j++) {
                    int r = cl.get(j);
                    str += cl.get(j) + "^(" + full_rows_list.get(r).getID() + ", " + full_rows_list.get(r).getAverageRT() + ")" + " ";
                }
                logger.info(str);
            }
    }
    // File output paths (derived only when dendrogram export is enabled, to avoid a null filename)
    String outGtr = null;
    String outCdt = null;
    if (do_print) {
        int ext_pos = dendrogramTxtFilename.getAbsolutePath().lastIndexOf(".");
        outputPrefix = dendrogramTxtFilename.getAbsolutePath().substring(0, ext_pos);
        outGtr = outputPrefix + ".gtr";
        outCdt = outputPrefix + ".cdt";
    }
    if (DEBUG)
        logger.info("Writing output to file...");
    int nColCount = 1;
    String[] colNames = new String[nColCount];
    colNames[nColCount - 1] = "Id";
    String sep = "\t";
    if (do_print) {
        try {
            float[] arFloats = new float[nRowCount];
            for (int i = 0; i < arFloats.length; i++) {
                arFloats[i] = i / 2.0f;
            }
            DataSource source = (do_data) ? new FloatSource1D(arFloats, nRowCount, nColCount) : null;
            HierarAlignerGCTask.GenerateCDT(outCdt, source, nRowCount, nColCount, sep, rowNames, colNames, rowOrder);
        } catch (IOException e) {
            // Report but do not abort the whole task if the dendrogram files cannot be written
            e.printStackTrace();
        }
        org.gnf.clustering.Utils.WriteTreeToFile(outGtr, nRowCount - 1, arNodes, true);
        if (DEBUG)
            printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER FILES PRINTED");
    }
    // Arrange the clustered row indices into lists of feature list rows
    List<List<PeakListRow>> clustersList = new ArrayList<>();
    // Build feature list row clusters
    for (List<Integer> cl : gnfClusters) {
        List<PeakListRow> rows_cluster = new ArrayList<>();
        for (int i = 0; i < cl.size(); i++) {
            rows_cluster.add(full_rows_list.get(cl.get(i)));
        }
        clustersList.add(rows_cluster);
        // 
        processedRows += rows_cluster.size();
    }
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER CLUSTER_LIST");
    // Fill alignment table: One row per cluster
    for (List<PeakListRow> cluster : clustersList) {
        if (isCanceled())
            return;
        PeakListRow targetRow = new SimplePeakListRow(newRowID);
        newRowID++;
        alignedPeakList.addRow(targetRow);
        // 
        infoRowsBackup.put(targetRow, new Object[] { new HashMap<RawDataFile, Double[]>(), new HashMap<RawDataFile, PeakIdentity>(), new HashMap<RawDataFile, Double>() });
        for (PeakListRow row : cluster) {
            // Add all non-existing identities from the original row to the
            // aligned row
            // Set the preferred identity
            targetRow.setPreferredPeakIdentity(row.getPreferredPeakIdentity());
            // for (RawDataFile file : row.getRawDataFiles()) {
            for (RawDataFile file : alignedPeakList.getRawDataFiles()) {
                if (Arrays.asList(row.getRawDataFiles()).contains(file)) {
                    Feature originalPeak = row.getPeak(file);
                    if (originalPeak != null) {
                        targetRow.addPeak(file, originalPeak);
                    } else {
                        setStatus(TaskStatus.ERROR);
                        setErrorMessage("Cannot run alignment, no originalPeak");
                        return;
                    }
                }
            }
            // Copy identities that are not already present on the aligned row
            for (PeakIdentity identity : row.getPeakIdentities()) {
                PeakIdentity clonedIdentity = (PeakIdentity) identity.clone();
                if (!PeakUtils.containsIdentity(targetRow, clonedIdentity))
                    targetRow.addPeakIdentity(clonedIdentity, false);
            }
        // processedRows++;
        }
    }
    // Restore backed-up RTs, which may have been altered by the "targetRow.update()" calls below
    for (SimpleFeature peak : rtPeaksBackup.keySet()) {
        peak.setRT((double) rtPeaksBackup.get(peak));
    }
    // Post-processing
    // Build reference RDFs index: We need an ordered reference here, to be
    // able to parse
    // correctly while reading back stored info
    RawDataFile[] rdf_sorted = alignedPeakList.getRawDataFiles().clone();
    Arrays.sort(rdf_sorted, new RawDataFileSorter(SortingDirection.Ascending));
    // Process
    for (PeakListRow targetRow : infoRowsBackup.keySet()) {
        if (isCanceled())
            return;
        // Refresh averaged RTs...
        ((SimplePeakListRow) targetRow).update();
    }
    // 
    if (DEBUG) {
        endTime = System.currentTimeMillis();
        ms = (endTime - startTime);
        logger.info("## >> Whole JoinAlignerGCTask processing took " + Float.toString(ms) + " ms.");
    }
    // ----------------------------------------------------------------------
    // Add new aligned feature list to the project
    this.project.addPeakList(alignedPeakList);
    if (DEBUG) {
        for (RawDataFile rdf : alignedPeakList.getRawDataFiles()) logger.info("RDF: " + rdf);
    }
    // Add task description to peakList
    alignedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod(HierarAlignerGCTask.TASK_NAME, parameters));
    logger.info("Finished join aligner GC");
    setStatus(TaskStatus.FINISHED);
}
Also used : ArrayList(java.util.ArrayList) Hashtable(java.util.Hashtable) List(java.util.List) Vector(java.util.Vector) IOException(java.io.IOException) Feature(net.sf.mzmine.datamodel.Feature) PeakIdentity(net.sf.mzmine.datamodel.PeakIdentity) PeakList(net.sf.mzmine.datamodel.PeakList) PeakListRow(net.sf.mzmine.datamodel.PeakListRow) RawDataFile(net.sf.mzmine.datamodel.RawDataFile) SimpleFeature(net.sf.mzmine.datamodel.impl.SimpleFeature) SimplePeakList(net.sf.mzmine.datamodel.impl.SimplePeakList) SimplePeakListAppliedMethod(net.sf.mzmine.datamodel.impl.SimplePeakListAppliedMethod) SimplePeakListRow(net.sf.mzmine.datamodel.impl.SimplePeakListRow) DataSource(org.gnf.clustering.DataSource) DistanceMatrix(org.gnf.clustering.DistanceMatrix) FloatSource1D(org.gnf.clustering.FloatSource1D)
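
DistanceMatrixTriangular1D2D packs the symmetric pairwise distances into a single triangle, which is why setValue(x, y, dist) is only called for y >= x above. The sketch below illustrates the general packing idea only; it is an assumption about the approach, not the org.gnf.clustering implementation.

// Illustrative packed lower-triangular matrix; not the org.gnf.clustering class.
public class PackedTriangularMatrix {

    private final float[] values;

    public PackedTriangularMatrix(int n) {
        // A symmetric n x n matrix needs only n*(n+1)/2 stored entries
        this.values = new float[n * (n + 1) / 2];
    }

    // Map (x, y) to the packed index, exploiting d(x, y) == d(y, x)
    private int index(int x, int y) {
        int row = Math.max(x, y);
        int col = Math.min(x, y);
        return row * (row + 1) / 2 + col;
    }

    public void setValue(int x, int y, float dist) {
        values[index(x, y)] = dist;
    }

    public float getValue(int x, int y) {
        return values[index(x, y)];
    }
}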

Example 15 with PeakIdentity

Use of net.sf.mzmine.datamodel.PeakIdentity in project mzmine2 by mzmine.

The class RowsFilterTask, method filterPeakListRows.

/**
 * Filter the feature list rows.
 *
 * @param peakList feature list to filter.
 * @return a new feature list with rows of the original feature list that pass the filtering.
 */
private PeakList filterPeakListRows(final PeakList peakList) {
    // Create new feature list.
    final PeakList newPeakList = new SimplePeakList(peakList.getName() + ' ' + parameters.getParameter(RowsFilterParameters.SUFFIX).getValue(), peakList.getRawDataFiles());
    // Copy previous applied methods.
    for (final PeakListAppliedMethod method : peakList.getAppliedMethods()) {
        newPeakList.addDescriptionOfAppliedTask(method);
    }
    // Add task description to peakList.
    newPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod(getTaskDescription(), parameters));
    // Get parameters.
    final boolean onlyIdentified = parameters.getParameter(RowsFilterParameters.HAS_IDENTITIES).getValue();
    final boolean filterByIdentityText = parameters.getParameter(RowsFilterParameters.IDENTITY_TEXT).getValue();
    final boolean filterByCommentText = parameters.getParameter(RowsFilterParameters.COMMENT_TEXT).getValue();
    final String groupingParameter = (String) parameters.getParameter(RowsFilterParameters.GROUPSPARAMETER).getValue();
    final boolean filterByMinPeakCount = parameters.getParameter(RowsFilterParameters.MIN_PEAK_COUNT).getValue();
    final boolean filterByMinIsotopePatternSize = parameters.getParameter(RowsFilterParameters.MIN_ISOTOPE_PATTERN_COUNT).getValue();
    final boolean filterByMzRange = parameters.getParameter(RowsFilterParameters.MZ_RANGE).getValue();
    final boolean filterByRtRange = parameters.getParameter(RowsFilterParameters.RT_RANGE).getValue();
    final boolean filterByDuration = parameters.getParameter(RowsFilterParameters.PEAK_DURATION).getValue();
    final boolean filterByFWHM = parameters.getParameter(RowsFilterParameters.FWHM).getValue();
    final boolean filterByCharge = parameters.getParameter(RowsFilterParameters.CHARGE).getValue();
    final boolean filterByKMD = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getValue();
    final boolean filterByMS2 = parameters.getParameter(RowsFilterParameters.MS2_Filter).getValue();
    final String removeRowString = parameters.getParameter(RowsFilterParameters.REMOVE_ROW).getValue();
    Double minCount = parameters.getParameter(RowsFilterParameters.MIN_PEAK_COUNT).getEmbeddedParameter().getValue();
    final boolean renumber = parameters.getParameter(RowsFilterParameters.Reset_ID).getValue();
    int rowsCount = 0;
    final boolean removeRow = !removeRowString.equals(RowsFilterParameters.removeRowChoices[0]);
    // Rows pass by default; each active filter below may flag a row as failed
    boolean filterRowCriteriaFailed = false;
    // Handle < 1 values for minPeakCount
    if ((minCount == null) || (minCount < 1))
        minCount = 1.0;
    // Round the value down to the nearest whole number
    int intMinCount = minCount.intValue();
    // Filter rows.
    final PeakListRow[] rows = peakList.getRows();
    totalRows = rows.length;
    for (processedRows = 0; !isCanceled() && processedRows < totalRows; processedRows++) {
        filterRowCriteriaFailed = false;
        final PeakListRow row = rows[processedRows];
        final int peakCount = getPeakCount(row, groupingParameter);
        // Check number of peaks.
        if (filterByMinPeakCount) {
            if (peakCount < intMinCount)
                filterRowCriteriaFailed = true;
        }
        // Check identities.
        if (onlyIdentified) {
            if (row.getPreferredPeakIdentity() == null)
                filterRowCriteriaFailed = true;
        }
        // Check average m/z.
        if (filterByMzRange) {
            final Range<Double> mzRange = parameters.getParameter(RowsFilterParameters.MZ_RANGE).getEmbeddedParameter().getValue();
            if (!mzRange.contains(row.getAverageMZ()))
                filterRowCriteriaFailed = true;
        }
        // Check average RT.
        if (filterByRtRange) {
            final Range<Double> rtRange = parameters.getParameter(RowsFilterParameters.RT_RANGE).getEmbeddedParameter().getValue();
            if (!rtRange.contains(row.getAverageRT()))
                filterRowCriteriaFailed = true;
        }
        // Search peak identity text.
        if (filterByIdentityText) {
            if (row.getPreferredPeakIdentity() == null) {
                filterRowCriteriaFailed = true;
            } else {
                final String searchText = parameters.getParameter(RowsFilterParameters.IDENTITY_TEXT).getEmbeddedParameter().getValue().toLowerCase().trim();
                int numFailedIdentities = 0;
                PeakIdentity[] identities = row.getPeakIdentities();
                for (int index = 0; !isCanceled() && index < identities.length; index++) {
                    String rowText = identities[index].getName().toLowerCase().trim();
                    if (!rowText.contains(searchText))
                        numFailedIdentities += 1;
                }
                // Fail only if none of the identity names contains the search text
                if (numFailedIdentities == identities.length)
                    filterRowCriteriaFailed = true;
            }
        }
        // Search peak comment text.
        if (filterByCommentText) {
            if (row.getComment() == null) {
                filterRowCriteriaFailed = true;
            } else {
                final String searchText = parameters.getParameter(RowsFilterParameters.COMMENT_TEXT).getEmbeddedParameter().getValue().toLowerCase().trim();
                final String rowText = row.getComment().toLowerCase().trim();
                if (!rowText.contains(searchText))
                    filterRowCriteriaFailed = true;
            }
        }
        // Calculate average duration and isotope pattern count.
        int maxIsotopePatternSizeOnRow = 1;
        double avgDuration = 0.0;
        final Feature[] peaks = row.getPeaks();
        for (final Feature p : peaks) {
            final IsotopePattern pattern = p.getIsotopePattern();
            if (pattern != null && maxIsotopePatternSizeOnRow < pattern.getNumberOfDataPoints()) {
                maxIsotopePatternSizeOnRow = pattern.getNumberOfDataPoints();
            }
            avgDuration += RangeUtils.rangeLength(p.getRawDataPointsRTRange());
        }
        // Check isotope pattern count.
        if (filterByMinIsotopePatternSize) {
            final int minIsotopePatternSize = parameters.getParameter(RowsFilterParameters.MIN_ISOTOPE_PATTERN_COUNT).getEmbeddedParameter().getValue();
            if (maxIsotopePatternSizeOnRow < minIsotopePatternSize)
                filterRowCriteriaFailed = true;
        }
        // Check average duration.
        avgDuration /= peakCount;
        if (filterByDuration) {
            final Range<Double> durationRange = parameters.getParameter(RowsFilterParameters.PEAK_DURATION).getEmbeddedParameter().getValue();
            if (!durationRange.contains(avgDuration))
                filterRowCriteriaFailed = true;
        }
        // Filter by FWHM range
        if (filterByFWHM) {
            final Range<Double> FWHMRange = parameters.getParameter(RowsFilterParameters.FWHM).getEmbeddedParameter().getValue();
            // Fail the row if the best peak's FWHM lies outside the accepted range
            Double FWHM_value = row.getBestPeak().getFWHM();
            if (FWHM_value != null && !FWHMRange.contains(FWHM_value))
                filterRowCriteriaFailed = true;
        }
        // Filter by charge range
        if (filterByCharge) {
            final Range<Integer> chargeRange = parameters.getParameter(RowsFilterParameters.CHARGE).getEmbeddedParameter().getValue();
            int charge = row.getBestPeak().getCharge();
            if (charge == 0 || !chargeRange.contains(charge))
                filterRowCriteriaFailed = true;
        }
        // Filter by KMD or RKM range
        if (filterByKMD) {
            // get embedded parameters
            final Range<Double> rangeKMD = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.kendrickMassDefectRange).getValue();
            final String kendrickMassBase = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.kendrickMassBase).getValue();
            final double shift = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.shift).getValue();
            final int charge = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.charge).getValue();
            final int divisor = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.divisor).getValue();
            final boolean useRemainderOfKendrickMass = parameters.getParameter(RowsFilterParameters.KENDRICK_MASS_DEFECT).getEmbeddedParameters().getParameter(KendrickMassDefectFilterParameters.useRemainderOfKendrickMass).getValue();
            // get m/z
            Double valueMZ = row.getBestPeak().getMZ();
            // calc exact mass of Kendrick mass base
            double exactMassFormula = FormulaUtils.calculateExactMass(kendrickMassBase);
            // calc exact mass of Kendrick mass factor
            double kendrickMassFactor = Math.round(exactMassFormula / divisor) / (exactMassFormula / divisor);
            double defectOrRemainder = 0.0;
            if (!useRemainderOfKendrickMass) {
                // calc Kendrick mass defect
                defectOrRemainder = Math.ceil(charge * (valueMZ * kendrickMassFactor)) - charge * (valueMZ * kendrickMassFactor);
            } else {
                // calc remainder of Kendrick mass: the fractional part of the rescaled m/z
                double rkm = (charge * (divisor - Math.round(exactMassFormula)) * valueMZ) / exactMassFormula;
                defectOrRemainder = rkm - Math.floor(rkm);
            }
            // shift Kendrick mass defect or remainder of Kendrick mass
            double kendrickMassDefectShifted = defectOrRemainder + shift - Math.floor(defectOrRemainder + shift);
            // check if shifted Kendrick mass defect or remainder of Kendrick mass is in range
            if (!rangeKMD.contains(kendrickMassDefectShifted))
                filterRowCriteriaFailed = true;
        }
        // Check MS2 filter: fail the row only if none of its peaks has a fragment scan
        if (filterByMS2) {
            int failCounts = 0;
            for (int i = 0; i < peakCount; i++) {
                if (row.getPeaks()[i].getMostIntenseFragmentScanNumber() < 1) {
                    failCounts++;
                }
            }
            if (failCounts == peakCount) {
                filterRowCriteriaFailed = true;
            }
        }
        if (!filterRowCriteriaFailed && !removeRow) {
            // Keep mode: add the row only if it passed every active criterion
            rowsCount++;
            PeakListRow resetRow = copyPeakRow(row);
            if (renumber) {
                resetRow.setID(rowsCount);
            }
            newPeakList.addRow(resetRow);
        }
        if (filterRowCriteriaFailed && removeRow) {
            // Remove mode: rows matching the criteria are removed, so keep the rows that failed
            rowsCount++;
            PeakListRow resetRow = copyPeakRow(row);
            if (renumber) {
                resetRow.setID(rowsCount);
            }
            newPeakList.addRow(resetRow);
        }
    }
    return newPeakList;
}
Also used : Feature(net.sf.mzmine.datamodel.Feature) IsotopePattern(net.sf.mzmine.datamodel.IsotopePattern) PeakIdentity(net.sf.mzmine.datamodel.PeakIdentity) PeakList(net.sf.mzmine.datamodel.PeakList) PeakListAppliedMethod(net.sf.mzmine.datamodel.PeakList.PeakListAppliedMethod) PeakListRow(net.sf.mzmine.datamodel.PeakListRow) SimpleFeature(net.sf.mzmine.datamodel.impl.SimpleFeature) SimplePeakList(net.sf.mzmine.datamodel.impl.SimplePeakList) SimplePeakListAppliedMethod(net.sf.mzmine.datamodel.impl.SimplePeakListAppliedMethod) SimplePeakListRow(net.sf.mzmine.datamodel.impl.SimplePeakListRow)
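
The Kendrick mass defect arithmetic in filterPeakListRows is easier to follow in isolation. A minimal worked sketch for a CH2 Kendrick base (the exact mass 14.01565 and nominal mass 14 are standard values; the m/z is made up):

public class KendrickMassDefectDemo {
    public static void main(String[] args) {
        double exactMassBase = 14.01565; // exact mass of CH2
        int divisor = 14;                // nominal (integer) mass of CH2
        int charge = 1;
        double mz = 316.2848;            // hypothetical measured m/z

        // The Kendrick mass factor rescales m/z so that the base repeats at integer steps
        double kendrickMassFactor = Math.round(exactMassBase / divisor) / (exactMassBase / divisor);
        double kendrickMass = charge * mz * kendrickMassFactor;

        // As in RowsFilterTask: the defect is the distance up to the next integer
        double kmd = Math.ceil(kendrickMass) - kendrickMass;

        System.out.println("Kendrick mass:        " + kendrickMass);
        System.out.println("Kendrick mass defect: " + kmd);
    }
}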

Aggregations

PeakIdentity (net.sf.mzmine.datamodel.PeakIdentity): 32
PeakListRow (net.sf.mzmine.datamodel.PeakListRow): 19
Feature (net.sf.mzmine.datamodel.Feature): 14
ArrayList (java.util.ArrayList): 9
RawDataFile (net.sf.mzmine.datamodel.RawDataFile): 9
SimplePeakListRow (net.sf.mzmine.datamodel.impl.SimplePeakListRow): 9
PeakList (net.sf.mzmine.datamodel.PeakList): 8
HashMap (java.util.HashMap): 7
SimplePeakIdentity (net.sf.mzmine.datamodel.impl.SimplePeakIdentity): 7
SimplePeakList (net.sf.mzmine.datamodel.impl.SimplePeakList): 7
DataPoint (net.sf.mzmine.datamodel.DataPoint): 6
SimplePeakListAppliedMethod (net.sf.mzmine.datamodel.impl.SimplePeakListAppliedMethod): 6
IOException (java.io.IOException): 5
List (java.util.List): 5
IsotopePattern (net.sf.mzmine.datamodel.IsotopePattern): 5
File (java.io.File): 4
HashSet (java.util.HashSet): 4
Scan (net.sf.mzmine.datamodel.Scan): 4
SimpleDataPoint (net.sf.mzmine.datamodel.impl.SimpleDataPoint): 4
Range (com.google.common.collect.Range): 3