use of net.sf.mzmine.datamodel.PeakListRow in project mzmine2 by mzmine.
the class ADAP3AlignerTask method alignPeaks.
/**
 * Builds one aligned feature list out of all input feature lists.
 *
 * <p>Every input feature list must reference exactly one raw data file. Each row of each list is
 * converted with {@code getComponent(row)}; non-null components are added to a {@code Sample}
 * numbered by the list's index, the samples are handed to {@code alignment}, and
 * {@code process()} is called. The sorted reference components returned by {@code alignment}
 * are then turned into rows (IDs starting at 1) of a new {@code SimplePeakList}.
 *
 * @return the newly created aligned feature list
 * @throws IllegalArgumentException if an input feature list does not have exactly one data file
 * @throws IllegalStateException if no source row can be found for an aligned component
 */
private PeakList alignPeaks() {
    // Collect the single data file of every input feature list
    List<RawDataFile> allDataFiles = new ArrayList<>(peakLists.length);
    for (final PeakList peakList : peakLists) {
        RawDataFile[] dataFiles = peakList.getRawDataFiles();
        // The check rejects zero files as well as several, so the message reports the count
        if (dataFiles.length != 1)
            throw new IllegalArgumentException("Expected exactly one data file in each feature list, but found " + dataFiles.length);
        allDataFiles.add(dataFiles[0]);
    }

    // Register one sample per feature list; rows without a component are skipped
    for (int i = 0; i < peakLists.length; ++i) {
        PeakList peakList = peakLists[i];
        Sample sample = new Sample(i);
        for (final PeakListRow row : peakList.getRows()) {
            Component component = getComponent(row);
            if (component != null)
                sample.addComponent(component);
        }
        alignment.addSample(sample);
    }

    process();

    // Create new feature list
    final PeakList alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
    int rowID = 0;
    List<ReferenceComponent> alignedComponents = alignment.getComponents();
    // Sorts the list returned by the alignment in place (natural ordering of ReferenceComponent)
    Collections.sort(alignedComponents);

    for (final ReferenceComponent referenceComponent : alignedComponents) {
        SimplePeakListRow newRow = new SimplePeakListRow(++rowID);
        for (int i = 0; i < referenceComponent.size(); ++i) {
            Component component = referenceComponent.getComponent(i);
            Peak peak = component.getBestPeak();
            // Overwrite the peak's m/z with the component's m/z before converting it
            peak.getInfo().mzValue(component.getMZ());

            PeakListRow row = findPeakListRow(referenceComponent.getSampleID(i), peak.getInfo().peakID);
            if (row == null)
                // Report the same sample ID that was used for the lookup above
                throw new IllegalStateException(String.format("Cannot find a feature list row for fileId = %d and peakId = %d", referenceComponent.getSampleID(i), peak.getInfo().peakID));

            RawDataFile file = row.getRawDataFiles()[0];

            // Create a new MZmine feature
            Feature feature = ADAPInterface.peakToFeature(file, peak);

            // Add spectrum as an isotopic pattern
            DataPoint[] spectrum = component.getSpectrum().entrySet().stream().map(e -> new SimpleDataPoint(e.getKey(), e.getValue())).toArray(DataPoint[]::new);
            feature.setIsotopePattern(new SimpleIsotopePattern(spectrum, IsotopePattern.IsotopePatternStatus.PREDICTED, "Spectrum"));

            newRow.addPeak(file, feature);
        }

        // Save alignment score
        SimplePeakInformation peakInformation = (SimplePeakInformation) newRow.getPeakInformation();
        if (peakInformation == null)
            peakInformation = new SimplePeakInformation();
        peakInformation.addProperty("Alignment score", Double.toString(referenceComponent.getScore()));
        newRow.setPeakInformation(peakInformation);

        alignedPeakList.addRow(newRow);
    }
    return alignedPeakList;
}
use of net.sf.mzmine.datamodel.PeakListRow in project mzmine2 by mzmine.
the class HierarAlignerGCTask method run.
/**
 * Aligns the rows of all input feature lists by hierarchical clustering and adds the resulting
 * aligned feature list to the project.
 *
 * <p>Steps, as performed below: validate the weights, collect the raw data files (a file may
 * appear in only one input list), optionally pre-compute a row-vs-row distance matrix, cluster
 * the rows (only the {@code CACHED} clusterer is implemented), optionally export the dendrogram
 * as {@code .cdt}/{@code .gtr} files, build one aligned row per validated cluster, and finally
 * register the aligned list with the project. On invalid input the task status is set to
 * {@code ERROR} and the method returns early; on cancellation it returns silently.
 *
 * @throws IllegalStateException if the configured clusterer type is {@code CLASSIC} or
 *         {@code HYBRID}, which are not implemented
 * @see Runnable#run()
 */
public void run() {
    // Check options validity
    if ((Math.abs(mzWeight) < EPSILON) && (Math.abs(rtWeight) < EPSILON)) {
        setStatus(TaskStatus.ERROR);
        setErrorMessage("Cannot run alignment, all the weight parameters are zero!");
        return;
    }
    setStatus(TaskStatus.PROCESSING);
    logger.info("Running join aligner");

    // Timing, only measured and reported when DEBUG is on
    long startTime, endTime;
    float ms;
    if (DEBUG)
        startTime = System.currentTimeMillis();

    // Memory bookkeeping passed to the DEBUG memory reports
    Runtime run_time = Runtime.getRuntime();
    Long prevTotal = 0L;
    Long prevFree = run_time.freeMemory();
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "START TASK...");

    // Progress total: every source row is counted three times
    for (int i = 0; i < peakLists.length; i++) {
        totalRows += peakLists[i].getNumberOfRows() * 3;
    }

    // Collect all data files
    Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
    for (PeakList peakList : peakLists) {
        for (RawDataFile dataFile : peakList.getRawDataFiles()) {
            // Each data file can only have one column in aligned feature list
            if (allDataFiles.contains(dataFile)) {
                setStatus(TaskStatus.ERROR);
                setErrorMessage("Cannot run alignment, because file " + dataFile + " is present in multiple feature lists");
                return;
            }
            allDataFiles.add(dataFile);
        }
    }

    // Create a new aligned feature list
    alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "COMPOUND DETECTED");

    /**
     * Alignment mapping *
     */
    // NOTE: rtPeaksBackup is never filled in this method, so the restore loop further down is
    // currently a no-op; infoRowsBackup is used to remember the created target rows.
    Hashtable<SimpleFeature, Double> rtPeaksBackup = new Hashtable<SimpleFeature, Double>();
    Hashtable<PeakListRow, Object[]> infoRowsBackup = new Hashtable<PeakListRow, Object[]>();

    // Since clustering is now order independent, option removed!
    // Build comparison order (identity order over the input lists)
    ArrayList<Integer> orderIds = new ArrayList<Integer>();
    for (int i = 0; i < peakLists.length; ++i) {
        orderIds.add(i);
    }
    Integer[] newIds = orderIds.toArray(new Integer[orderIds.size()]);

    DistanceMatrix distancesGNF_Tri = null;
    DistanceMatrix distancesGNF_Tri_Bkp = null;

    // Total number of rows over all input lists
    int nbPeaks = 0;
    for (int i = 0; i < newIds.length; ++i) {
        PeakList peakList = peakLists[newIds[i]];
        nbPeaks += peakList.getNumberOfRows();
    }

    // The matrix is only allocated for 'Hybrid' or when RAM saving (level 1) is off
    if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
        distancesGNF_Tri = new DistanceMatrixTriangular1D2D(nbPeaks);
    }

    // Flatten all rows into one list; list indices are the row indices used by the clusterer
    full_rows_list = new ArrayList<>();
    for (int i = 0; i < newIds.length; ++i) {
        PeakList peakList = peakLists[newIds[i]];
        PeakListRow[] allRows = peakList.getRows();
        for (int j = 0; j < allRows.length; ++j) {
            PeakListRow row = allRows[j];
            full_rows_list.add(row);
        }
    }

    RowVsRowDistanceProvider distProvider = new RowVsRowDistanceProvider(project, // rtAdjustementMapping,
            full_rows_list, mzWeight, rtWeight, // rtToleranceAfter,
            maximumScore);

    // Fill the upper triangle (including the diagonal) of the distance matrix, if allocated
    if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
        for (int x = 0; x < nbPeaks; ++x) {
            for (int y = x; y < nbPeaks; ++y) {
                float dist = (float) distProvider.getRankedDistance(x, y, mzTolerance.getMzTolerance(), rtTolerance.getTolerance(), minScore);
                distancesGNF_Tri.setValue(x, y, dist);
            }
            processedRows++;
            if (DEBUG)
                logger.info("Treating lists: " + (Math.round(100 * processedRows / (double) nbPeaks)) + " %");
        }
    }
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "DISTANCES COMPUTED");

    double max_dist = maximumScore;
    List<List<Integer>> gnfClusters = null;

    boolean do_verbose = true;
    boolean do_cluster = true;
    boolean do_print = (exportDendrogramAsTxt);
    boolean do_data = false;
    org.gnf.clustering.Node[] arNodes = null;
    int nRowCount = full_rows_list.size();

    // Row labels are only needed for the dendrogram export
    String[] rowNames = null;
    if (do_print) {
        rowNames = new String[nRowCount];
        for (int i = 0; i < nRowCount; i++) {
            Feature peak = full_rows_list.get(i).getBestPeak();
            double rt = peak.getRT();
            String fileName = peak.getDataFile().getName();
            int end = fileName.indexOf(" ");
            // Use the text before the first space; fall back to the whole name when there is
            // no space (substring(0, -1) would throw)
            String short_fname = (end >= 0) ? fileName.substring(0, end) : fileName;
            rowNames[i] = "@" + rtFormat.format(rt) + "^[" + short_fname + "]";
        }
    }

    String outputPrefix = null;
    if (CLUSTERER_TYPE == ClustererType.CLASSIC) {
        // Pure Hierar!
        outputPrefix = "hierar_0";
        throw new IllegalStateException("'" + ClustererType.CLASSIC.toString() + "' algorithm not yet implemented!");
    } else if (CLUSTERER_TYPE == ClustererType.CACHED) {
        // The matrix may not have been allocated (RAM saving level 1), so guard the dump
        if (DEBUG_2 && distancesGNF_Tri != null)
            logger.info(distancesGNF_Tri.toString());
        if (saveRAMratherThanCPU_2) {
            // Requires: distances values will be recomputed on demand during
            // "getValidatedClusters_3()". No duplicate backup storage!
            distancesGNF_Tri_Bkp = null;
        } else {
            // Otherwise, backing up the distance matrix (matrix being deeply changed during
            // "clusterDM()", then no more exploitable)
            distancesGNF_Tri_Bkp = new DistanceMatrixTriangular1D2D(distancesGNF_Tri);
            if (DEBUG)
                printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER BACKUP MATRIX");
        }
        if (DEBUG)
            logger.info("Clustering...");
        if (distancesGNF_Tri != null) {
            arNodes = org.gnf.clustering.sequentialcache.SequentialCacheClustering.clusterDM(distancesGNF_Tri, linkageStartegyType, null, nRowCount);
            // Dump the (now modified) matrix BEFORE releasing it; doing it after the
            // reference is cleared would dereference null
            if (DEBUG_2)
                logger.info(distancesGNF_Tri.toString());
        }
        // Release the working matrix and hint the GC
        distancesGNF_Tri = null;
        System.gc();
        if (DEBUG)
            printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER DONE");
        if (DEBUG_2)
            for (int i = 0; i < arNodes.length; i++) {
                logger.info("Node " + i + ": " + arNodes[i]);
            }
        outputPrefix = "hierar_1";
    } else if (CLUSTERER_TYPE == ClustererType.HYBRID) {
        throw new IllegalStateException("'" + ClustererType.HYBRID.toString() + "' algorithm not yet implemented!");
    }

    // Sort Nodes by correlation score (Required in 'getValidatedClusters_3')
    int[] rowOrder = new int[nRowCount];
    if (DEBUG)
        logger.info("Sorting tree nodes...");
    org.gnf.clustering.Utils.NodeSort(arNodes, nRowCount - 2, 0, rowOrder);

    if (do_cluster) {
        gnfClusters = getValidatedClusters_3(arNodes, 0.0f, newIds.length, max_dist, distancesGNF_Tri_Bkp, distProvider);
        // -- Print
        if (DEBUG_2 && do_verbose)
            for (int i = 0; i < gnfClusters.size(); i++) {
                List<Integer> cl = gnfClusters.get(i);
                String str = "";
                for (int j = 0; j < cl.size(); j++) {
                    int r = cl.get(j);
                    str += cl.get(j) + "^(" + full_rows_list.get(r).getID() + ", " + full_rows_list.get(r).getAverageRT() + ")" + " ";
                }
                logger.info(str);
            }
    }

    // File output
    if (DEBUG)
        logger.info("Writing output to file...");
    int nColCount = 1;
    String[] colNames = new String[nColCount];
    colNames[nColCount - 1] = "Id";
    String sep = "\t";
    if (do_print) {
        // Resolve the output paths only when an export is requested, and tolerate a file
        // name without an extension (substring(0, -1) would throw)
        String absolutePath = dendrogramTxtFilename.getAbsolutePath();
        int ext_pos = absolutePath.lastIndexOf(".");
        outputPrefix = (ext_pos >= 0) ? absolutePath.substring(0, ext_pos) : absolutePath;
        String outGtr = outputPrefix + ".gtr";
        String outCdt = outputPrefix + ".cdt";
        try {
            float[] arFloats = new float[nRowCount];
            for (int i = 0; i < arFloats.length; i++) {
                arFloats[i] = i / 2.0f;
            }
            DataSource source = (do_data) ? new FloatSource1D(arFloats, nRowCount, nColCount) : null;
            /* org.gnf.clustering.Utils. */
            HierarAlignerGCTask.GenerateCDT(outCdt, source, /* null */
                    nRowCount, nColCount, sep, rowNames, colNames, rowOrder);
        } catch (IOException e) {
            // Export is best-effort: a failed CDT write is reported but does not abort the task
            e.printStackTrace();
        }
        org.gnf.clustering.Utils.WriteTreeToFile(outGtr, nRowCount - 1, arNodes, true);
        if (DEBUG)
            printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER FILES PRINTED");
    }

    // Build feature list row clusters from the clustered row indices
    List<List<PeakListRow>> clustersList = new ArrayList<>();
    for (List<Integer> cl : gnfClusters) {
        List<PeakListRow> rows_cluster = new ArrayList<>();
        for (int i = 0; i < cl.size(); i++) {
            rows_cluster.add(full_rows_list.get(cl.get(i)));
        }
        clustersList.add(rows_cluster);
        processedRows += rows_cluster.size();
    }
    if (DEBUG)
        printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER CLUSTER_LIST");

    // Fill alignment table: One row per cluster
    for (List<PeakListRow> cluster : clustersList) {
        if (isCanceled())
            return;
        PeakListRow targetRow = new SimplePeakListRow(newRowID);
        newRowID++;
        alignedPeakList.addRow(targetRow);
        infoRowsBackup.put(targetRow, new Object[] { new HashMap<RawDataFile, Double[]>(), new HashMap<RawDataFile, PeakIdentity>(), new HashMap<RawDataFile, Double>() });
        for (PeakListRow row : cluster) {
            // Set the preferred identity (the last row of the cluster wins)
            targetRow.setPreferredPeakIdentity(row.getPreferredPeakIdentity());
            // The source row's files do not change inside the loop, so wrap them once
            List<RawDataFile> rowFiles = Arrays.asList(row.getRawDataFiles());
            for (RawDataFile file : alignedPeakList.getRawDataFiles()) {
                if (rowFiles.contains(file)) {
                    Feature originalPeak = row.getPeak(file);
                    if (originalPeak != null) {
                        targetRow.addPeak(file, originalPeak);
                    } else {
                        setStatus(TaskStatus.ERROR);
                        setErrorMessage("Cannot run alignment, no originalPeak");
                        return;
                    }
                }
            }
            // Add all non-existing identities from the original row to the aligned row
            for (PeakIdentity identity : row.getPeakIdentities()) {
                PeakIdentity clonedIdentity = (PeakIdentity) identity.clone();
                if (!PeakUtils.containsIdentity(targetRow, clonedIdentity))
                    targetRow.addPeakIdentity(clonedIdentity, false);
            }
        }
    }

    // Restore backed-up retention times before the "targetRow.update()" used down there
    for (SimpleFeature peak : rtPeaksBackup.keySet()) {
        peak.setRT((double) rtPeaksBackup.get(peak));
    }

    /**
     * Post-processing... *
     */
    // Sorted copy of the aligned list's raw data files (not read again in this method)
    RawDataFile[] rdf_sorted = alignedPeakList.getRawDataFiles().clone();
    Arrays.sort(rdf_sorted, new RawDataFileSorter(SortingDirection.Ascending));
    // Process
    for (PeakListRow targetRow : infoRowsBackup.keySet()) {
        if (isCanceled())
            return;
        // Refresh averaged RTs...
        ((SimplePeakListRow) targetRow).update();
    }

    if (DEBUG) {
        endTime = System.currentTimeMillis();
        ms = (endTime - startTime);
        logger.info("## >> Whole JoinAlignerGCTask processing took " + Float.toString(ms) + " ms.");
    }

    // ----------------------------------------------------------------------
    // Add new aligned feature list to the project
    this.project.addPeakList(alignedPeakList);
    if (DEBUG) {
        for (RawDataFile rdf : alignedPeakList.getRawDataFiles()) logger.info("RDF: " + rdf);
    }
    // Add task description to peakList
    alignedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod(HierarAlignerGCTask.TASK_NAME, parameters));
    logger.info("Finished join aligner GC");
    setStatus(TaskStatus.FINISHED);
}
use of net.sf.mzmine.datamodel.PeakListRow in project mzmine2 by mzmine.
the class JoinAlignerTask method run.
/**
 * Aligns all input feature lists into one new feature list and adds it to the project.
 *
 * <p>The input lists are processed one after another. For each list, every row is scored
 * against the rows of the aligned list that fall inside its m/z and RT tolerance ranges
 * (optionally filtered by charge state, identity, isotope pattern and spectral similarity).
 * The scores are then walked in the {@code TreeSet}'s order and each source row is mapped to at
 * most one aligned row, and each aligned row receives at most one source row per list. Rows
 * without a mapping start a new aligned row.
 *
 * <p>If both weights are zero, or a raw data file occurs in more than one input list, the task
 * status is set to {@code ERROR} and the method returns. On cancellation it returns silently.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    if ((mzWeight == 0) && (rtWeight == 0)) {
        setStatus(TaskStatus.ERROR);
        setErrorMessage("Cannot run alignment, all the weight parameters are zero");
        return;
    }
    setStatus(TaskStatus.PROCESSING);
    logger.info("Running join aligner");

    // Every row is counted twice, first for score calculation, second for actual alignment.
    for (int i = 0; i < peakLists.length; i++) {
        totalRows += peakLists[i].getNumberOfRows() * 2;
    }

    // Collect all data files
    Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
    for (PeakList peakList : peakLists) {
        for (RawDataFile dataFile : peakList.getRawDataFiles()) {
            // Each data file can only have one column in aligned feature list
            if (allDataFiles.contains(dataFile)) {
                setStatus(TaskStatus.ERROR);
                setErrorMessage("Cannot run alignment, because file " + dataFile + " is present in multiple feature lists");
                return;
            }
            allDataFiles.add(dataFile);
        }
    }

    // Create a new aligned feature list
    alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));

    // Iterate source feature lists
    for (PeakList peakList : peakLists) {
        // Create a sorted set of scores matching
        TreeSet<RowVsRowScore> scoreSet = new TreeSet<RowVsRowScore>();
        PeakListRow[] allRows = peakList.getRows();

        // Calculate scores for all possible alignments of this row
        for (PeakListRow row : allRows) {
            if (isCanceled())
                return;

            // Calculate limits for a row with which the row can be aligned
            Range<Double> mzRange = mzTolerance.getToleranceRange(row.getAverageMZ());
            Range<Double> rtRange = rtTolerance.getToleranceRange(row.getAverageRT());

            // Get all rows of the aligned peaklist within parameter limits
            PeakListRow[] candidateRows = alignedPeakList.getRowsInsideScanAndMZRange(rtRange, mzRange);

            // Calculate scores and store them
            for (PeakListRow candidate : candidateRows) {
                if (sameChargeRequired && !PeakUtils.compareChargeState(row, candidate)) {
                    continue;
                }
                if (sameIDRequired && !PeakUtils.compareIdentities(row, candidate)) {
                    continue;
                }
                if (compareIsotopePattern) {
                    IsotopePattern ip1 = row.getBestIsotopePattern();
                    IsotopePattern ip2 = candidate.getBestIsotopePattern();
                    // The isotope filter only applies when both rows carry a pattern
                    if ((ip1 != null) && (ip2 != null)) {
                        ParameterSet isotopeParams = parameters.getParameter(JoinAlignerParameters.compareIsotopePattern).getEmbeddedParameters();
                        if (!IsotopePatternScoreCalculator.checkMatch(ip1, ip2, isotopeParams)) {
                            continue;
                        }
                    }
                }

                // compare the similarity of spectra mass lists on MS1 or MS2 level
                if (compareSpectraSimilarity) {
                    DataPoint[] rowDPs = null;
                    DataPoint[] candidateDPs = null;

                    // get data points of mass list of the representative scans
                    if (msLevel == 1) {
                        rowDPs = row.getBestPeak().getRepresentativeScan().getMassList(massList).getDataPoints();
                        candidateDPs = candidate.getBestPeak().getRepresentativeScan().getMassList(massList).getDataPoints();
                    }

                    // get data points of mass list of the best fragmentation scans
                    if (msLevel == 2) {
                        if (row.getBestFragmentation() != null && candidate.getBestFragmentation() != null) {
                            rowDPs = row.getBestFragmentation().getMassList(massList).getDataPoints();
                            candidateDPs = candidate.getBestFragmentation().getMassList(massList).getDataPoints();
                        } else {
                            // Without a fragmentation scan on both sides the pair is rejected
                            continue;
                        }
                    }

                    // compare mass list data points of selected scans
                    if (rowDPs != null && candidateDPs != null) {
                        // A null similarity is treated as "no match" and the pair is skipped
                        SpectralSimilarity sim = createSimilarity(rowDPs, candidateDPs);
                        if (sim == null) {
                            continue;
                        }
                    }
                }

                RowVsRowScore score = new RowVsRowScore(row, candidate, RangeUtils.rangeLength(mzRange) / 2.0, mzWeight, RangeUtils.rangeLength(rtRange) / 2.0, rtWeight);
                scoreSet.add(score);
            }
            processedRows++;
        }

        // Create a table of mappings for best scores
        Hashtable<PeakListRow, PeakListRow> alignmentMapping = new Hashtable<PeakListRow, PeakListRow>();
        // Aligned rows that already received a row from this list. Kept next to the mapping so
        // the "already filled" check is a hash lookup instead of a linear
        // Hashtable.containsValue scan per score.
        java.util.Set<PeakListRow> filledAlignedRows = new java.util.HashSet<PeakListRow>();

        // Iterate scores in the set's sort order
        for (RowVsRowScore score : scoreSet) {
            // Check if the row is already mapped
            if (alignmentMapping.containsKey(score.getPeakListRow()))
                continue;
            // Check if the aligned row is already filled
            if (filledAlignedRows.contains(score.getAlignedRow()))
                continue;
            alignmentMapping.put(score.getPeakListRow(), score.getAlignedRow());
            filledAlignedRows.add(score.getAlignedRow());
        }

        // Align all rows using mapping
        for (PeakListRow row : allRows) {
            PeakListRow targetRow = alignmentMapping.get(row);

            // If we have no mapping for this row, add a new one
            if (targetRow == null) {
                targetRow = new SimplePeakListRow(newRowID);
                newRowID++;
                alignedPeakList.addRow(targetRow);
            }

            // Add all peaks from the original row to the aligned row
            for (RawDataFile file : row.getRawDataFiles()) {
                targetRow.addPeak(file, row.getPeak(file));
            }

            // Add all non-existing identities from the original row to the aligned row
            PeakUtils.copyPeakListRowProperties(row, targetRow);
            processedRows++;
        }
    }
    // Next feature list

    // Add new aligned feature list to the project
    project.addPeakList(alignedPeakList);
    // Add task description to peakList
    alignedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod("Join aligner", parameters));
    logger.info("Finished join aligner");
    setStatus(TaskStatus.FINISHED);
}
use of net.sf.mzmine.datamodel.PeakListRow in project mzmine2 by mzmine.
the class ScoreAligner method generatePath.
/**
 * Builds an alignment path that starts from {@code base} in column {@code col} and visits every
 * other column once, in cyclic order starting at {@code col + 1}.
 *
 * <p>In each visited column the non-null row that matches the path and has the lowest score is
 * chosen. It is added to the path only if its score is below the gap penalty of 1.25; otherwise
 * a gap with that penalty is added for the column.
 *
 * @param col index of the column the base row belongs to
 * @param c calculator used for matching and scoring candidates against the path
 * @param base row the path starts from
 * @param listOfPeaksInFiles candidate rows, one list per column
 * @return the completed alignment path
 */
private AlignmentPath generatePath(int col, ScoreCalculator c, PeakListRow base, List<List<PeakListRow>> listOfPeaksInFiles) {
    final int columnCount = listOfPeaksInFiles.size();
    final double gapPenalty = 1.25;
    final AlignmentPath path = new AlignmentPath(columnCount, base, col);

    int column = (col + 1) % columnCount;
    while (column != col) {
        PeakListRow winner = null;
        double winnerScore = c.getWorstScore();

        for (PeakListRow candidate : listOfPeaksInFiles.get(column)) {
            // Only non-null rows accepted by the calculator are scored
            if (candidate != null && c.matches(path, candidate, params)) {
                final double candidateScore = c.calculateScore(path, candidate, params);
                if (candidateScore < winnerScore) {
                    winner = candidate;
                    winnerScore = candidateScore;
                }
            }
        }

        final boolean acceptWinner = winner != null && winnerScore < gapPenalty;
        if (acceptWinner) {
            path.add(column, winner, winnerScore);
        } else {
            path.addGap(column, gapPenalty);
        }

        column = (column + 1) % columnCount;
    }
    return path;
}
use of net.sf.mzmine.datamodel.PeakListRow in project mzmine2 by mzmine.
the class ScoreAligner method align.
/*
 * (non-Javadoc)
 *
 * Returns the aligned feature list, computing it on the first call only. The computation
 * collects the raw data files of all original feature lists, recounts peaksTotal, creates the
 * result list named by PathAlignerParameters.peakListName and adds one row (IDs starting at 1)
 * per alignment path returned by getAlignmentPaths().
 *
 * @see gcgcaligner.AbstractAligner#align()
 */
public PeakList align() {
    // The result is cached: reuse it when the alignment has already been done
    if (alignment != null) {
        return alignment;
    }

    // Gather the data files of every original feature list, in list order
    Vector<RawDataFile> dataFiles = new Vector<RawDataFile>();
    for (PeakList source : this.originalPeakList) {
        for (RawDataFile dataFile : source.getRawDataFiles()) {
            dataFiles.add(dataFile);
        }
    }

    // Recount the total number of peaks over all per-file lists
    peaksTotal = 0;
    for (int fileIndex = 0; fileIndex < peakList.size(); fileIndex++) {
        peaksTotal += peakList.get(fileIndex).size();
    }

    alignment = new SimplePeakList(params.getParameter(PathAlignerParameters.peakListName).getValue(), dataFiles.toArray(new RawDataFile[0]));

    // Convert alignments to original order of files and add them to the final alignment
    // data structure
    List<AlignmentPath> paths = getAlignmentPaths();
    int nextRowId = 1;
    for (AlignmentPath alignmentPath : paths) {
        PeakListRow alignedRow = (PeakListRow) alignmentPath.convertToAlignmentRow(nextRowId);
        nextRowId++;
        alignment.addRow(alignedRow);
    }

    return alignment;
}
Aggregations