Use of net.sf.mzmine.datamodel.impl.SimplePeakList in project mzmine2 by mzmine.
The class GridMassTask, method run().
/**
* @see Runnable#run()
*/
public void run() {
Format mzFormat = MZmineCore.getConfiguration().getMZFormat();
Format timeFormat = MZmineCore.getConfiguration().getRTFormat();
setStatus(TaskStatus.PROCESSING);
logger.info("Started GRIDMASS v1.0 [Apr-09-2014] on " + dataFile);
scans = scanSelection.getMatchingScans(dataFile);
scanNumbers = scanSelection.getMatchingScanNumbers(dataFile);
totalScans = scans.length;
// Check if we have any scans
if (totalScans == 0) {
setStatus(TaskStatus.ERROR);
setErrorMessage("No scans match the selected criteria");
return;
}
// Check if the scans are properly ordered by RT
double prevRT = Double.NEGATIVE_INFINITY;
for (Scan s : scans) {
if (s.getRetentionTime() < prevRT) {
setStatus(TaskStatus.ERROR);
final String msg = "Retention time of scan #" + s.getScanNumber() + " is smaller than the retention time of the previous scan." + " Please make sure you only use scans with increasing retention times." + " You can restrict the scan numbers in the parameters, or you can use the Crop filter module";
setErrorMessage(msg);
return;
}
prevRT = s.getRetentionTime();
}
// Create new feature list
newPeakList = new SimplePeakList(dataFile + " " + suffix, dataFile);
int j;
// minimumTimeSpan
Scan scan = scans[0];
double minRT = scan.getRetentionTime();
double maxRT = scan.getRetentionTime();
retentiontime = new double[totalScans];
int i;
for (i = 0; i < totalScans; i++) {
scan = scans[i];
double irt = scan.getRetentionTime();
if (irt < minRT)
minRT = irt;
if (irt > maxRT)
maxRT = irt;
retentiontime[i] = irt;
}
rtPerScan = (maxRT - minRT) / i;
// "tolerable" units in scans
tolScans = Math.max(2, (int) ((minimumTimeSpan / rtPerScan)));
maxTolScans = Math.max(2, (int) ((maximumTimeSpan / rtPerScan)));
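// Worked example (assumed numbers): if the retention times span 10 min over 600 scans,
// rtPerScan is about 0.0167 min; a minimumTimeSpan of 0.2 min then gives
// tolScans = max(2, 12) = 12, and a maximumTimeSpan of 1.0 min gives maxTolScans = 60.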
// Algorithm to find masses:
// (1) copy masses:intensity > threshold
// (2) sort intensities descend
// (3) Find "spot" for each intensity
// (3.1) if they have not spot ID assigned
// (3.1.1) Extend mass in mass and time while > 70% pixels > threshold
// (3.1.2) If extension > mintime ==> mark all pixels with the spot ID
// (3.1.3) if extension < mintime ==> mark all pixels with spot ID = -1
// (4) Group spots within a time-tolerance and mass-tolerance
logger.info("Getting data points on " + dataFile);
roi = new Datum[totalScans][];
ArrayList<Datum> roiAL = new ArrayList<Datum>();
long passed = 0, nopassed = 0;
minMasa = Double.MAX_VALUE;
maxMasa = 0;
int maxJ = 0;
boolean[] scanOk = new boolean[totalScans];
Arrays.fill(scanOk, true);
logger.info("Smoothing data points on " + dataFile + " (Time min=" + smoothTimeSpan + "; Time m/z=" + smoothTimeMZ + ")");
IndexedDataPoint[][] data = smoothDataPoints(dataFile, smoothTimeSpan, smoothTimeMZ, 0, smoothMZ, 0, minimumHeight);
logger.info("Determining intensities (mass sum) per scan on " + dataFile);
for (i = 0; i < totalScans; i++) {
if (isCanceled())
return;
scan = scans[i];
// scan.getDataPoints();
IndexedDataPoint[] mzv = data[i];
double prev = (mzv.length > 0 ? mzv[0].datapoint.getMZ() : 0);
double massSum = 0;
for (j = 0; j < mzv.length; j++) {
if (mzv[j].datapoint.getIntensity() >= minimumHeight)
massSum += mzv[j].datapoint.getMZ() - prev;
prev = mzv[j].datapoint.getMZ();
if (mzv[j].datapoint.getMZ() < minMasa)
minMasa = mzv[j].datapoint.getMZ();
if (mzv[j].datapoint.getMZ() > maxMasa)
maxMasa = mzv[j].datapoint.getMZ();
}
double dm = 100.0 / (maxMasa - minMasa);
if (i % 30 == 0 && debug > 0) {
System.out.println("");
System.out.print("t=" + Math.round(retentiontime[i] * 100) / 100.0 + ": (in %) ");
}
if (scanOk[i]) {
// Note: scanOk[i] is true in this branch, so the re-disabling block below never runs as written.
if (!scanOk[i]) {
// Disable neighbouring scans, how many ?
for (j = i; j > 0 && retentiontime[j] + additionTimeMaxPeaksPerScan > retentiontime[i]; j--) {
scanOk[j] = false;
}
for (j = i; j < totalScans && retentiontime[j] - additionTimeMaxPeaksPerScan < retentiontime[i]; j++) {
scanOk[j] = false;
}
}
if (debug > 0)
System.out.print(((int) (massSum * dm)) + (scanOk[i] ? " " : "*** "));
} else {
if (debug > 0)
System.out.print(((int) (massSum * dm)) + (scanOk[i] ? " " : "* "));
}
setProcedure(i, totalScans, 1);
}
if (debug > 0)
System.out.println("");
String[] it = ignoreTimes.trim().split(", ?");
for (j = 0; j < it.length; j++) {
String[] itj = it[j].split("-");
if (itj.length == 2) {
Double a = Double.parseDouble(itj[0].trim());
Double b = Double.parseDouble(itj[1].trim());
for (i = Math.abs(Arrays.binarySearch(retentiontime, a)); i < totalScans && retentiontime[i] <= b; i++) {
if (retentiontime[i] >= a) {
scanOk[i] = false;
}
}
}
}
passed = 0;
nopassed = 0;
for (i = 0; i < totalScans; i++) {
if (i % 100 == 0 && isCanceled())
return;
if (scanOk[i]) {
scan = scans[i];
IndexedDataPoint[] mzv = data[i];
DataPoint[] mzvOriginal = scan.getDataPoints();
ArrayList<Datum> dal = new ArrayList<Datum>();
for (j = 0; j < mzv.length; j++) {
if (mzv[j].datapoint.getIntensity() >= minimumHeight) {
dal.add(new Datum(mzv[j].datapoint, i, mzvOriginal[mzv[j].index]));
passed++;
} else {
nopassed++;
}
}
if (j > maxJ)
maxJ = j;
roi[i] = dal.toArray(new Datum[0]);
roiAL.addAll(dal);
}
setProcedure(i, totalScans, 2);
}
logger.info(passed + " intensities >= " + minimumHeight + " of " + (passed + nopassed) + " (" + Math.round(passed * 10000.0 / (double) (passed + nopassed)) / 100.0 + "%) on " + dataFile);
// New "probing" algorithm
// (1) Generate probes all over chromatograms
// (2) Move each probe to its closest maximum until it cannot find a
// new maximum
// (3) assign spot id to each "center" using all points within region
// (1) Generate probes all over
double byMZ = Math.max(mzTol * 2, 1e-6);
int byScan = Math.max(1, tolScans / 4);
logger.info("Creating Grid of probes on " + dataFile + " every " + mzFormat.format(byMZ) + " m/z and " + byScan + " scans");
double m;
int ndata = (int) Math.round((((double) totalScans / (double) byScan) + 1) * ((maxMasa - minMasa + byMZ) / byMZ));
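// Worked example (assumed numbers): with mzTol = 0.05, probes are spaced every byMZ = 0.1 m/z;
// with tolScans = 12, every byScan = 3 scans. For a 600-scan file covering 400 m/z the probe
// array is then sized to roughly (600/3 + 1) * (400.1/0.1) ≈ 8.0e5 entries.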
Probe[] probes = new Probe[ndata];
int idata = 0;
for (i = 0; i < totalScans; i += byScan) {
if (i % 100 == 0 && isCanceled())
return;
for (m = minMasa - (i % 2) * byMZ / 2; m <= maxMasa; m += byMZ) {
probes[idata++] = new Probe(m, i);
}
setProcedure(i, totalScans, 3);
}
// (2) Move each probe to its closest center
double mzR = byMZ / 2;
int scanR = Math.max(byScan - 1, 2);
logger.info("Finding local maxima for each probe on " + dataFile + " radius: scans=" + scanR + ", m/z=" + mzR);
int okProbes = 0;
for (i = 0; i < idata; i++) {
if (i % 100 == 0 && isCanceled())
return;
moveProbeToCenter(probes[i], scanR, mzR);
if (probes[i].intensityCenter < minimumHeight) {
probes[i] = null;
} else {
okProbes++;
}
setProcedure(i, idata, 4);
}
if (okProbes > 0) {
Probe[] pArr = new Probe[okProbes];
for (okProbes = i = 0; i < idata; i++) {
if (probes[i] != null) {
pArr[okProbes++] = probes[i];
}
}
probes = pArr;
pArr = null;
}
// (3) Assign spot id to each "center"
logger.info("Sorting probes " + dataFile);
Arrays.sort(probes);
logger.info("Assigning spot id to local maxima on " + dataFile);
SpotByProbes sbp = new SpotByProbes();
ArrayList<SpotByProbes> spots = new ArrayList<SpotByProbes>();
double mzA = -1;
int scanA = -1;
for (i = 0; i < probes.length; i++) {
if (probes[i] != null && probes[i].intensityCenter >= minimumHeight) {
if (probes[i].mzCenter != mzA || probes[i].scanCenter != scanA) {
if (i % 10 == 0 && isCanceled())
return;
if (sbp.size() > 0) {
spots.add(sbp);
sbp.assignSpotId();
// System.out.println(sbp.toString());
}
sbp = new SpotByProbes();
mzA = probes[i].mzCenter;
scanA = probes[i].scanCenter;
}
sbp.addProbe(probes[i]);
}
setProcedure(i, probes.length, 5);
}
if (sbp.size() > 0) {
spots.add(sbp);
sbp.assignSpotId();
// System.out.println(sbp.toString());
}
logger.info("Spots:" + spots.size());
// Assign each datum to a single spot, so that no datum is shared between
// several spots
logger.info("Assigning intensities to local maxima on " + dataFile);
i = 0;
for (SpotByProbes sx : spots) {
if (sx.size() > 0) {
if (i % 100 == 0 && isCanceled())
return;
assignSpotIdToDatumsFromScans(sx, scanR, mzR);
}
setProcedure(i++, spots.size(), 6);
}
// (4) Join Tolerable Centers
logger.info("Joining tolerable maxima on " + dataFile);
int criticScans = Math.max(1, tolScans / 2);
int joins = 0;
for (i = 0; i < spots.size() - 1; i++) {
SpotByProbes s1 = spots.get(i);
if (s1.center != null && s1.size() > 0) {
if (i % 100 == 0 && isCanceled())
return;
for (j = i; j > 0 && j < spots.size() && spots.get(j - 1).center != null && spots.get(j - 1).center.mzCenter + mzTol > s1.center.mzCenter; j--) ;
for (; j < spots.size(); j++) {
SpotByProbes s2 = spots.get(j);
if (i != j && s2.center != null) {
if (s2.center.mzCenter - s1.center.mzCenter > mzTol)
break;
int l = Math.min(Math.abs(s1.minScan - s2.minScan), Math.abs(s1.minScan - s2.maxScan));
int r = Math.min(Math.abs(s1.maxScan - s2.minScan), Math.abs(s1.maxScan - s2.maxScan));
int d = Math.min(l, r);
boolean overlap = !(s2.maxScan < s1.minScan || s2.minScan > s1.maxScan);
if ((d <= criticScans || overlap) && (intensityRatio(s1.center.intensityCenter, s2.center.intensityCenter) > intensitySimilarity)) {
if (debug > 2)
System.out.println("Joining s1 id " + s1.spotId + "=" + mzFormat.format(s1.center.mzCenter) + " mz [" + mzFormat.format(s1.minMZ) + " ~ " + mzFormat.format(s1.maxMZ) + "] time=" + timeFormat.format(retentiontime[s1.center.scanCenter]) + " int=" + s1.center.intensityCenter + " with s2 id " + s2.spotId + "=" + mzFormat.format(s2.center.mzCenter) + " mz [" + mzFormat.format(s2.minMZ) + " ~ " + mzFormat.format(s2.maxMZ) + "] time=" + timeFormat.format(retentiontime[s2.center.scanCenter]) + " int=" + s2.center.intensityCenter);
assignSpotIdToDatumsFromSpotId(s1, s2, scanR, mzR);
s1.addProbesFromSpot(s2, true);
// restart
j = i;
joins++;
}
// }
}
}
}
setProcedure(i, spots.size(), 7);
}
logger.info("Joins:" + joins);
// (5) Remove "Large" spanned masses
logger.info("Removing long and comparable 'masses' on " + dataFile);
for (i = 0; i < spots.size() - 1; i++) {
SpotByProbes s1 = spots.get(i);
if (s1.center != null && s1.size() > 0) {
if (i % 100 == 0 && isCanceled())
return;
int totalScans = s1.maxScan - s1.minScan + 1;
int lScan = s1.minScan;
int rScan = s1.maxScan;
ArrayList<Integer> toRemove = new ArrayList<Integer>();
toRemove.add(i);
for (j = i; j > 0 && j < spots.size() && spots.get(j - 1).center != null && spots.get(j - 1).center.mzCenter + mzTol > s1.center.mzCenter; j--) ;
for (; j < spots.size(); j++) {
SpotByProbes s2 = spots.get(j);
if (i != j && s2.center != null) {
if (s2.center.mzCenter - s1.center.mzCenter > mzTol)
break;
if (intensityRatio(s1.center.intensityCenter, s2.center.intensityCenter) > intensitySimilarity) {
int dl = Math.min(Math.abs(lScan - s2.minScan), Math.abs(lScan - s2.maxScan));
int dr = Math.min(Math.abs(rScan - s2.minScan), Math.abs(rScan - s2.maxScan));
int md = Math.min(dl, dr);
if (md <= maxTolScans || !(s2.maxScan < lScan || s2.minScan > rScan)) {
// tolerable distance or overlapping scan ranges
totalScans += s2.maxScan - s2.minScan + 1;
toRemove.add(j);
lScan = Math.min(lScan, s2.minScan);
rScan = Math.max(rScan, s2.maxScan);
}
}
}
}
if (totalScans * rtPerScan > maximumTimeSpan) {
if (debug > 2)
System.out.println("Removing " + toRemove.size() + " masses around " + mzFormat.format(s1.center.mzCenter) + " m/z (" + s1.spotId + "), time " + timeFormat.format(retentiontime[s1.center.scanCenter]) + ", intensity " + s1.center.intensityCenter + ", Total Scans=" + totalScans + " (" + Math.round(totalScans * rtPerScan * 1000.0) / 1000.0 + " min).");
for (Integer J : toRemove) {
// System.out.println("Removing: "+spots.get(J).spotId);
spots.get(J).clear();
}
}
}
setProcedure(i, spots.size(), 8);
}
// Build peaks from assigned datums
logger.info("Building peak rows on " + dataFile + " (tolereance scans=" + tolScans + ")");
i = 0;
for (SpotByProbes sx : spots) {
if (sx.size() > 0 && sx.maxScan - sx.minScan + 1 >= tolScans) {
if (i % 100 == 0 && isCanceled())
return;
sx.buildMaxDatumFromScans(roi, minimumHeight);
if (sx.getMaxDatumScans() >= tolScans && (sx.getContigousMaxDatumScans() >= tolScans || sx.getContigousToMaxDatumScansRatio() > 0.5)) {
Chromatogram peak = new Chromatogram(dataFile, scanNumbers);
if (addMaxDatumFromScans(sx, peak) > 0) {
peak.finishChromatogram();
if (peak.getArea() > 1e-6) {
newPeakID++;
SimplePeakListRow newRow = new SimplePeakListRow(newPeakID);
newRow.addPeak(dataFile, peak);
newRow.setComment(sx.toString(retentiontime));
newPeakList.addRow(newRow);
if (debug > 0)
System.out.println("Peak added id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio() + " area = " + peak.getArea());
if (debug > 1) {
// Peak info:
System.out.println(sx.toString());
sx.printDebugInfo();
}
} else {
if (debug > 0)
System.out.println("Ignored by area ~ 0 id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio() + " area = " + peak.getArea());
}
}
} else {
if (debug > 0)
System.out.println("Ignored by continous criteria: id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio());
}
} else {
if (sx.size() > 0) {
if (debug > 0)
System.out.println("Ignored by time range criteria: id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio());
}
}
setProcedure(i++, spots.size(), 9);
}
logger.info("Peaks on " + dataFile + " = " + newPeakList.getNumberOfRows());
// Add new peaklist to the project
project.addPeakList(newPeakList);
// Add quality parameters to peaks
QualityParameters.calculateQualityParameters(newPeakList);
setStatus(TaskStatus.FINISHED);
logger.info("Finished chromatogram builder (RT) on " + dataFile);
}
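Stripped of the spot detection itself, the SimplePeakList bookkeeping in the method above follows one small pattern: build the list against the source RawDataFile, wrap each accepted chromatogram in a SimplePeakListRow, and register the finished list with the project. A minimal sketch of just that pattern follows; the helper name publishPeaks and the pre-built peak map are hypothetical, and the imports assume the mzmine2 data model packages.
import java.util.Map;

import net.sf.mzmine.datamodel.Feature;
import net.sf.mzmine.datamodel.MZmineProject;
import net.sf.mzmine.datamodel.RawDataFile;
import net.sf.mzmine.datamodel.impl.SimplePeakList;
import net.sf.mzmine.datamodel.impl.SimplePeakListRow;

final class PeakListPublisher {
    // Hypothetical helper: publish already-built peaks as a new feature list.
    static SimplePeakList publishPeaks(MZmineProject project, RawDataFile dataFile,
            String suffix, Map<String, Feature> peaksByComment) {
        SimplePeakList list = new SimplePeakList(dataFile + " " + suffix, dataFile);
        int rowId = 1;
        for (Map.Entry<String, Feature> e : peaksByComment.entrySet()) {
            SimplePeakListRow row = new SimplePeakListRow(rowId++);
            row.addPeak(dataFile, e.getValue()); // one peak per source data file
            row.setComment(e.getKey());          // keep the spot description, as above
            list.addRow(row);
        }
        project.addPeakList(list);               // make the list visible in the project
        return list;
    }
}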
Use of net.sf.mzmine.datamodel.impl.SimplePeakList in project mzmine2 by mzmine.
The class TargetedPeakDetectionModuleTask, method run().
public void run() {
setStatus(TaskStatus.PROCESSING);
// Calculate total number of scans in all files
totalScans = dataFile.getNumOfScans(1);
// Create new feature list
processedPeakList = new SimplePeakList(dataFile.getName() + " " + suffix, dataFile);
List<PeakInformation> peaks = this.readFile();
if (peaks == null || peaks.isEmpty()) {
setStatus(TaskStatus.ERROR);
setErrorMessage("Could not read file or the file is empty ");
return;
}
// Fill new feature list with empty rows
for (int row = 0; row < peaks.size(); row++) {
PeakListRow newRow = new SimplePeakListRow(ID++);
processedPeakList.addRow(newRow);
}
// Canceled?
if (isCanceled()) {
return;
}
List<Gap> gaps = new ArrayList<Gap>();
// Create one gap per target row
for (int row = 0; row < peaks.size(); row++) {
PeakListRow newRow = processedPeakList.getRow(row);
// Create a new gap
Range<Double> mzRange = mzTolerance.getToleranceRange(peaks.get(row).getMZ());
Range<Double> rtRange = rtTolerance.getToleranceRange(peaks.get(row).getRT());
newRow.addPeakIdentity(new SimplePeakIdentity(peaks.get(row).getName()), true);
Gap newGap = new Gap(newRow, dataFile, mzRange, rtRange, intTolerance, noiseLevel);
gaps.add(newGap);
}
// Stop processing this file if there are no gaps
if (gaps.isEmpty()) {
processedScans += dataFile.getNumOfScans();
}
// Get all scans of this data file
int[] scanNumbers = dataFile.getScanNumbers(msLevel);
if (scanNumbers == null) {
logger.log(Level.WARNING, "Could not read file with the MS level of " + msLevel);
setStatus(TaskStatus.ERROR);
return;
}
// Process each scan
for (int scanNumber : scanNumbers) {
// Canceled?
if (isCanceled()) {
return;
}
// Get the scan
Scan scan = dataFile.getScan(scanNumber);
// Feed this scan to all gaps
for (Gap gap : gaps) {
gap.offerNextScan(scan);
}
processedScans++;
}
// Finalize gaps
for (Gap gap : gaps) {
gap.noMoreOffers();
}
// Append processed feature list to the project
project.addPeakList(processedPeakList);
// Add quality parameters to peaks
QualityParameters.calculateQualityParameters(processedPeakList);
// Add task description to peakList
processedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod("Targeted feature detection ", parameters));
logger.log(Level.INFO, "Finished targeted feature detection on {0}", this.dataFile);
setStatus(TaskStatus.FINISHED);
}
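The targeted detection above reduces to a simple contract between the SimplePeakList and the module-local Gap class: one empty SimplePeakListRow per target compound, one Gap bound to that row, then every scan of the file is offered to every gap. A condensed sketch of that loop, meant to be read with the fields of the task class above (targets is a hypothetical stand-in for the parsed PeakInformation entries; module-local imports such as Gap are omitted):
List<Gap> gaps = new ArrayList<Gap>();
for (PeakInformation target : targets) {
    PeakListRow newRow = new SimplePeakListRow(ID++);
    processedPeakList.addRow(newRow);                       // placeholder row per target
    Range<Double> mzRange = mzTolerance.getToleranceRange(target.getMZ());
    Range<Double> rtRange = rtTolerance.getToleranceRange(target.getRT());
    gaps.add(new Gap(newRow, dataFile, mzRange, rtRange, intTolerance, noiseLevel));
}
for (int scanNumber : dataFile.getScanNumbers(msLevel)) {
    Scan scan = dataFile.getScan(scanNumber);
    for (Gap gap : gaps)
        gap.offerNextScan(scan);                            // each gap tracks its best candidate
}
for (Gap gap : gaps)
    gap.noMoreOffers();                                     // finalize: matched peaks land in the rows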
Use of net.sf.mzmine.datamodel.impl.SimplePeakList in project mzmine2 by mzmine.
The class HierarAlignerGCTask, method run().
/**
* @see Runnable#run()
*/
public void run() {
// Check options validity
if ((Math.abs(mzWeight) < EPSILON) && (Math.abs(rtWeight) < EPSILON)) {
setStatus(TaskStatus.ERROR);
setErrorMessage("Cannot run alignment, all the weight parameters are zero!");
return;
}
setStatus(TaskStatus.PROCESSING);
logger.info("Running join aligner");
// TIME STUFF
long startTime, endTime;
float ms;
//
if (DEBUG)
startTime = System.currentTimeMillis();
// MEMORY STUFF
Runtime run_time = Runtime.getRuntime();
Long prevTotal = 0l;
Long prevFree = run_time.freeMemory();
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "START TASK...");
// Each row is counted three times toward progress: scoring, clustering, and the actual alignment
for (int i = 0; i < peakLists.length; i++) {
totalRows += peakLists[i].getNumberOfRows() * 3;
}
// Collect all data files
Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
for (PeakList peakList : peakLists) {
for (RawDataFile dataFile : peakList.getRawDataFiles()) {
// Each data file can only have one column in aligned feature list
if (allDataFiles.contains(dataFile)) {
setStatus(TaskStatus.ERROR);
setErrorMessage("Cannot run alignment, because file " + dataFile + " is present in multiple feature lists");
return;
}
allDataFiles.add(dataFile);
}
}
// Create a new aligned feature list
alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "COMPOUND DETECTED");
/**
* Alignment mapping *
*/
// Iterate source feature lists
Hashtable<SimpleFeature, Double> rtPeaksBackup = new Hashtable<SimpleFeature, Double>();
Hashtable<PeakListRow, Object[]> infoRowsBackup = new Hashtable<PeakListRow, Object[]>();
// Since clustering is now order independent, option removed!
// Build comparison order
ArrayList<Integer> orderIds = new ArrayList<Integer>();
for (int i = 0; i < peakLists.length; ++i) {
orderIds.add(i);
}
Integer[] newIds = orderIds.toArray(new Integer[orderIds.size()]);
//
// TriangularMatrix distances = null;
DistanceMatrix distancesGNF_Tri = null;
DistanceMatrix distancesGNF_Tri_Bkp = null;
int nbPeaks = 0;
for (int i = 0; i < newIds.length; ++i) {
PeakList peakList = peakLists[newIds[i]];
nbPeaks += peakList.getNumberOfRows();
}
// Allocate the distance matrix up front when using the 'Hybrid' clusterer or when favouring CPU over RAM
if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
// distances = new double[nbPeaks][nbPeaks];
int nRowCount = nbPeaks;
distancesGNF_Tri = new DistanceMatrixTriangular1D2D(nRowCount);
}
full_rows_list = new ArrayList<>();
for (int i = 0; i < newIds.length; ++i) {
PeakList peakList = peakLists[newIds[i]];
PeakListRow[] allRows = peakList.getRows();
for (int j = 0; j < allRows.length; ++j) {
PeakListRow row = allRows[j];
full_rows_list.add(row);
}
}
RowVsRowDistanceProvider distProvider = new RowVsRowDistanceProvider(project, // rtAdjustementMapping,
full_rows_list, mzWeight, rtWeight, // rtToleranceAfter,
maximumScore);
// Fill the distance matrix under the same condition as above
if (CLUSTERER_TYPE == ClustererType.HYBRID || !saveRAMratherThanCPU_1) {
for (int x = 0; x < nbPeaks; ++x) {
for (int y = x; y < nbPeaks; ++y) {
float dist = (float) distProvider.getRankedDistance(x, y, mzTolerance.getMzTolerance(), rtTolerance.getTolerance(), minScore);
// if (CLUSTERER_TYPE == ClustererType.CLASSIC_OLD)
// distances.set(x, y , dist);
// else
distancesGNF_Tri.setValue(x, y, dist);
}
processedRows++;
if (DEBUG)
logger.info("Treating lists: " + (Math.round(100 * processedRows / (double) nbPeaks)) + " %");
}
}
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "DISTANCES COMPUTED");
// ////
// Math.abs(row.getBestPeak().getRT() -
double max_dist = maximumScore;
// k_row.getBestPeak().getRT()) /
// ((RangeUtils.rangeLength(rtRange) /
// 2.0));
// String newickCluster;
List<List<Integer>> gnfClusters = null;
// ////
boolean do_verbose = true;
boolean do_cluster = true;
boolean do_print = (exportDendrogramAsTxt);
boolean do_data = false;
org.gnf.clustering.Node[] arNodes = null;
int nRowCount = full_rows_list.size();
String[] rowNames = null;
if (do_print) {
rowNames = new String[nRowCount];
for (int i = 0; i < nRowCount; i++) {
// rowNames[i] = "ID_" + i + "_" +
// full_rows_list.get(i).getID();
Feature peak = full_rows_list.get(i).getBestPeak();
double rt = peak.getRT();
int end = peak.getDataFile().getName().indexOf(" ");
// Guard against file names without a space (indexOf returns -1)
String short_fname = (end > 0) ? peak.getDataFile().getName().substring(0, end) : peak.getDataFile().getName();
rowNames[i] = "@" + rtFormat.format(rt) + "^[" + short_fname + "]";
}
}
String outputPrefix = null;
if (CLUSTERER_TYPE == ClustererType.CLASSIC) {
// Pure Hierar!
outputPrefix = "hierar_0";
throw new IllegalStateException("'" + ClustererType.CLASSIC.toString() + "' algorithm not yet implemented!");
} else if (CLUSTERER_TYPE == ClustererType.CACHED) {
// TODO: ...!
if (DEBUG_2)
logger.info(distancesGNF_Tri.toString());
if (saveRAMratherThanCPU_2) {
// Requires: distances values will be
// recomputed on demand during
// "getValidatedClusters_3()"
// No duplicate backup storage!
distancesGNF_Tri_Bkp = null;
} else {
// Otherwise, backing up the distance matrix (matrix being
// deeply changed during "clusterDM()", then no more
// exploitable)
distancesGNF_Tri_Bkp = new DistanceMatrixTriangular1D2D(distancesGNF_Tri);
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER BACKUP MATRIX");
}
if (DEBUG)
logger.info("Clustering...");
if (distancesGNF_Tri != null)
arNodes = org.gnf.clustering.sequentialcache.SequentialCacheClustering.clusterDM(distancesGNF_Tri, linkageStartegyType, null, nRowCount);
distancesGNF_Tri = null;
System.gc();
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER DONE");
// distancesGNF_Tri was released above, so only log it if it still exists
if (DEBUG_2 && distancesGNF_Tri != null)
logger.info(distancesGNF_Tri.toString());
if (DEBUG_2)
for (int i = 0; i < arNodes.length; i++) {
logger.info("Node " + i + ": " + arNodes[i]);
}
// TODO: Use usual interfacing ...
// ClusteringResult<org.gnf.clustering.Node> clust_res = new
// ClusteringResult<>(
// Arrays.asList(arNodes), null, 0, null);
outputPrefix = "hierar_1";
} else if (CLUSTERER_TYPE == ClustererType.HYBRID) {
throw new IllegalStateException("'" + ClustererType.HYBRID.toString() + "' algorithm not yet implemented!");
}
// Sort Nodes by correlation score (Required in
// 'getValidatedClusters_3')
int[] rowOrder = new int[nRowCount];
if (DEBUG)
logger.info("Sorting tree nodes...");
org.gnf.clustering.Utils.NodeSort(arNodes, nRowCount - 2, 0, rowOrder);
if (do_cluster) {
gnfClusters = getValidatedClusters_3(arNodes, 0.0f, newIds.length, max_dist, distancesGNF_Tri_Bkp, distProvider);
// -- Print
if (DEBUG_2 && do_verbose)
for (int i = 0; i < gnfClusters.size(); i++) {
List<Integer> cl = gnfClusters.get(i);
String str = "";
for (int j = 0; j < cl.size(); j++) {
int r = cl.get(j);
str += cl.get(j) + "^(" + full_rows_list.get(r).getID() + ", " + full_rows_list.get(r).getAverageRT() + ")" + " ";
}
logger.info(str);
}
}
// File output
int ext_pos = dendrogramTxtFilename.getAbsolutePath().lastIndexOf(".");
outputPrefix = dendrogramTxtFilename.getAbsolutePath().substring(0, ext_pos);
String outGtr = outputPrefix + ".gtr";
String outCdt = outputPrefix + ".cdt";
if (DEBUG)
logger.info("Writing output to file...");
int nColCount = 1;
String[] colNames = new String[nColCount];
colNames[nColCount - 1] = "Id";
String sep = "\t";
if (do_print) {
try {
float[] arFloats = new float[nRowCount];
for (int i = 0; i < arFloats.length; i++) {
arFloats[i] = i / 2.0f;
}
DataSource source = (do_data) ? new FloatSource1D(arFloats, nRowCount, nColCount) : null;
/* org.gnf.clustering.Utils. */
HierarAlignerGCTask.GenerateCDT(outCdt, source, /* null */
nRowCount, nColCount, sep, rowNames, colNames, rowOrder);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
org.gnf.clustering.Utils.WriteTreeToFile(outGtr, nRowCount - 1, arNodes, true);
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER FILES PRINTED");
}
// //// Arrange row clustered list with method 0,1,2
List<List<PeakListRow>> clustersList = new ArrayList<>();
// Build feature list row clusters
for (List<Integer> cl : gnfClusters) {
List<PeakListRow> rows_cluster = new ArrayList<>();
for (int i = 0; i < cl.size(); i++) {
rows_cluster.add(full_rows_list.get(cl.get(i)));
}
clustersList.add(rows_cluster);
//
processedRows += rows_cluster.size();
}
if (DEBUG)
printMemoryUsage(logger, run_time, prevTotal, prevFree, "GNF CLUSTERER CLUSTER_LIST");
// Fill alignment table: One row per cluster
for (List<PeakListRow> cluster : clustersList) {
if (isCanceled())
return;
PeakListRow targetRow = new SimplePeakListRow(newRowID);
newRowID++;
alignedPeakList.addRow(targetRow);
//
infoRowsBackup.put(targetRow, new Object[] { new HashMap<RawDataFile, Double[]>(), new HashMap<RawDataFile, PeakIdentity>(), new HashMap<RawDataFile, Double>() });
for (PeakListRow row : cluster) {
// Add all non-existing identities from the original row to the
// aligned row
// Set the preferred identity
targetRow.setPreferredPeakIdentity(row.getPreferredPeakIdentity());
// for (RawDataFile file : row.getRawDataFiles()) {
for (RawDataFile file : alignedPeakList.getRawDataFiles()) {
if (Arrays.asList(row.getRawDataFiles()).contains(file)) {
Feature originalPeak = row.getPeak(file);
if (originalPeak != null) {
targetRow.addPeak(file, originalPeak);
} else {
setStatus(TaskStatus.ERROR);
setErrorMessage("Cannot run alignment, no originalPeak");
return;
}
}
}
// Copy identities from the original row if not already present
for (PeakIdentity identity : row.getPeakIdentities()) {
PeakIdentity clonedIdentity = (PeakIdentity) identity.clone();
if (!PeakUtils.containsIdentity(targetRow, clonedIdentity))
targetRow.addPeakIdentity(clonedIdentity, false);
}
// processedRows++;
}
}
// of the "targetRow.update()" used down there
for (SimpleFeature peak : rtPeaksBackup.keySet()) {
peak.setRT((double) rtPeaksBackup.get(peak));
}
/**
* Post-processing... *
*/
// Build reference RDFs index: We need an ordered reference here, to be
// able to parse
// correctly while reading back stored info
RawDataFile[] rdf_sorted = alignedPeakList.getRawDataFiles().clone();
Arrays.sort(rdf_sorted, new RawDataFileSorter(SortingDirection.Ascending));
// Process
for (PeakListRow targetRow : infoRowsBackup.keySet()) {
if (isCanceled())
return;
// Refresh averaged RTs...
((SimplePeakListRow) targetRow).update();
}
//
if (DEBUG) {
endTime = System.currentTimeMillis();
ms = (endTime - startTime);
logger.info("## >> Whole JoinAlignerGCTask processing took " + Float.toString(ms) + " ms.");
}
// ----------------------------------------------------------------------
// Add new aligned feature list to the project
this.project.addPeakList(alignedPeakList);
if (DEBUG) {
for (RawDataFile rdf : alignedPeakList.getRawDataFiles()) logger.info("RDF: " + rdf);
}
// Add task description to peakList
alignedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod(HierarAlignerGCTask.TASK_NAME, parameters));
logger.info("Finished join aligner GC");
setStatus(TaskStatus.FINISHED);
}
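Both aligner tasks in this listing start the same way: the raw data files of all source feature lists are collected into the column set of a new SimplePeakList, and a file that appears in more than one list is rejected. That setup step in isolation, as a sketch (the helper name createAlignedList is hypothetical):
import java.util.Vector;

import net.sf.mzmine.datamodel.PeakList;
import net.sf.mzmine.datamodel.RawDataFile;
import net.sf.mzmine.datamodel.impl.SimplePeakList;

final class AlignedListFactory {
    // Hypothetical helper: build the empty aligned list over the union of data files.
    static SimplePeakList createAlignedList(String peakListName, PeakList[] peakLists) {
        Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
        for (PeakList peakList : peakLists) {
            for (RawDataFile dataFile : peakList.getRawDataFiles()) {
                // Each data file may contribute only one column to the aligned list
                if (allDataFiles.contains(dataFile))
                    throw new IllegalArgumentException(
                            "File " + dataFile + " is present in multiple feature lists");
                allDataFiles.add(dataFile);
            }
        }
        return new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
    }
}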
Use of net.sf.mzmine.datamodel.impl.SimplePeakList in project mzmine2 by mzmine.
The class JoinAlignerTask, method run().
/**
* @see Runnable#run()
*/
@Override
public void run() {
if ((mzWeight == 0) && (rtWeight == 0)) {
setStatus(TaskStatus.ERROR);
setErrorMessage("Cannot run alignment, all the weight parameters are zero");
return;
}
setStatus(TaskStatus.PROCESSING);
logger.info("Running join aligner");
// Each row is processed twice: first for score calculation, second for the actual alignment
for (int i = 0; i < peakLists.length; i++) {
totalRows += peakLists[i].getNumberOfRows() * 2;
}
// Collect all data files
Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
for (PeakList peakList : peakLists) {
for (RawDataFile dataFile : peakList.getRawDataFiles()) {
// Each data file can only have one column in aligned feature list
if (allDataFiles.contains(dataFile)) {
setStatus(TaskStatus.ERROR);
setErrorMessage("Cannot run alignment, because file " + dataFile + " is present in multiple feature lists");
return;
}
allDataFiles.add(dataFile);
}
}
// Create a new aligned feature list
alignedPeakList = new SimplePeakList(peakListName, allDataFiles.toArray(new RawDataFile[0]));
// Iterate source feature lists
for (PeakList peakList : peakLists) {
// Create a sorted set of scores matching
TreeSet<RowVsRowScore> scoreSet = new TreeSet<RowVsRowScore>();
PeakListRow[] allRows = peakList.getRows();
// Calculate scores for all possible alignments of this row
for (PeakListRow row : allRows) {
if (isCanceled())
return;
// Calculate limits for a row with which the row can be aligned
Range<Double> mzRange = mzTolerance.getToleranceRange(row.getAverageMZ());
Range<Double> rtRange = rtTolerance.getToleranceRange(row.getAverageRT());
// Get all rows of the aligned peaklist within parameter limits
PeakListRow[] candidateRows = alignedPeakList.getRowsInsideScanAndMZRange(rtRange, mzRange);
// Calculate scores and store them
for (PeakListRow candidate : candidateRows) {
if (sameChargeRequired) {
if (!PeakUtils.compareChargeState(row, candidate))
continue;
}
if (sameIDRequired) {
if (!PeakUtils.compareIdentities(row, candidate))
continue;
}
if (compareIsotopePattern) {
IsotopePattern ip1 = row.getBestIsotopePattern();
IsotopePattern ip2 = candidate.getBestIsotopePattern();
if ((ip1 != null) && (ip2 != null)) {
ParameterSet isotopeParams = parameters.getParameter(JoinAlignerParameters.compareIsotopePattern).getEmbeddedParameters();
if (!IsotopePatternScoreCalculator.checkMatch(ip1, ip2, isotopeParams)) {
continue;
}
}
}
// compare the similarity of spectra mass lists on MS1 or MS2 level
if (compareSpectraSimilarity) {
DataPoint[] rowDPs = null;
DataPoint[] candidateDPs = null;
SpectralSimilarity sim = null;
// get data points of mass list of the representative scans
if (msLevel == 1) {
rowDPs = row.getBestPeak().getRepresentativeScan().getMassList(massList).getDataPoints();
candidateDPs = candidate.getBestPeak().getRepresentativeScan().getMassList(massList).getDataPoints();
}
// get data points of mass list of the best fragmentation scans
if (msLevel == 2) {
if (row.getBestFragmentation() != null && candidate.getBestFragmentation() != null) {
rowDPs = row.getBestFragmentation().getMassList(massList).getDataPoints();
candidateDPs = candidate.getBestFragmentation().getMassList(massList).getDataPoints();
} else
continue;
}
// compare mass list data points of selected scans
if (rowDPs != null && candidateDPs != null) {
// calculate similarity using SimilarityFunction
sim = createSimilarity(rowDPs, candidateDPs);
// createSimilarity returns null when the similarity is below the user-set threshold
if (sim == null) {
continue;
}
}
}
RowVsRowScore score = new RowVsRowScore(row, candidate, RangeUtils.rangeLength(mzRange) / 2.0, mzWeight, RangeUtils.rangeLength(rtRange) / 2.0, rtWeight);
scoreSet.add(score);
}
processedRows++;
}
// Create a table of mappings for best scores
Hashtable<PeakListRow, PeakListRow> alignmentMapping = new Hashtable<PeakListRow, PeakListRow>();
// Iterate scores by descending order
Iterator<RowVsRowScore> scoreIterator = scoreSet.iterator();
while (scoreIterator.hasNext()) {
RowVsRowScore score = scoreIterator.next();
// Check if the row is already mapped
if (alignmentMapping.containsKey(score.getPeakListRow()))
continue;
// Check if the aligned row is already filled
if (alignmentMapping.containsValue(score.getAlignedRow()))
continue;
alignmentMapping.put(score.getPeakListRow(), score.getAlignedRow());
}
// Align all rows using mapping
for (PeakListRow row : allRows) {
PeakListRow targetRow = alignmentMapping.get(row);
// If we have no mapping for this row, add a new one
if (targetRow == null) {
targetRow = new SimplePeakListRow(newRowID);
newRowID++;
alignedPeakList.addRow(targetRow);
}
// Add all peaks from the original row to the aligned row
for (RawDataFile file : row.getRawDataFiles()) {
targetRow.addPeak(file, row.getPeak(file));
}
// Add all non-existing identities from the original row to the
// aligned row
PeakUtils.copyPeakListRowProperties(row, targetRow);
processedRows++;
}
}
// Next feature list
// Add new aligned feature list to the project
project.addPeakList(alignedPeakList);
// Add task description to peakList
alignedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod("Join aligner", parameters));
logger.info("Finished join aligner");
setStatus(TaskStatus.FINISHED);
}
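The heart of the join aligner above is the greedy mapping: candidate pairs live in a TreeSet of RowVsRowScore, which (per the "descending order" comment above) yields the best matches first, and a pair is accepted only if neither of its rows has been used yet; source rows left without a mapping get a fresh SimplePeakListRow in the aligned list. A condensed sketch of that step, using the same names as the listing:
Hashtable<PeakListRow, PeakListRow> alignmentMapping = new Hashtable<PeakListRow, PeakListRow>();
for (RowVsRowScore score : scoreSet) {
    if (alignmentMapping.containsKey(score.getPeakListRow()))
        continue;                                           // source row already mapped
    if (alignmentMapping.containsValue(score.getAlignedRow()))
        continue;                                           // aligned row already taken
    alignmentMapping.put(score.getPeakListRow(), score.getAlignedRow());
}
for (PeakListRow row : allRows) {
    PeakListRow targetRow = alignmentMapping.get(row);
    if (targetRow == null) {                                // no partner found: start a new aligned row
        targetRow = new SimplePeakListRow(newRowID++);
        alignedPeakList.addRow(targetRow);
    }
    for (RawDataFile file : row.getRawDataFiles())
        targetRow.addPeak(file, row.getPeak(file));         // copy peaks into the aligned row
}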
Use of net.sf.mzmine.datamodel.impl.SimplePeakList in project mzmine2 by mzmine.
The class ScoreAligner, method align().
/*
* (non-Javadoc)
*
* @see gcgcaligner.AbstractAligner#align()
*/
public PeakList align() {
// Do the actual alignment if we do not already have the result
if (alignment == null) {
Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
for (PeakList list : this.originalPeakList) {
allDataFiles.addAll(Arrays.asList(list.getRawDataFiles()));
}
peaksTotal = 0;
for (int i = 0; i < peakList.size(); i++) {
peaksTotal += peakList.get(i).size();
}
alignment = new SimplePeakList(params.getParameter(PathAlignerParameters.peakListName).getValue(), allDataFiles.toArray(new RawDataFile[0]));
List<AlignmentPath> addedPaths = getAlignmentPaths();
int ID = 1;
for (AlignmentPath p : addedPaths) {
// Convert alignments to original order of files and add them to
// final
// Alignment data structure
PeakListRow row = (PeakListRow) p.convertToAlignmentRow(ID++);
alignment.addRow(row);
}
}
PeakList curAlignment = alignment;
return curAlignment;
}
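ScoreAligner builds its SimplePeakList lazily: the first call to align() converts each AlignmentPath into a row and caches the finished list, and later calls simply return the cached result. The caching shape in isolation, as a sketch (LazyAlignmentHolder and its parameters are hypothetical; in the class above the rows come from AlignmentPath.convertToAlignmentRow):
import java.util.List;

import net.sf.mzmine.datamodel.PeakList;
import net.sf.mzmine.datamodel.PeakListRow;
import net.sf.mzmine.datamodel.RawDataFile;
import net.sf.mzmine.datamodel.impl.SimplePeakList;

final class LazyAlignmentHolder {
    private SimplePeakList alignment;                       // built once, then reused

    PeakList align(String name, RawDataFile[] files, List<PeakListRow> rows) {
        if (alignment == null) {                            // do the work only on the first call
            alignment = new SimplePeakList(name, files);
            for (PeakListRow row : rows)
                alignment.addRow(row);                      // one row per alignment path
        }
        return alignment;
    }
}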