use of cz1.hmm.data.DataEntry in project polyGembler by c-zhou.
the class Haplotyper method run.
@Override
public void run() {
// TODO Auto-generated method stub
myLogger.info("Random seed - " + Constants.seed);
DataEntry[] de = start_pos == null ? DataCollection.readDataEntry(in_zip, scaff) : DataCollection.readDataEntry(in_zip, scaff, start_pos, end_pos);
// DataEntry[] de = DataCollection.readDataEntry(in_zip, Constants._ploidy_H);
final HiddenMarkovModel hmm = vbt ? new HiddenMarkovModelVBT(de, seperation, reverse, trainExp, field, founder_hap_coeff) : // new HiddenMarkovModelRST(de, seperation, reverse, trainExp, field, resampling):
(hmm_file == null ? new HiddenMarkovModelBWT(de, seperation, reverse, trainExp, field, founder_hap_coeff) : new HiddenMarkovModelBWT(de, seperation, reverse, trainExp, field, founder_hap_coeff, hmm_file));
if (Constants.plot()) {
Runnable run = new Runnable() {
public void run() {
try {
hmmf = new HMMFrame();
hmmf.clearTabs();
if (Constants.showHMM)
hmmp = hmmf.addHMMTab(hmm, hmm.de(), new File(out_prefix));
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
}
}
};
Thread th = new Thread(run);
th.run();
}
if (Constants.plot()) {
hmmf.pack();
hmmf.setVisible(true);
}
double ll, ll0 = hmm.loglik();
for (int i = 0; i < max_iter; i++) {
hmm.train();
if (Constants.plot())
hmmp.update();
ll = hmm.loglik();
myLogger.info("----------loglik " + ll);
if (ll < ll0) {
// throw new RuntimeException("Fatal error!!!");
myLogger.info("LOGLIK DECREASED!!!");
// break;
ll0 = ll;
continue;
}
if (ll0 != Double.NEGATIVE_INFINITY && Math.abs((ll - ll0) / ll0) < Constants.minImprov)
break;
ll0 = ll;
}
if (Constants.printPlots()) {
try {
float width = hmmf.jframe.getSize().width, height = hmmf.jframe.getSize().height;
Document document = new Document(new Rectangle(width, height));
PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(plot_pdf));
document.open();
PdfContentByte canvas = writer.getDirectContent();
PdfTemplate template = canvas.createTemplate(width, height);
Graphics2D g2d = new PdfGraphics2D(template, width, height);
hmmf.jframe.paint(g2d);
g2d.dispose();
canvas.addTemplate(template, 0, 0);
document.close();
} catch (FileNotFoundException | DocumentException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
String scaff_str = scaff[0] + (start_pos == null || start_pos[0] == Integer.MIN_VALUE ? "" : "_" + start_pos[0]) + (end_pos == null || end_pos[0] == Integer.MAX_VALUE ? "" : "_" + end_pos[0]);
for (int i = 1; i < scaff.length; i++) {
if (scaff_str.length() + scaff[i].length() + 32 <= Constants.MAX_FILE_ID_LENGTH)
scaff_str += Constants.scaff_collapsed_str + scaff[i] + (start_pos == null || start_pos[i] == Integer.MIN_VALUE ? "" : "_" + start_pos[i]) + (end_pos == null || end_pos[i] == Integer.MAX_VALUE ? "" : "_" + end_pos[i]);
else {
scaff_str += Constants.scaff_collapsed_str + "etc" + scaff.length;
break;
}
}
hmm.write(out_prefix, expr_id, scaff_str, resampling);
}
use of cz1.hmm.data.DataEntry in project polyGembler by c-zhou.
the class Resampler method run.
@Override
public void run() {
// TODO Auto-generated method stub
myLogger.info("Random seed - " + Constants.seed);
DataEntry[] de = start_pos == null ? DataCollection.readDataEntry(in_zip, scaff) : DataCollection.readDataEntry(in_zip, scaff, start_pos, end_pos);
// DataEntry[] de = DataCollection.readDataEntry(in_zip, Constants._ploidy_H);
final HiddenMarkovModel hmm = vbt ? new HiddenMarkovModelVBT(de, seperation, reverse, trainExp, field) : (hmm_file == null ? new HiddenMarkovModelBWT(de, seperation, reverse, trainExp, field) : new HiddenMarkovModelBWT(de, seperation, reverse, trainExp, field, hmm_file));
double ll, ll0 = hmm.loglik();
String scaff_str = scaff[0] + (start_pos == null || start_pos[0] == Integer.MIN_VALUE ? "" : "_" + start_pos[0]) + (end_pos == null || end_pos[0] == Integer.MAX_VALUE ? "" : "_" + end_pos[0]);
for (int i = 1; i < scaff.length; i++) {
if (scaff_str.length() + scaff[i].length() + 32 <= Constants.MAX_FILE_ID_LENGTH)
scaff_str += Constants.scaff_collapsed_str + scaff[i] + (start_pos == null || start_pos[i] == Integer.MIN_VALUE ? "" : "_" + start_pos[i]) + (end_pos == null || end_pos[i] == Integer.MAX_VALUE ? "" : "_" + end_pos[i]);
else {
scaff_str += Constants.scaff_collapsed_str + "etc" + scaff.length;
break;
}
}
hmm.write(out_prefix, expr_id, scaff_str, resampling);
}
use of cz1.hmm.data.DataEntry in project polyGembler by c-zhou.
the class VCFResampling method run.
@Override
public void run() {
// TODO Auto-generated method stub
FullSiblings fullSib = new FullSiblings(this.ploidy);
final List<String> contigs = DataCollection.getContigList(data_in);
final List<String> samples = DataCollection.getSampleList(data_in);
int[] founder_index = new int[2];
Arrays.fill(founder_index, -1);
for (int i = 0; i != samples.size(); i++) {
if (samples.get(i).equals(founder_haps[0]))
founder_index[0] = i;
else if (samples.get(i).equals(founder_haps[1]))
founder_index[1] = i;
}
for (String contig : contigs) {
DataEntry data = DataCollection.readDataEntry(data_in, new String[] { contig })[0];
double[] position = data.getPosition();
int no = position.length;
if (no <= min_snp) {
int[] out = new int[no];
for (int i = 0; i != no; i++) out[i] = i;
snp_out.put(contig, out);
continue;
}
double[] segs = fullSib.calcSegs(data, founder_index);
int l = 0;
final Map<Integer, List<Integer>> bins = new HashMap<Integer, List<Integer>>();
for (int i = 0; i != no; i++) {
while ((l + 1) * window_size < position[i]) l++;
if (!bins.containsKey(l))
bins.put(l, new ArrayList<Integer>());
bins.get(l).add(i);
}
List<Integer> selected = new ArrayList<Integer>();
final Map<Integer, Iterator<Integer>> bins_it = new HashMap<Integer, Iterator<Integer>>();
for (Map.Entry<Integer, List<Integer>> entry : bins.entrySet()) {
Collections.sort(entry.getValue(), new Comparator<Integer>() {
@Override
public int compare(Integer i0, Integer i1) {
// TODO Auto-generated method stub
return Double.compare(segs[i0], segs[i1]);
}
});
Iterator<Integer> it = entry.getValue().iterator();
selected.add(it.next());
if (it.hasNext())
bins_it.put(entry.getKey(), it);
}
while (selected.size() < min_snp) {
List<Integer> candidates = new ArrayList<Integer>();
Set<Integer> key_rm = new HashSet<Integer>();
for (Map.Entry<Integer, Iterator<Integer>> entry : bins_it.entrySet()) {
Iterator<Integer> it = entry.getValue();
candidates.add(it.next());
if (!it.hasNext())
key_rm.add(entry.getKey());
}
if (candidates.size() == 0)
break;
for (Integer i : key_rm) bins_it.remove(i);
Collections.sort(candidates, new Comparator<Integer>() {
@Override
public int compare(Integer i0, Integer i1) {
// TODO Auto-generated method stub
return Double.compare(segs[i0], segs[i1]);
}
});
for (Integer i : candidates) {
if (selected.size() < min_snp)
selected.add(i);
else
break;
}
}
Collections.sort(selected);
snp_out.put(contig, ArrayUtils.toPrimitive(selected.toArray(new Integer[selected.size()])));
}
int N = 0;
for (Map.Entry<String, int[]> entry : snp_out.entrySet()) N += entry.getValue().length;
System.err.println(N + " SNPs reserved.");
write();
}
Aggregations