Example 16 with Patterns

Use of dr.evolution.alignment.Patterns in project beast-mcmc by beast-dev.

The class SankoffParsimony, method main().

public static void main(String[] argv) {
    FlexibleNode tip1 = new FlexibleNode(new Taxon("tip1"));
    FlexibleNode tip2 = new FlexibleNode(new Taxon("tip2"));
    FlexibleNode tip3 = new FlexibleNode(new Taxon("tip3"));
    FlexibleNode tip4 = new FlexibleNode(new Taxon("tip4"));
    FlexibleNode tip5 = new FlexibleNode(new Taxon("tip5"));
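    // internal nodes and root for the topology ((tip1,tip2),(tip3,(tip4,tip5)))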
    FlexibleNode node1 = new FlexibleNode();
    node1.addChild(tip1);
    node1.addChild(tip2);
    FlexibleNode node2 = new FlexibleNode();
    node2.addChild(tip4);
    node2.addChild(tip5);
    FlexibleNode node3 = new FlexibleNode();
    node3.addChild(tip3);
    node3.addChild(node2);
    FlexibleNode root = new FlexibleNode();
    root.addChild(node1);
    root.addChild(node3);
    FlexibleTree tree = new FlexibleTree(root);
    Patterns patterns = new Patterns(Nucleotides.INSTANCE, tree);
    //patterns.addPattern(new int[] {1, 0, 1, 2, 2});
    //patterns.addPattern(new int[] {2, 1, 1, 1, 2});
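    // a single site pattern: one nucleotide state code per taxon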
    patterns.addPattern(new int[] { 2, 3, 1, 3, 3 });
    FitchParsimony fitch = new FitchParsimony(patterns, false);
    SankoffParsimony sankoff = new SankoffParsimony(patterns);
    for (int i = 0; i < patterns.getPatternCount(); i++) {
        double[] scores = fitch.getSiteScores(tree);
        System.out.println("Pattern = " + i);
        System.out.println("Fitch:");
        System.out.println("  No. Steps = " + scores[i]);
        System.out.println("    state(node1) = " + fitch.getStates(tree, node1)[i]);
        System.out.println("    state(node2) = " + fitch.getStates(tree, node2)[i]);
        System.out.println("    state(node3) = " + fitch.getStates(tree, node3)[i]);
        System.out.println("    state(root) = " + fitch.getStates(tree, root)[i]);
        scores = sankoff.getSiteScores(tree);
        System.out.println("Sankoff:");
        System.out.println("  No. Steps = " + scores[i]);
        System.out.println("    state(node1) = " + sankoff.getStates(tree, node1)[i]);
        System.out.println("    state(node2) = " + sankoff.getStates(tree, node2)[i]);
        System.out.println("    state(node3) = " + sankoff.getStates(tree, node3)[i]);
        System.out.println("    state(root) = " + sankoff.getStates(tree, root)[i]);
        System.out.println();
    }
}
Also used : Taxon(dr.evolution.util.Taxon) Patterns(dr.evolution.alignment.Patterns)
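
If only the overall tree length under parsimony is needed, the per-pattern scores can simply be summed. The fragment below is an illustrative addition, not part of the original main(); it reuses the fitch, sankoff, patterns and tree variables built above, and with a uniform Sankoff cost matrix the two totals would be expected to coincide.

double[] fitchScores = fitch.getSiteScores(tree);       // per-pattern step counts (Fitch)
double[] sankoffScores = sankoff.getSiteScores(tree);   // per-pattern step counts (Sankoff)
double fitchTotal = 0.0;
double sankoffTotal = 0.0;
for (int i = 0; i < patterns.getPatternCount(); i++) {
    fitchTotal += fitchScores[i];
    sankoffTotal += sankoffScores[i];
}
System.out.println("Total steps: Fitch = " + fitchTotal + ", Sankoff = " + sankoffTotal);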

Example 17 with Patterns

Use of dr.evolution.alignment.Patterns in project beast-mcmc by beast-dev.

The class MicroSatImporter, method importPatterns().

public List<Patterns> importPatterns() throws IOException, Importer.ImportException {
    List<Patterns> microsatPatList = new ArrayList<Patterns>();
    // 1st List<String> is taxon names
    List<List<String>> data = new ArrayList<List<String>>();
    // microsatName[0] is keyword, microsatName[1] is name
    String[] microsatName = new String[2];
    microsatName[1] = "unnamed.microsat";
    String line = reader.readLine();
    while (line.startsWith("#")) {
        // comments
        if (line.toUpperCase().contains("NAME")) {
            microsatName = line.trim().split("[" + delimiter + " ]+");
            if (microsatName[1] == null || microsatName[1].length() < 1)
                throw new Importer.ImportException("Improper microsatellite name : " + microsatName[1]);
        }
        line = reader.readLine();
    }
    // read locus (microsat pattern) names in the 1st row after comments, where 1st element is id
    // trim trailing whitespace ?
    String[] names = line.trim().split("[" + delimiter + " ]+");
    // for validation
    int colLen = names.length;
    if (colLen < 2)
        throw new Importer.ImportException("Import file must have more than 1 columns : " + colLen);
    for (int i = 0; i < colLen; i++) {
        // init data
        List<String> l = new ArrayList<String>();
        data.add(l);
    }
    int min = Integer.MAX_VALUE;
    int max = Integer.MIN_VALUE;
    line = reader.readLine();
    while (line != null) {
        // read data
        String[] dataLine = line.trim().split("[" + delimiter + " ]+");
        if (dataLine.length != colLen)
            throw new Importer.ImportException("The number of name columns are different with values columns," + "\nplease use only letters or numbers in the name.");
        for (int i = 0; i < dataLine.length; i++) {
            data.get(i).add(dataLine[i]);
            if (i > 0) {
                int v = parseInt(dataLine[i]);
                if (v != Microsatellite.UNKNOWN_STATE_LENGTH) {
                    if (min > v)
                        min = v;
                    if (max < v)
                        max = v;
                }
            }
        }
        line = reader.readLine();
    }
    if (max < min)
        throw new Importer.ImportException("Importing invalid data: max < min !");
    //        if (min - 2 < 0) throw new Importer.ImportException("Importing invaild data: min-2 < 0 where min = " + min);
    // the lower bound of the state range is fixed at 1; the upper bound is the longest observed repeat length + 2
    microsatellite = new Microsatellite(microsatName[1], 1, max + 2, 1);
    Taxa taxaHaploid = new Taxa();
    for (String name : data.get(0)) {
        Taxon t = new Taxon(name);
        taxaHaploid.addTaxon(t);
    }
    //        unionSetTaxonList.addTaxa(taxaHaploid);
    Patterns microsatPat;
    for (int i = 1; i < data.size(); i++) {
        // create pattern
        //            List<Integer> pattern = new ArrayList<Integer>();
        List<Integer> pattern;
        Taxa taxa = new Taxa();
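        // adjacent columns sharing a locus name are read as one diploid locus (two values per
        // individual, the second column being consumed by the i++ at the end of the branch);
        // otherwise the column is read as a haploid locus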
        if ((i + 1 < data.size()) && names[i].equalsIgnoreCase(names[i + 1])) {
            // diploid: Locus2	Locus2
            Taxa taxaDiploid = new Taxa();
            for (String name : data.get(0)) {
                Taxon t = new Taxon(names[i] + "_1_" + name);
                taxaDiploid.addTaxon(t);
            }
            for (String name : data.get(0)) {
                Taxon t = new Taxon(names[i] + "_2_" + name);
                taxaDiploid.addTaxon(t);
            }
            if (unionSetTaxonList.containsAny(taxaDiploid))
                throw new Importer.ImportException("Importing invalid data: duplicate taxon name in this locus : " + names[i]);
            unionSetTaxonList.addTaxa(taxaDiploid);
            hasDifferentTaxon = true;
            pattern = new ArrayList<Integer>();
            String value;
            int size = data.get(i).size();
            for (int v = 0; v < size; v++) {
                value = data.get(i).get(v);
                //                    if (!isUnknownChar(value)) {
                Taxon t = taxaDiploid.getTaxon(v);
                if (!taxa.contains(t)) {
                    taxa.addTaxon(t);
                    //microsatellite.getState(value);
                    pattern.add(parseInt(value));
                    if (!unionSetTaxonList.contains(t)) {
                        unionSetTaxonList.addTaxon(t);
                        if (i > 1)
                            hasDifferentTaxon = true;
                    }
                }
            //                    }
            }
            for (int v = 0; v < data.get(i + 1).size(); v++) {
                value = data.get(i + 1).get(v);
                //                    if (!isUnknownChar(value)) {
                Taxon t = taxaDiploid.getTaxon(v + size);
                if (!taxa.contains(t)) {
                    taxa.addTaxon(t);
                    //microsatellite.getState(value);
                    pattern.add(parseInt(value));
                    if (!unionSetTaxonList.contains(t)) {
                        unionSetTaxonList.addTaxon(t);
                        if (i > 1)
                            hasDifferentTaxon = true;
                    }
                }
            //                    }
            }
            i++;
        } else {
            // haploid Locus1
            pattern = new ArrayList<Integer>();
            for (int v = 0; v < data.get(i).size(); v++) {
                String value = data.get(i).get(v);
                //                    if (!isUnknownChar(value)) {
                Taxon t = taxaHaploid.getTaxon(v);
                if (!taxa.contains(t)) {
                    taxa.addTaxon(t);
                    //microsatellite.getState(value);
                    pattern.add(parseInt(value));
                    if (!unionSetTaxonList.contains(t)) {
                        unionSetTaxonList.addTaxon(t);
                        if (i > 1)
                            hasDifferentTaxon = true;
                    }
                }
            //                    }
            }
        }
        int[] p = new int[pattern.size()];
        for (int v = 0; v < pattern.size(); v++) {
            p[v] = pattern.get(v);
        }
        microsatPat = new Patterns(microsatellite, taxa);
        microsatPat.addPattern(p);
        microsatPat.setId(names[i]);
        microsatPatList.add(microsatPat);
    }
    return microsatPatList;
}
Also used : Microsatellite(dr.evolution.datatype.Microsatellite) Taxon(dr.evolution.util.Taxon) ArrayList(java.util.ArrayList) Taxa(dr.evolution.util.Taxa) List(java.util.List) Patterns(dr.evolution.alignment.Patterns)
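
A caller of importPatterns() typically just iterates the returned list. The sketch below is illustrative only: 'importer' stands for an already constructed MicroSatImporter (its constructor is not shown here), getId() is assumed to accompany the setId() call used above, and getTaxonCount()/getPatternCount() are the Patterns accessors used in the other examples on this page.

// caller must handle the declared IOException / Importer.ImportException
List<Patterns> loci = importer.importPatterns();
for (Patterns locus : loci) {
    System.out.println(locus.getId() + ": " + locus.getTaxonCount() + " taxa, "
            + locus.getPatternCount() + " pattern(s)");
}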

Example 18 with Patterns

Use of dr.evolution.alignment.Patterns in project beast-mcmc by beast-dev.

The class OptimizedBeagleTreeLikelihoodParser, method parseXMLObject().

public Object parseXMLObject(XMLObject xo) throws XMLParseException {
    //Default of 100 likelihood calculations for calibration
    int calibrate = 100;
    if (xo.hasAttribute(CALIBRATE)) {
        calibrate = xo.getIntegerAttribute(CALIBRATE);
    }
    //Default: only try the next split up, unless a RETRY value is specified in the XML
    int retry = 0;
    if (xo.hasAttribute(RETRY)) {
        retry = xo.getIntegerAttribute(RETRY);
    }
    int childCount = xo.getChildCount();
    List<Likelihood> likelihoods = new ArrayList<Likelihood>();
    //TEST
    List<Likelihood> originalLikelihoods = new ArrayList<Likelihood>();
    for (int i = 0; i < childCount; i++) {
        likelihoods.add((Likelihood) xo.getChild(i));
        originalLikelihoods.add((Likelihood) xo.getChild(i));
    }
    if (DEBUG) {
        System.err.println("-----");
        System.err.println(childCount + " BeagleTreeLikelihoods added.");
    }
    int[] instanceCounts = new int[childCount];
    for (int i = 0; i < childCount; i++) {
        instanceCounts[i] = 1;
    }
    int[] currentLocation = new int[childCount];
    for (int i = 0; i < childCount; i++) {
        currentLocation[i] = i;
    }
    int[] siteCounts = new int[childCount];
    //store everything for later use
    SitePatterns[] patterns = new SitePatterns[childCount];
    TreeModel[] treeModels = new TreeModel[childCount];
    BranchModel[] branchModels = new BranchModel[childCount];
    GammaSiteRateModel[] siteRateModels = new GammaSiteRateModel[childCount];
    BranchRateModel[] branchRateModels = new BranchRateModel[childCount];
    boolean[] ambiguities = new boolean[childCount];
    PartialsRescalingScheme[] rescalingSchemes = new PartialsRescalingScheme[childCount];
    boolean[] isDelayRescalingUntilUnderflow = new boolean[childCount];
    List<Map<Set<String>, Parameter>> partialsRestrictions = new ArrayList<Map<Set<String>, Parameter>>();
    for (int i = 0; i < likelihoods.size(); i++) {
        patterns[i] = (SitePatterns) ((BeagleTreeLikelihood) likelihoods.get(i)).getPatternsList();
        siteCounts[i] = patterns[i].getPatternCount();
        treeModels[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).getTreeModel();
        branchModels[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).getBranchModel();
        siteRateModels[i] = (GammaSiteRateModel) ((BeagleTreeLikelihood) likelihoods.get(i)).getSiteRateModel();
        branchRateModels[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).getBranchRateModel();
        ambiguities[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).useAmbiguities();
        rescalingSchemes[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).getRescalingScheme();
        isDelayRescalingUntilUnderflow[i] = ((BeagleTreeLikelihood) likelihoods.get(i)).isDelayRescalingUntilUnderflow();
        partialsRestrictions.add(i, ((BeagleTreeLikelihood) likelihoods.get(i)).getPartialsRestrictions());
    }
    if (DEBUG) {
        System.err.println("Pattern counts: ");
        for (int i = 0; i < siteCounts.length; i++) {
            System.err.println(siteCounts[i] + "   vs.    " + patterns[i].getPatternCount());
        }
        System.err.println();
        System.err.println("Instance counts: ");
        for (int i = 0; i < instanceCounts.length; i++) {
            System.err.print(instanceCounts[i] + " ");
        }
        System.err.println();
        System.err.println("Current locations: ");
        for (int i = 0; i < currentLocation.length; i++) {
            System.err.print(currentLocation[i] + " ");
        }
        System.err.println();
    }
    TestThreadedCompoundLikelihood compound = new TestThreadedCompoundLikelihood(likelihoods);
    if (DEBUG) {
        System.err.println("Timing estimates for each of the " + calibrate + " likelihood calculations:");
    }
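    // calibration phase: time 'calibrate' full evaluations of the unsplit compound
    // likelihood to establish the baseline that any new partitioning has to beat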
    double start = System.nanoTime();
    for (int i = 0; i < calibrate; i++) {
        if (DEBUG) {
            //double debugStart = System.nanoTime();
            compound.makeDirty();
            compound.getLogLikelihood();
        //double debugEnd = System.nanoTime();
        //System.err.println(debugEnd - debugStart);
        } else {
            compound.makeDirty();
            compound.getLogLikelihood();
        }
    }
    double end = System.nanoTime();
    double baseResult = end - start;
    if (DEBUG) {
        System.err.println("Starting evaluation took: " + baseResult);
    }
    int longestIndex = 0;
    int longestSize = siteCounts[0];
    //START TEST CODE
    /*System.err.println("Detailed evaluation times: ");
        long[] evaluationTimes = compound.getEvaluationTimes();
        int[] evaluationCounts = compound.getEvaluationCounts();
        long longest = evaluationTimes[0];
        for (int i = 0; i < evaluationTimes.length; i++) {
        	System.err.println(i + ": time=" + evaluationTimes[i] + "   count=" + evaluationCounts[i]);
            if (evaluationTimes[i] > longest) {
            	longest = evaluationTimes[i];
        	}
        }*/
    //END TEST CODE
    /*if (SPLIT_BY_PATTERN_COUNT) {

        	boolean notFinished = true;

        	while (notFinished) {

        		for (int i = 0; i < siteCounts.length; i++) {
        			if (siteCounts[i] > longestSize) {
        				longestIndex = i;
        				longestSize = siteCounts[longestIndex];
        			}
        		}
        		System.err.println("Split likelihood " + longestIndex + " with pattern count " + longestSize);

        		//split it in 2
        		int instanceCount = ++instanceCounts[longestIndex];

        		List<Likelihood> newList = new ArrayList<Likelihood>();
        		for (int i = 0; i < instanceCount; i++) {
        			Patterns subPatterns = new Patterns(patterns[longestIndex], 0, 0, 1, i, instanceCount);

        			BeagleTreeLikelihood treeLikelihood = createTreeLikelihood(
        					subPatterns, treeModels[longestIndex], branchModels[longestIndex], siteRateModels[longestIndex], branchRateModels[longestIndex],
        					null, 
        					ambiguities[longestIndex], rescalingSchemes[longestIndex], partialsRestrictions.get(longestIndex),
        					xo);

        			treeLikelihood.setId(xo.getId() + "_" + instanceCount);
        			newList.add(treeLikelihood);
        		}
        		for (int i = 0; i < newList.size()-1; i++) {
        			likelihoods.remove(currentLocation[longestIndex]);
        		}
        		//likelihoods.remove(longestIndex);
        		//likelihoods.add(longestIndex, new CompoundLikelihood(newList));
        		for (int i = 0; i < newList.size(); i++) {
        			likelihoods.add(currentLocation[longestIndex], newList.get(i));
        		}
        		for (int i = longestIndex+1; i < currentLocation.length; i++) {
        			currentLocation[i]++;
        		}
        		//compound = new ThreadedCompoundLikelihood(likelihoods);
        		compound = new CompoundLikelihood(likelihoods);
        		siteCounts[longestIndex] = (instanceCount-1)*siteCounts[longestIndex]/instanceCount;
        		longestSize = (instanceCount-1)*longestSize/instanceCount;

        		//check number of likelihoods
        		System.err.println("Number of BeagleTreeLikelihoods: " + compound.getLikelihoodCount());
        		System.err.println("Pattern counts: ");
        		for (int i = 0;i < siteCounts.length; i++) {
        			System.err.print(siteCounts[i] + " ");
        		}
        		System.err.println();
        		System.err.println("Instance counts: ");
        		for (int i = 0;i < instanceCounts.length; i++) {
        			System.err.print(instanceCounts[i] + " ");
        		}
        		System.err.println();
        		System.err.println("Current locations: ");
        		for (int i = 0;i < currentLocation.length; i++) {
        			System.err.print(currentLocation[i] + " ");
        		}
        		System.err.println();

        		//evaluate speed
        		start = System.nanoTime();
        		for (int i = 0; i < TEST_RUNS; i++) {
        			compound.makeDirty();
        			compound.getLogLikelihood();
        		}
        		end = System.nanoTime();
        		double newResult = end - start;
        		System.err.println("New evaluation took: " + newResult + " vs. old evaluation: " + baseResult);

        		if (newResult < baseResult) {
            		baseResult = newResult;
            	} else {
            		notFinished = false;

            		//remove 1 instanceCount
            		System.err.print("Removing 1 instance count: " + instanceCount);
            		instanceCount = --instanceCounts[longestIndex];
            		System.err.println(" -> " + instanceCount + " for likelihood " + longestIndex);
            		newList = new ArrayList<Likelihood>();
                	for (int i = 0; i < instanceCount; i++) {
                		Patterns subPatterns = new Patterns(patterns[longestIndex], 0, 0, 1, i, instanceCount);

                		BeagleTreeLikelihood treeLikelihood = createTreeLikelihood(
                                subPatterns, treeModels[longestIndex], branchModels[longestIndex], siteRateModels[longestIndex], branchRateModels[longestIndex],
                                null, 
                                ambiguities[longestIndex], rescalingSchemes[longestIndex], partialsRestrictions.get(longestIndex),
                                xo);

                        treeLikelihood.setId(xo.getId() + "_" + instanceCount);
                        newList.add(treeLikelihood);
                	}
                	for (int i = 0; i < newList.size()+1; i++) {
            			likelihoods.remove(currentLocation[longestIndex]);
            		}
                	for (int i = 0; i < newList.size(); i++) {
                		likelihoods.add(currentLocation[longestIndex], newList.get(i));
                	}
                	for (int i = longestIndex+1; i < currentLocation.length; i++) {
            			currentLocation[i]--;
            		}
                	//likelihoods.remove(longestIndex);
                	//likelihoods.add(longestIndex, new CompoundLikelihood(newList));

                	//compound = new ThreadedCompoundLikelihood(likelihoods);
                	compound = new CompoundLikelihood(likelihoods);
                	siteCounts[longestIndex] = (instanceCount+1)*siteCounts[longestIndex]/instanceCount;
                	longestSize = (instanceCount+1)*longestSize/instanceCount;

                	System.err.println("Pattern counts: ");
                	for (int i = 0;i < siteCounts.length; i++) {
                		System.err.print(siteCounts[i] + " ");
                	}
                	System.err.println();
                	System.err.println("Instance counts: ");
                	for (int i = 0;i < instanceCounts.length; i++) {
                		System.err.print(instanceCounts[i] + " ");
                	}
                	System.err.println();
                	System.err.println("Current locations: ");
            		for (int i = 0;i < currentLocation.length; i++) {
            			System.err.print(currentLocation[i] + " ");
            		}
            		System.err.println();

            	}

        	}

        } else {*/
    //Try splitting the same likelihood until no further improvement, then move on towards the next one
    boolean notFinished = true;
    //construct list with likelihoods to split up
    List<Integer> splitList = new ArrayList<Integer>();
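    // selection pass: repeatedly take the likelihood with the largest remaining pattern
    // count (zeroing it as it is taken) so that splitList is ordered from most to fewest
    // patterns; the counts themselves are restored in the loop that follows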
    for (int i = 0; i < siteCounts.length; i++) {
        int top = 0;
        for (int j = 0; j < siteCounts.length; j++) {
            if (siteCounts[j] > siteCounts[top]) {
                top = j;
            }
        }
        siteCounts[top] = 0;
        splitList.add(top);
    }
    for (int i = 0; i < likelihoods.size(); i++) {
        siteCounts[i] = patterns[i].getPatternCount();
        if (DEBUG) {
            System.err.println("Site count " + i + " = " + siteCounts[i]);
        }
    }
    if (DEBUG) {
        //print list
        System.err.print("Ordered list of likelihoods to be evaluated: ");
        for (int i = 0; i < splitList.size(); i++) {
            System.err.print(splitList.get(i) + " ");
        }
        System.err.println();
    }
    int timesRetried = 0;
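    // search loop: give the likelihood at the head of splitList one extra instance, rebuild
    // and re-time the compound likelihood; keep the split (and reorder splitList by remaining
    // pattern count) if it is faster, otherwise, after 'retry' extra attempts, undo the added
    // instances and drop that likelihood from splitList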
    while (notFinished) {
        //split it in 1 more piece
        longestIndex = splitList.get(0);
        int instanceCount = ++instanceCounts[longestIndex];
        List<Likelihood> newList = new ArrayList<Likelihood>();
        for (int i = 0; i < instanceCount; i++) {
            Patterns subPatterns = new Patterns(patterns[longestIndex], 0, 0, 1, i, instanceCount);
            BeagleTreeLikelihood treeLikelihood = createTreeLikelihood(subPatterns, treeModels[longestIndex], branchModels[longestIndex], siteRateModels[longestIndex], branchRateModels[longestIndex], null, ambiguities[longestIndex], rescalingSchemes[longestIndex], isDelayRescalingUntilUnderflow[longestIndex], partialsRestrictions.get(longestIndex), xo);
            treeLikelihood.setId(xo.getId() + "_" + longestIndex + "_" + i);
            System.err.println(treeLikelihood.getId() + " created.");
            newList.add(treeLikelihood);
        }
        for (int i = 0; i < newList.size() - 1; i++) {
            likelihoods.remove(currentLocation[longestIndex]);
        }
        //likelihoods.add(longestIndex, new CompoundLikelihood(newList));
        for (int i = 0; i < newList.size(); i++) {
            likelihoods.add(currentLocation[longestIndex], newList.get(i));
        }
        for (int i = longestIndex + 1; i < currentLocation.length; i++) {
            currentLocation[i]++;
        }
        compound = new TestThreadedCompoundLikelihood(likelihoods);
        //compound = new CompoundLikelihood(likelihoods);
        //compound = new ThreadedCompoundLikelihood(likelihoods);
        siteCounts[longestIndex] = (instanceCount - 1) * siteCounts[longestIndex] / instanceCount;
        longestSize = (instanceCount - 1) * longestSize / instanceCount;
        if (DEBUG) {
            //check number of likelihoods
            System.err.println("Number of BeagleTreeLikelihoods: " + compound.getLikelihoodCount());
            System.err.println("Pattern counts: ");
            for (int i = 0; i < siteCounts.length; i++) {
                System.err.print(siteCounts[i] + " ");
            }
            System.err.println();
            System.err.println("Instance counts: ");
            for (int i = 0; i < instanceCounts.length; i++) {
                System.err.print(instanceCounts[i] + " ");
            }
            System.err.println();
            System.err.println("Current locations: ");
            for (int i = 0; i < currentLocation.length; i++) {
                System.err.print(currentLocation[i] + " ");
            }
            System.err.println();
        }
        //evaluate speed
        if (DEBUG) {
            System.err.println("Timing estimates for each of the " + calibrate + " likelihood calculations:");
        }
        start = System.nanoTime();
        for (int i = 0; i < calibrate; i++) {
            if (DEBUG) {
                //double debugStart = System.nanoTime();
                compound.makeDirty();
                compound.getLogLikelihood();
            //double debugEnd = System.nanoTime();
            //System.err.println(debugEnd - debugStart);
            } else {
                compound.makeDirty();
                compound.getLogLikelihood();
            }
        }
        end = System.nanoTime();
        double newResult = end - start;
        if (DEBUG) {
            System.err.println("New evaluation took: " + newResult + " vs. old evaluation: " + baseResult);
        }
        if (newResult < baseResult) {
            //new partitioning is faster, so partition further
            baseResult = newResult;
            //reorder split list
            if (DEBUG) {
                System.err.print("Current split list: ");
                for (int i = 0; i < splitList.size(); i++) {
                    System.err.print(splitList.get(i) + "  ");
                }
                System.err.println();
                System.err.print("Current pattern counts: ");
                for (int i = 0; i < splitList.size(); i++) {
                    System.err.print(siteCounts[splitList.get(i)] + "  ");
                }
                System.err.println();
            }
            int currentPatternCount = siteCounts[longestIndex];
            int findIndex = 0;
            for (int i = 0; i < splitList.size(); i++) {
                if (siteCounts[splitList.get(i)] > currentPatternCount) {
                    findIndex = i;
                }
            }
            if (DEBUG) {
                System.err.println("Current pattern count: " + currentPatternCount);
                System.err.println("Index found: " + findIndex + " with pattern count: " + siteCounts[findIndex]);
                System.err.println("Moving 0 to " + findIndex);
            }
            for (int i = 0; i < findIndex; i++) {
                int temp = splitList.get(i);
                splitList.set(i, splitList.get(i + 1));
                splitList.set(i + 1, temp);
            }
            if (DEBUG) {
                System.err.print("New split list: ");
                for (int i = 0; i < splitList.size(); i++) {
                    System.err.print(splitList.get(i) + "  ");
                }
                System.err.println();
                System.err.print("New pattern counts: ");
                for (int i = 0; i < splitList.size(); i++) {
                    System.err.print(siteCounts[splitList.get(i)] + "  ");
                }
                System.err.println();
            }
            timesRetried = 0;
        } else {
            if (DEBUG) {
                System.err.println("timesRetried = " + timesRetried + " vs. retry = " + retry);
            }
            //new partitioning is slower, so reinstate previous state unless RETRY is specified
            if (timesRetried < retry) {
                //try splitting further any way
                //do not set baseResult
                timesRetried++;
                if (DEBUG) {
                    System.err.println("RETRY number " + timesRetried);
                }
            } else {
                splitList.remove(0);
                if (splitList.size() == 0) {
                    notFinished = false;
                }
                //remove timesTried instanceCount(s)
                if (DEBUG) {
                    System.err.print("Removing " + (timesRetried + 1) + " instance count(s): " + instanceCount);
                }
                //instanceCount = --instanceCounts[longestIndex];
                instanceCounts[longestIndex] = instanceCounts[longestIndex] - (timesRetried + 1);
                instanceCount = instanceCounts[longestIndex];
                if (DEBUG) {
                    System.err.println(" -> " + instanceCount + " for likelihood " + longestIndex);
                }
                newList = new ArrayList<Likelihood>();
                for (int i = 0; i < instanceCount; i++) {
                    Patterns subPatterns = new Patterns(patterns[longestIndex], 0, 0, 1, i, instanceCount);
                    BeagleTreeLikelihood treeLikelihood = createTreeLikelihood(subPatterns, treeModels[longestIndex], branchModels[longestIndex], siteRateModels[longestIndex], branchRateModels[longestIndex], null, ambiguities[longestIndex], rescalingSchemes[longestIndex], isDelayRescalingUntilUnderflow[longestIndex], partialsRestrictions.get(longestIndex), xo);
                    treeLikelihood.setId(xo.getId() + "_" + longestIndex + "_" + i);
                    System.err.println(treeLikelihood.getId() + " created.");
                    newList.add(treeLikelihood);
                }
                /*for (int i = 0; i < newList.size()+1; i++) {
                        likelihoods.remove(currentLocation[longestIndex]);
                    }*/
                for (int i = 0; i < newList.size() + timesRetried + 1; i++) {
                    //TEST CODE START
                    unregisterAllModels((BeagleTreeLikelihood) likelihoods.get(currentLocation[longestIndex]));
                    //TEST CODE END
                    likelihoods.remove(currentLocation[longestIndex]);
                }
                for (int i = 0; i < newList.size(); i++) {
                    likelihoods.add(currentLocation[longestIndex], newList.get(i));
                }
                for (int i = longestIndex + 1; i < currentLocation.length; i++) {
                    currentLocation[i] -= (timesRetried + 1);
                }
                //likelihoods.remove(longestIndex);
                //likelihoods.add(longestIndex, new CompoundLikelihood(newList));
                compound = new TestThreadedCompoundLikelihood(likelihoods);
                //compound = new CompoundLikelihood(likelihoods);
                //compound = new ThreadedCompoundLikelihood(likelihoods);
                siteCounts[longestIndex] = (instanceCount + timesRetried + 1) * siteCounts[longestIndex] / instanceCount;
                longestSize = (instanceCount + timesRetried + 1) * longestSize / instanceCount;
                if (DEBUG) {
                    System.err.println("Pattern counts: ");
                    for (int i = 0; i < siteCounts.length; i++) {
                        System.err.print(siteCounts[i] + " ");
                    }
                    System.err.println();
                    System.err.println("Instance counts: ");
                    for (int i = 0; i < instanceCounts.length; i++) {
                        System.err.print(instanceCounts[i] + " ");
                    }
                    System.err.println();
                    System.err.println("Current locations: ");
                    for (int i = 0; i < currentLocation.length; i++) {
                        System.err.print(currentLocation[i] + " ");
                    }
                    System.err.println();
                }
                timesRetried = 0;
            }
        }
    }
    for (int i = 0; i < originalLikelihoods.size(); i++) {
        unregisterAllModels((BeagleTreeLikelihood) originalLikelihoods.get(i));
    }
    return compound;
}
Also used : Set(java.util.Set) BeagleTreeLikelihood(dr.evomodel.treelikelihood.BeagleTreeLikelihood) ArrayList(java.util.ArrayList) PartialsRescalingScheme(dr.evomodel.treelikelihood.PartialsRescalingScheme) BranchModel(dr.evomodel.branchmodel.BranchModel) TreeModel(dr.evomodel.tree.TreeModel) Patterns(dr.evolution.alignment.Patterns) SitePatterns(dr.evolution.alignment.SitePatterns) GammaSiteRateModel(dr.evomodel.siteratemodel.GammaSiteRateModel) BranchRateModel(dr.evomodel.branchratemodel.BranchRateModel) Map(java.util.Map)
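
The heart of the parser is the six-argument Patterns constructor used at each split point above. The fragment below isolates that step as a sketch: sitePatterns and instanceCount are illustrative names, and the assumption (inferred from the site-count arithmetic in the parser) is that the constructor cuts the source pattern list into instanceCount interleaved, non-overlapping subsets.

int instanceCount = 4;   // illustrative number of BEAGLE instances to split into
int total = 0;
for (int i = 0; i < instanceCount; i++) {
    // subset i of instanceCount, taken over the full range of 'sitePatterns'
    Patterns subPatterns = new Patterns(sitePatterns, 0, 0, 1, i, instanceCount);
    System.out.println("subset " + i + ": " + subPatterns.getPatternCount() + " patterns");
    total += subPatterns.getPatternCount();
}
// if the subsets really partition the source, 'total' equals sitePatterns.getPatternCount()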

Example 19 with Patterns

Use of dr.evolution.alignment.Patterns in project beast-mcmc by beast-dev.

The class BeastGenerator, method checkOptions().

/**
     * Checks that the various options are valid and throws a GeneratorException describing
     * any problem that is found.
     *
     * @throws GeneratorException if there is a problem with the current settings
     */
public void checkOptions() throws GeneratorException {
    try {
        if (options.contains(Microsatellite.INSTANCE)) {
            // clear all masks
            for (PartitionPattern partitionPattern : options.getPartitionPattern()) {
                partitionPattern.getPatterns().clearMask();
            }
            // set mask
            for (PartitionTreeModel model : options.getPartitionTreeModels()) {
                // if a tree only has 1 data partition, which mostly mean unlinked trees
                if (options.getDataPartitions(model).size() == 1) {
                    PartitionPattern partition = (PartitionPattern) options.getDataPartitions(model).get(0);
                    Patterns patterns = partition.getPatterns();
                    for (int i = 0; i < patterns.getTaxonCount(); i++) {
                        int state = patterns.getPatternState(i, 0);
                        // mask ? from data
                        if (state < 0) {
                            patterns.addMask(i);
                        }
                    }
                //                        System.out.println("mask set = " + patterns.getMaskSet() + " in partition " + partition.getName());
                }
            }
        }
        //++++++++++++++++ Taxon List ++++++++++++++++++
        TaxonList taxonList = options.taxonList;
        Set<String> ids = new HashSet<String>();
        ids.add(TaxaParser.TAXA);
        ids.add(AlignmentParser.ALIGNMENT);
        ids.add(TraitData.TRAIT_SPECIES);
        if (taxonList != null) {
            if (taxonList.getTaxonCount() < 2) {
                throw new GeneratorException("BEAST requires at least two taxa to run.");
            }
            for (int i = 0; i < taxonList.getTaxonCount(); i++) {
                Taxon taxon = taxonList.getTaxon(i);
                if (ids.contains(taxon.getId())) {
                    throw new GeneratorException("A taxon has the same id," + taxon.getId() + MESSAGE_CAL);
                }
                ids.add(taxon.getId());
            }
        }
        //++++++++++++++++ Taxon Sets ++++++++++++++++++
        for (PartitionTreeModel model : options.getPartitionTreeModels()) {
            // should be only 1 calibrated internal node with a proper prior and monophyletic for each tree at moment
            if (model.getPartitionTreePrior().getNodeHeightPrior() == TreePriorType.YULE_CALIBRATION) {
                // invalid node calibration
                if (options.treeModelOptions.isNodeCalibrated(model) < 0)
                    throw new GeneratorException(MESSAGE_CAL_YULE);
                if (options.treeModelOptions.isNodeCalibrated(model) > 0) {
                    // internal node calibration
                    List taxonSetsList = options.getKeysFromValue(options.taxonSetsTreeModel, model);
                    if (taxonSetsList.size() != 1 || !options.taxonSetsMono.get(taxonSetsList.get(0))) {
                        // 1 tmrca per tree && monophyletic
                        throw new GeneratorException(MESSAGE_CAL_YULE, BeautiFrame.TAXON_SETS);
                    }
                }
            }
        }
        for (Taxa taxa : options.taxonSets) {
            // AR - we should allow single taxon taxon sets...
            // && !options.taxonSetsIncludeStem.get(taxa)
            if (taxa.getTaxonCount() < 1) {
                throw new GeneratorException("Taxon set, " + taxa.getId() + ", should contain \n" + "at least one taxa. Please go back to Taxon Sets \n" + "panel to correct this.", BeautiFrame.TAXON_SETS);
            }
            if (ids.contains(taxa.getId())) {
                throw new GeneratorException("A taxon set has the same id," + taxa.getId() + MESSAGE_CAL, BeautiFrame.TAXON_SETS);
            }
            ids.add(taxa.getId());
        }
        //++++++++++++++++ *BEAST ++++++++++++++++++
        if (options.useStarBEAST) {
            if (!options.traitExists(TraitData.TRAIT_SPECIES))
                throw new GeneratorException("A trait labelled \"species\" is required for *BEAST species designations." + "\nPlease create or import the species designations in the Traits table.", BeautiFrame.TRAITS);
            // should be only 1 calibrated internal node with monophyletic at moment
            if (options.getPartitionTreePriors().get(0).getNodeHeightPrior() == TreePriorType.SPECIES_YULE_CALIBRATION) {
                if (options.speciesSets.size() != 1 || !options.speciesSetsMono.get(options.speciesSets.get(0))) {
                    throw new GeneratorException(MESSAGE_CAL_YULE, BeautiFrame.TAXON_SETS);
                }
            }
            for (Taxa species : options.speciesSets) {
                if (species.getTaxonCount() < 2) {
                    throw new GeneratorException("Species set, " + species.getId() + ",\n should contain" + "at least two species. \nPlease go back to Species Sets panel to select included species.", BeautiFrame.TAXON_SETS);
                }
                if (ids.contains(species.getId())) {
                    throw new GeneratorException("A species set has the same id," + species.getId() + MESSAGE_CAL, BeautiFrame.TAXON_SETS);
                }
                ids.add(species.getId());
            }
            int tId = options.starBEASTOptions.getEmptySpeciesIndex();
            if (tId >= 0) {
                throw new GeneratorException("The taxon " + options.taxonList.getTaxonId(tId) + " has NULL value for \"species\" trait", BeautiFrame.TRAITS);
            }
        }
        //        if (options.isShareSameTreePrior()) {
        if (options.getPartitionTreeModels().size() > 1) {
            //TODO not allowed multi-prior yet
            for (PartitionTreePrior prior : options.getPartitionTreePriors()) {
                if (prior.getNodeHeightPrior() == TreePriorType.GMRF_SKYRIDE) {
                    throw new GeneratorException("For the Skyride, tree model/tree prior combination not implemented by BEAST." + "\nThe Skyride is only available for a single tree model partition in this release.", BeautiFrame.TREES);
                }
            }
        }
        //+++++++++++++++ Starting tree ++++++++++++++++
        for (PartitionTreeModel model : options.getPartitionTreeModels()) {
            if (model.getStartingTreeType() == StartingTreeType.USER) {
                if (model.getUserStartingTree() == null) {
                    throw new GeneratorException("Please select a starting tree in " + BeautiFrame.TREES + " panel, " + "\nwhen choosing user specified starting tree option.", BeautiFrame.TREES);
                }
            }
        }
        //++++++++++++++++ Random local clock model validation ++++++++++++++++++
        for (PartitionClockModel model : options.getPartitionClockModels()) {
            // 1 random local clock CANNOT have different tree models
            if (model.getClockType() == ClockType.RANDOM_LOCAL_CLOCK) {
                // || AUTOCORRELATED_LOGNORMAL
                PartitionTreeModel treeModel = null;
                for (AbstractPartitionData pd : options.getDataPartitions(model)) {
                    // only the PDs linked to this tree model
                    if (treeModel != null && treeModel != pd.getPartitionTreeModel()) {
                        throw new GeneratorException("A single random local clock cannot be applied to multiple trees.", BeautiFrame.CLOCK_MODELS);
                    }
                    treeModel = pd.getPartitionTreeModel();
                }
            }
        }
        //++++++++++++++++ Tree Model ++++++++++++++++++
        for (PartitionTreeModel model : options.getPartitionTreeModels()) {
            int numOfTaxa = -1;
            for (AbstractPartitionData pd : options.getDataPartitions(model)) {
                if (pd.getTaxonCount() > 0) {
                    if (numOfTaxa > 0) {
                        if (numOfTaxa != pd.getTaxonCount()) {
                            throw new GeneratorException("Partitions with different taxa cannot share the same tree.", BeautiFrame.DATA_PARTITIONS);
                        }
                    } else {
                        numOfTaxa = pd.getTaxonCount();
                    }
                }
            }
        }
        //++++++++++++++++ Prior Bounds ++++++++++++++++++
        for (Parameter param : options.selectParameters()) {
            // only validate parameters whose initial value is defined (not NaN)
            if (!Double.isNaN(param.getInitial())) {
                if (param.isTruncated && (param.getInitial() < param.truncationLower || param.getInitial() > param.truncationUpper)) {
                    throw new GeneratorException("Parameter \"" + param.getName() + "\":" + "\ninitial value " + param.getInitial() + " is NOT in the range [" + param.truncationLower + ", " + param.truncationUpper + "]," + "\nor this range is wrong. Please check the Prior panel.", BeautiFrame.PRIORS);
                } else if (param.priorType == PriorType.UNIFORM_PRIOR && (param.getInitial() < param.uniformLower || param.getInitial() > param.uniformUpper)) {
                    throw new GeneratorException("Parameter \"" + param.getName() + "\":" + "\ninitial value " + param.getInitial() + " is NOT in the range [" + param.uniformLower + ", " + param.uniformUpper + "]," + "\nor this range is wrong. Please check the Prior panel.", BeautiFrame.PRIORS);
                }
                if (param.isNonNegative && param.getInitial() < 0.0) {
                    throw new GeneratorException("Parameter \"" + param.getName() + "\":" + "\ninitial value " + param.getInitial() + " should be non-negative. Please check the Prior panel.", BeautiFrame.PRIORS);
                }
                if (param.isZeroOne && (param.getInitial() < 0.0 || param.getInitial() > 1.0)) {
                    throw new GeneratorException("Parameter \"" + param.getName() + "\":" + "\ninitial value " + param.getInitial() + " should lie in the interval [0, 1]. Please check the Prior panel.", BeautiFrame.PRIORS);
                }
            }
        }
        checkComponentOptions();
    // add other tests and warnings here
    // Speciation model with dated tips
    // Sampling rates without dated tips or priors on rate or nodes
    } catch (Exception e) {
        // catch any other exceptions here and rethrow to generate messages
        throw new GeneratorException(e.getMessage());
    }
}
Also used : TaxonList(dr.evolution.util.TaxonList) Taxon(dr.evolution.util.Taxon) IOException(java.io.IOException) Taxa(dr.evolution.util.Taxa) TaxonList(dr.evolution.util.TaxonList) Patterns(dr.evolution.alignment.Patterns)
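
The microsatellite masking block at the top of checkOptions() relies only on the Patterns calls clearMask(), getTaxonCount(), getPatternState() and addMask(). Below is a minimal sketch of the same idea in isolation; the helper name and standalone form are illustrative, not BEAUti code.

// hypothetical helper: mask every taxon whose single microsatellite state is unknown
static void maskUnknownStates(Patterns patterns) {
    patterns.clearMask();                              // start from a clean mask
    for (int i = 0; i < patterns.getTaxonCount(); i++) {
        if (patterns.getPatternState(i, 0) < 0) {      // a negative state encodes "?" in the imported data
            patterns.addMask(i);                       // exclude this taxon's value for the partition
        }
    }
}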

Example 20 with Patterns

Use of dr.evolution.alignment.Patterns in project beast-mcmc by beast-dev.

The class MicrosatelliteSimulatorParser, method parseXMLObject().

public Object parseXMLObject(XMLObject xo) throws XMLParseException {
    Microsatellite msatDataType = (Microsatellite) xo.getChild(Microsatellite.class);
    Taxa taxa = (Taxa) xo.getChild(Taxa.class);
    Tree tree = (Tree) xo.getChild(Tree.class);
    MicrosatelliteModel msatModel = (MicrosatelliteModel) xo.getChild(MicrosatelliteModel.class);
    BranchRateModel brModel = (BranchRateModel) xo.getChild(BranchRateModel.class);
    if (brModel == null) {
        brModel = new DefaultBranchRateModel();
    }
    MicrosatelliteSimulator msatSim = new MicrosatelliteSimulator(msatDataType, taxa, tree, new GammaSiteModel(msatModel), brModel);
    Patterns patterns = msatSim.simulateMsatPattern();
    String msatPatId = xo.getAttribute("id", "simMsatPat");
    patterns.setId(msatPatId);
    MicrosatellitePatternParser.printDetails(patterns);
    MicrosatellitePatternParser.printMicrosatContent(patterns);
    return patterns;
}
Also used : Microsatellite(dr.evolution.datatype.Microsatellite) Taxa(dr.evolution.util.Taxa) MicrosatelliteModel(dr.oldevomodel.substmodel.MicrosatelliteModel) GammaSiteModel(dr.oldevomodel.sitemodel.GammaSiteModel) BranchRateModel(dr.evomodel.branchratemodel.BranchRateModel) DefaultBranchRateModel(dr.evomodel.branchratemodel.DefaultBranchRateModel) MicrosatelliteSimulator(dr.app.seqgen.MicrosatelliteSimulator) Tree(dr.evolution.tree.Tree) Patterns(dr.evolution.alignment.Patterns)
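
Beyond the two print helpers, the simulated Patterns object can also be inspected directly. The loop below is an illustrative sketch only; it assumes the simulation yields at least one pattern (index 0) and that Patterns exposes getTaxonId(int) through its taxon-list interface, while getTaxonCount() and getPatternState() are the accessors already used in the BeastGenerator example above.

for (int i = 0; i < patterns.getTaxonCount(); i++) {
    int state = patterns.getPatternState(i, 0);                   // simulated microsatellite state for taxon i
    System.out.println(patterns.getTaxonId(i) + " -> " + state);  // getTaxonId assumed from the taxon-list interface
}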

Aggregations

Patterns (dr.evolution.alignment.Patterns): 21
Microsatellite (dr.evolution.datatype.Microsatellite): 9
Taxa (dr.evolution.util.Taxa): 9
Taxon (dr.evolution.util.Taxon): 9
TreeModel (dr.evomodel.tree.TreeModel): 9
ArrayList (java.util.ArrayList): 9
Tree (dr.evolution.tree.Tree): 7
BranchRateModel (dr.evomodel.branchratemodel.BranchRateModel): 6
PatternList (dr.evolution.alignment.PatternList): 4
NewickImporter (dr.evolution.io.NewickImporter): 4
BranchModel (dr.evomodel.branchmodel.BranchModel): 4
GammaSiteRateModel (dr.evomodel.siteratemodel.GammaSiteRateModel): 4
PartialsRescalingScheme (dr.evomodel.treelikelihood.PartialsRescalingScheme): 4
Parameter (dr.inference.model.Parameter): 4
GammaSiteModel (dr.oldevomodel.sitemodel.GammaSiteModel): 4
AsymmetricQuadraticModel (dr.oldevomodel.substmodel.AsymmetricQuadraticModel): 4
SitePatterns (dr.evolution.alignment.SitePatterns): 3
TaxonList (dr.evolution.util.TaxonList): 3
HomogeneousBranchModel (dr.evomodel.branchmodel.HomogeneousBranchModel): 3
FrequencyModel (dr.evomodel.substmodel.FrequencyModel): 3