Search in sources :

Example 6 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class FastaImporter method importAlignment.

/**
     * importAlignment.
     */
public Alignment importAlignment() throws IOException, ImportException {
    SimpleAlignment alignment = null;
    try {
        // find fasta line start
        while (read() != FASTA_FIRST_CHAR) {
        }
        do {
            final String name = readLine().trim();
            StringBuffer seq = new StringBuffer();
            readSequence(seq, dataType, "" + FASTA_FIRST_CHAR, Integer.MAX_VALUE, "-", "?", "", "");
            if (alignment == null) {
                alignment = new SimpleAlignment();
            }
            alignment.addSequence(new Sequence(new Taxon(name.toString()), seq.toString()));
        } while (getLastDelimiter() == FASTA_FIRST_CHAR);
    } catch (EOFException e) {
    // catch end of file the ugly way.
    }
    return alignment;
}
Also used : SimpleAlignment(dr.evolution.alignment.SimpleAlignment) Taxon(dr.evolution.util.Taxon) EOFException(java.io.EOFException) Sequence(dr.evolution.sequence.Sequence)

Example 7 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class Defects method main.

public static void main(String[] args) throws java.io.IOException {
    String alignmentFileName = null;
    if (args != null && args.length > 0) {
        alignmentFileName = args[0];
    }
    Alignment alignment = readNexusFile(alignmentFileName);
    Defects defects = new Defects(alignment);
    int siteCount = alignment.getSiteCount() / 3;
    int sequenceCount = alignment.getSequenceCount();
    int totalSites = siteCount * sequenceCount;
    int totalReads = defects.getTotalCodonsMinusGaps();
    int defectiveSites = defects.getDefectiveSiteCount();
    int defectiveSequences = defects.getDefectiveSequenceCount();
    int totalDefects = defects.getDefectCount();
    int totalStops = defects.getStopCodonCount();
    double siteP = (double) defectiveSites / (double) siteCount;
    double sequenceP = (double) defectiveSequences / (double) sequenceCount;
    double totalP = (double) totalDefects / (double) totalReads;
    double totalSP = (double) totalStops / (double) totalReads;
    System.out.println("Matrix size=" + totalSites);
    System.out.println("Non-gap codons=" + totalReads);
    System.out.println(defectiveSequences + "/" + sequenceCount + "(" + sequenceP + ") defective sequences.");
    System.out.println(defectiveSites + "/" + siteCount + "(" + siteP + ") defective sites.");
    System.out.println(totalDefects + "/" + totalReads + "(" + totalP + ") defects.");
    System.out.println(totalStops + "/" + totalReads + "(" + totalSP + ") stop codons.");
    double mean = (double) totalDefects / (double) sequenceCount;
    System.out.println(mean + " defects per sequence");
    double meanS = (double) totalStops / (double) sequenceCount;
    System.out.println(meanS + " stops per sequence");
    int maxDefects = defects.getMaxDefectiveSites();
    int maxStops = defects.getMaxStopSites();
    System.out.println("defective sequences:");
    //}
    for (int d = 1; d <= maxDefects; d++) {
        Set<Integer> seqs = defects.getSequences(d);
        for (Integer seq : seqs) {
            String name = alignment.getTaxonId(seq);
            System.out.println(d + "  " + name);
        }
    }
    System.out.println("Defects\tSequences\texpected");
    double probTerm;
    // probability of 0
    probTerm = Math.exp(-mean);
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + defects.getSequenceCount(i) + "\t" + probTerm * sequenceCount);
        // compute probability of n from prob. of n-1
        probTerm *= mean / (i + 1);
    }
    System.out.println("Stops\tSequences\texpected");
    // probability of 0
    probTerm = Math.exp(-meanS);
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + defects.getStopSequenceCount(i) + "\t" + probTerm * sequenceCount);
        // compute probability of n from prob. of n-1
        probTerm *= meanS / (i + 1);
    }
    System.out.println("stop-codon sequences:");
    for (int d = 1; d <= maxStops; d++) {
        Set<Integer> seqs = defects.getSequencesByStopCount(d);
        for (Integer index : seqs) {
            String name = alignment.getTaxonId(index);
            System.out.println(d + "  " + name);
        }
    }
    Consensus con = new Consensus("mode", alignment, true);
    Sequence consensus = con.getConsensusSequence();
    SimpleAlignment a = new SimpleAlignment();
    a.addSequence(new Sequence(consensus));
    ConvertAlignment ca = new ConvertAlignment(Codons.UNIVERSAL, a);
    double[] pstop = new double[ca.getSiteCount()];
    int[] counts = new int[10];
    for (double kappa = 2.0; kappa <= 2.0; kappa += 1.0) {
        for (int i = 0; i < ca.getSiteCount(); i++) {
            int state = ca.getState(0, i);
            if (Codons.UNIVERSAL.isStopCodon(state)) {
                throw new RuntimeException("Consensus has a stop codon in it at position " + i + "!");
            }
            int[] triplet = Codons.UNIVERSAL.getTripletStates(state);
            int[][] tvmutants = generateTransversionMutants(triplet);
            int[][] timutants = generateTransitionMutants(triplet);
            int tvStops = 0;
            for (int j = 0; j < 6; j++) {
                if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(tvmutants[j][0], tvmutants[j][1], tvmutants[j][2]))) {
                    tvStops += 1;
                }
            }
            int tiStops = 0;
            for (int j = 0; j < 3; j++) {
                if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(timutants[j][0], timutants[j][1], timutants[j][2]))) {
                    tiStops += 1;
                }
            }
            pstop[i] = (tvStops + kappa * tiStops) / (6.0 + kappa * 3.0);
            counts[tvStops + tiStops] += 1;
        }
        System.out.println("kappa = " + kappa + " pstop=" + dr.stats.DiscreteStatistics.mean(pstop));
    }
    System.out.println("stop-mutations\tcodons");
    for (int i = 0; i < 10; i++) {
        System.out.println(i + "\t" + counts[i]);
    }
    /*int count = 0;
        int reps = 100000;
        for (int i = 0; i < reps; i++) {
            SimpleAlignment simpleAlignment = new SimpleAlignment();
            simpleAlignment.addSequence(new Sequence(consensus));

            //pick a random position
            int pos = random.nextInt(simpleAlignment.getSiteCount());
            int state = simpleAlignment.getState(0,pos);
            int newState = random.nextInt(4);
            while (newState == state) {
                newState = random.nextInt(4);
            }
            simpleAlignment.setState(0,pos,newState);
            defects = new Defects(simpleAlignment);
            count += defects.getDefectCount();
        }

        double p = (double)count/(double)reps;
        */
    double p = dr.stats.DiscreteStatistics.mean(pstop);
    double rate = totalSP * (1.0 - p) / p;
    System.out.println("Total inferred point-mutation rate = " + rate);
    System.out.println("Total inferred point-mutations = " + (totalStops * (1 - p) / p));
    System.out.println("Proportion of point-mutations that produce premature stop codons = " + p);
}
Also used : Sequence(dr.evolution.sequence.Sequence)

Example 8 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class Consensus method getConsensusSequence.

public final Sequence getConsensusSequence() {
    StringBuffer buffer = new StringBuffer();
    for (int i = 0; i < consensus.length; i++) {
        buffer.append(dataType.getChar(getState(i)));
    }
    Sequence sequence = new Sequence(new Taxon(name), buffer.toString());
    sequence.setDataType(dataType);
    return sequence;
}
Also used : Taxon(dr.evolution.util.Taxon) Sequence(dr.evolution.sequence.Sequence)

Example 9 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class SelectionMapping method main.

public static void main(String[] args) throws java.io.IOException {
    if (args.length != 1)
        throw new RuntimeException("Expect a file name containing parameters!");
    FileInputStream fi = new FileInputStream(args[0]);
    Properties props = new Properties();
    props.load(fi);
    fi.close();
    int genomeLength = Integer.parseInt(props.getProperty("genomeLength", "1000"));
    int populationSize = Integer.parseInt(props.getProperty("populationSize", "250"));
    int replicates = Integer.parseInt(props.getProperty("replicates", "1000"));
    double mu = Double.parseDouble(props.getProperty("mu", "1e-4"));
    double alpha = Double.parseDouble(props.getProperty("alpha", "0.06"));
    double pInv = Double.parseDouble(props.getProperty("pinv", "0.5"));
    double beta = Double.parseDouble(props.getProperty("beta", "1"));
    int stateSize = Integer.parseInt(props.getProperty("stateSize", "4"));
    int n = Integer.parseInt(props.getProperty("sampleSize", "40"));
    boolean randomFittest = Boolean.valueOf(props.getProperty("randomizeFittest", "false"));
    boolean outputAlignments = Boolean.valueOf(props.getProperty("outputAlignments", "false"));
    String alignmentFileName = props.getProperty("alignment.filename", "alignment.fasta");
    String fileName = props.getProperty("output.filename", "out.txt");
    int burninFactor = Integer.parseInt(props.getProperty("burninFactor", "10"));
    PrintWriter writer = new PrintWriter(new FileWriter(fileName));
    writer.println("// WMD v1.0");
    writer.println("// genomeLength: " + genomeLength);
    writer.println("// populationSize: " + populationSize);
    writer.println("// alpha: " + alpha);
    writer.println("// beta: " + beta);
    writer.println("// replicates: " + replicates);
    writer.println("// output.filename: " + fileName);
    writer.println("// mu: " + mu);
    writer.println("// outputAlignments: " + outputAlignments);
    writer.println("// randomFittest: " + randomFittest);
    writer.println("// alignment.filename: " + alignmentFileName);
    int[] totalMutationalDensity = new int[200];
    int[] totalLineages = new int[200];
    int[] nMutants = new int[genomeLength * 3];
    double[] mrcaFitness = new double[1];
    final int generations = populationSize;
    ArrayList[] unfoldedSites = new ArrayList[populationSize + 1];
    for (int i = 0; i < populationSize + 1; i++) {
        unfoldedSites[i] = new ArrayList();
    }
    long start = System.currentTimeMillis();
    int dnaSingles = 0;
    int aaSingles = 0;
    int dnaSegs = 0;
    int aaSegs = 0;
    for (int reps = 0; reps < replicates; reps++) {
        //			FitnessFunction f = null;
        //			if (alpha == 0.0) {
        //				f = new NeutralModel();
        //			}
        FitnessFunction f = new CodonFitnessFunction(genomeLength, alpha, beta, pInv);
        Population p = new Population(populationSize, genomeLength * 3, new SimpleMutator(mu, stateSize), f, randomFittest);
        Genome master = new SimpleGenome(genomeLength * 3, f, randomFittest);
        p = Population.forwardSimulation(p, burninFactor * populationSize);
        int age = -1;
        while (age == -1) {
            p = Population.forwardSimulation(p, generations);
            age = p.getAgeOfMRCA(mrcaFitness);
        }
        Population children = Population.forwardSimulation(p, 1);
        writer.println(reps + "\t" + age + "\t" + p.getMeanParentFitness() + "\t" + mrcaFitness[0] + "\t" + p.getProportionAsFit(mrcaFitness[0]));
        if (reps % 10 == 0) {
            System.out.print(reps + "\t" + age + "\t" + p.getMeanParentFitness() + "\t" + mrcaFitness[0] + "\t" + p.getProportionAsFit(mrcaFitness[0]));
            long millis = System.currentTimeMillis() - start;
            double seconds = millis / 1000.0;
            if (reps != 0) {
                double seconds2go = (replicates - reps) * seconds / reps;
                System.out.println(" -- " + (Math.round(seconds2go / 36) / 100.0) + " hours");
            } else {
                System.out.println();
            }
        }
        //p.unfoldedSiteFrequencies(unfoldedSites);
        // ************************************************************************************
        // get the mutational density per lineage as a function of time
        // ************************************************************************************
        List<Integer>[] mutations = new ArrayList[200];
        for (int i = 0; i < mutations.length; i++) {
            mutations[i] = new ArrayList<Integer>(200);
        }
        int sampleSize = Math.min(n, populationSize);
        p.getMutationDensity(sampleSize, mutations);
        for (int i = 0; i < mutations.length; i++) {
            for (int j = 0; j < mutations[i].size(); j++) {
                totalMutationalDensity[i] += mutations[i].get(j);
            }
            totalLineages[i] += mutations[i].size();
        }
        // ************************************************************************************
        // output alignments if requested
        // ************************************************************************************
        SimpleAlignment alignment = new SimpleAlignment();
        SimpleAlignment aaAlignment = new SimpleAlignment();
        alignment.setDataType(Nucleotides.INSTANCE);
        aaAlignment.setDataType(AminoAcids.INSTANCE);
        if (outputAlignments) {
            PrintWriter pw = new PrintWriter(new FileWriter(alignmentFileName));
            //pw.println(sampleSize + "\t" + p.getGenomeLength());
            for (int i = 0; i < sampleSize; i++) {
                pw.print(">sequence_");
                if (i < 10) {
                    pw.print("0");
                }
                pw.println("" + i);
                String dnaSequence = p.getGenome(i).getDNASequenceString();
                String aaSequence = p.getGenome(i).getAminoAcidSequenceString();
                pw.println(dnaSequence);
                alignment.addSequence(new Sequence(dnaSequence));
                aaAlignment.addSequence(new Sequence(aaSequence));
            }
            pw.println();
            pw.close();
        }
        // ************************************************************************************
        for (int i = 0; i < sampleSize; i++) {
            nMutants[master.hammingDistance(p.getGenome(i))] += 1;
        }
        dnaSingles += countSingletons(alignment);
        aaSingles += countSingletons(aaAlignment);
        dnaSegs += countSegregating(alignment);
        aaSegs += countSegregating(aaAlignment);
    }
    for (int i = 0; i < unfoldedSites.length; i++) {
        double meanFitness = 0.0;
        double proportionNeutral = 0.0;
        double proportionPositive = 0.0;
        double proportionNegative = 0.0;
        for (int j = 0; j < unfoldedSites[i].size(); j++) {
            double relativeFitness = (Double) unfoldedSites[i].get(j);
            meanFitness += relativeFitness;
            if (relativeFitness == 1.0) {
                proportionNeutral += 1.0;
            } else if (relativeFitness > 1.0) {
                proportionPositive += 1.0;
            } else {
                proportionNegative += 1.0;
            }
        }
        meanFitness /= (double) unfoldedSites[i].size();
        proportionNeutral /= (double) unfoldedSites[i].size();
        proportionPositive /= (double) unfoldedSites[i].size();
        proportionNegative /= (double) unfoldedSites[i].size();
        writer.println(i + "\t" + unfoldedSites[i].size() + "\t" + meanFitness + "\t" + proportionNeutral + "\t" + proportionPositive + "\t" + proportionNegative);
    }
    writer.println("--------------------");
    writer.println("SINGLETON COUNTS");
    writer.println("--------------------");
    writer.println("dna singletons = " + dnaSingles);
    writer.println("aa singletons = " + aaSingles);
    int dnaNons = dnaSegs - dnaSingles;
    int aaNons = aaSegs - aaSingles;
    System.out.println("dna singletons = " + dnaSingles);
    System.out.println("aa singletons = " + aaSingles);
    System.out.println("dna segregating = " + dnaSegs);
    System.out.println("aa segregating = " + aaSegs);
    System.out.println("dna non-singles = " + dnaNons);
    System.out.println("aa non-singles = " + aaNons);
    System.out.println("ratio = " + ((double) dnaSingles / (double) aaSingles));
    System.out.println("ratio(non) = " + ((double) dnaNons / (double) aaNons));
    writer.println("--------------------");
    writer.println("MUTATIONAL DENSITIES");
    writer.println("--------------------");
    for (int i = 0; i < 200; i++) {
        writer.println(totalMutationalDensity[i] + "\t" + totalLineages[i]);
    }
    // ************************************************************************************
    // output hamming distances
    // ************************************************************************************
    writer.println("--------------------");
    writer.println("Hamming distance distribution");
    writer.println("--------------------");
    for (int i = 0; i < nMutants.length; i++) {
        writer.println(i + "\t" + nMutants[i]);
    }
    writer.close();
}
Also used : FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) Properties(java.util.Properties) SimpleAlignment(dr.evolution.alignment.SimpleAlignment) List(java.util.List) ArrayList(java.util.ArrayList) PrintWriter(java.io.PrintWriter) Sequence(dr.evolution.sequence.Sequence) FileInputStream(java.io.FileInputStream)

Example 10 with Sequence

use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.

the class NexusImporter method readSequenceData.

/**
     * Reads sequences in a 'DATA' or 'CHARACTERS' block.
     */
private void readSequenceData(Sequences sequences, TaxonList taxonList) throws ImportException, IOException {
    int n, i;
    String firstSequence = null;
    if (isInterleaved) {
        boolean firstLoop = true;
        int readCount = 0;
        while (readCount < siteCount) {
            n = -1;
            for (i = 0; i < taxonCount; i++) {
                String token = readToken().trim();
                Sequence sequence;
                if (firstLoop) {
                    sequence = new Sequence();
                    sequence.setDataType(dataType);
                    sequences.addSequence(sequence);
                    Taxon taxon;
                    if (taxonList != null) {
                        int index = taxonList.getTaxonIndex(token.trim());
                        if (index == -1) {
                            // ...perhaps it is a numerical taxon reference?
                            throw new UnknownTaxonException(token);
                        } else {
                            taxon = taxonList.getTaxon(index);
                        }
                    } else {
                        taxon = new Taxon(token.trim());
                    }
                    sequence.setTaxon(taxon);
                } else {
                    sequence = sequences.getSequence(i);
                    Taxon taxon = sequence.getTaxon();
                    if (!taxon.getId().equals(token)) {
                        throw new UnknownTaxonException("Unknown taxon label: expecting '" + taxon.getId() + "', found '" + token + "'");
                    }
                }
                StringBuffer buffer = new StringBuffer();
                readSequenceLine(buffer, dataType, ";", gapCharacters, missingCharacters, matchCharacters, firstSequence);
                String seqString = buffer.toString();
                sequence.appendSequenceString(seqString);
                if (i == 0) {
                    firstSequence = seqString;
                }
                if (getLastDelimiter() == ';') {
                    if (i < taxonCount - 1) {
                        throw new TooFewTaxaException();
                    }
                    if (readCount + n < siteCount) {
                        throw new ShortSequenceException(sequence.getTaxon().getId());
                    }
                }
                if (n == -1) {
                    n = seqString.length();
                }
                if (n != seqString.length()) {
                    throw new ShortSequenceException(sequence.getTaxon().getId());
                }
            }
            firstLoop = false;
            readCount += n;
        }
        if (getLastDelimiter() != ';') {
            throw new BadFormatException("Expecting ';' after sequences data");
        }
    } else {
        for (i = 0; i < taxonCount; i++) {
            String token = readToken().trim();
            Sequence sequence = new Sequence();
            sequence.setDataType(dataType);
            sequences.addSequence(sequence);
            Taxon taxon;
            if (taxonList != null) {
                int index = taxonList.getTaxonIndex(token);
                if (index == -1) {
                    // ...perhaps it is a numerical taxon reference?
                    throw new UnknownTaxonException(token);
                } else {
                    taxon = taxonList.getTaxon(index);
                }
            } else {
                taxon = new Taxon(token);
            }
            sequence.setTaxon(taxon);
            StringBuffer buffer = new StringBuffer();
            readSequence(buffer, dataType, ";", siteCount, gapCharacters, missingCharacters, matchCharacters, firstSequence);
            String seqString = buffer.toString();
            if (seqString.length() != siteCount) {
                throw new ShortSequenceException(sequence.getTaxon().getId());
            }
            sequence.appendSequenceString(seqString);
            if (i == 0) {
                firstSequence = seqString;
            }
            if (getLastDelimiter() == ';' && i < taxonCount - 1) {
                throw new TooFewTaxaException();
            }
        }
        if (getLastDelimiter() != ';') {
            throw new BadFormatException("Expecting ';' after sequences data, has '" + (char) getLastDelimiter() + "' in line " + getLineNumber());
        }
    }
}
Also used : Sequence(dr.evolution.sequence.Sequence)

Aggregations

Sequence (dr.evolution.sequence.Sequence)31 SimpleAlignment (dr.evolution.alignment.SimpleAlignment)15 Taxon (dr.evolution.util.Taxon)12 DataType (dr.evolution.datatype.DataType)6 BranchRateModel (dr.evomodel.branchratemodel.BranchRateModel)6 GammaSiteRateModel (dr.evomodel.siteratemodel.GammaSiteRateModel)5 TreeModel (dr.evomodel.tree.TreeModel)5 Parameter (dr.inference.model.Parameter)5 Tree (dr.evolution.tree.Tree)4 Date (dr.evolution.util.Date)4 HomogeneousBranchModel (dr.evomodel.branchmodel.HomogeneousBranchModel)4 DefaultBranchRateModel (dr.evomodel.branchratemodel.DefaultBranchRateModel)4 FrequencyModel (dr.evomodel.substmodel.FrequencyModel)4 Partition (dr.app.beagle.tools.Partition)3 NewickImporter (dr.evolution.io.NewickImporter)3 HKY (dr.evomodel.substmodel.nucleotide.HKY)3 ArrayList (java.util.ArrayList)3 BeagleSequenceSimulator (dr.app.beagle.tools.BeagleSequenceSimulator)2 ImportException (dr.evolution.io.Importer.ImportException)2 Taxa (dr.evolution.util.Taxa)2