Search in sources :

Example 51 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.

the class VDJCAlignerAbstract method init.

@Override
protected void init() {
    DAlignerParameters dAlignerParameters = parameters.getDAlignerParameters();
    List<VDJCGene> dGenes = genesToAlign.get(GeneType.Diversity);
    if (dAlignerParameters != null && dGenes.size() != 0)
        singleDAligner = new SingleDAligner(dAlignerParameters, genesToAlign.get(GeneType.Diversity));
    vAligner = createKAligner(GeneType.Variable);
    jAligner = createKAligner(GeneType.Joining);
    cAligner = createKAligner(GeneType.Constant);
    Chains chains = new Chains();
    for (VDJCGene gene : getUsedGenes()) chains = chains.merge(gene.getChains());
    filters = new EnumMap<>(GeneType.class);
    for (GeneType geneType : GeneType.VJC_REFERENCE) {
        HashMap<String, BitArray> f = new HashMap<>();
        for (final String chain : chains) {
            BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(geneType);
            if (aligner != null)
                f.put(chain, aligner.createFilter(new Filter<VDJCGene>() {

                    @Override
                    public boolean accept(VDJCGene object) {
                        return object.getChains().contains(chain);
                    }
                }));
        }
        filters.put(geneType, f);
    }
}
Also used : HashMap(java.util.HashMap) Chains(io.repseq.core.Chains) AlignmentHit(com.milaboratory.core.alignment.batch.AlignmentHit) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene) GeneType(io.repseq.core.GeneType) BitArray(com.milaboratory.util.BitArray)

Example 52 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.

the class CompileAction method compile.

public static void compile(Path source, Path destination, int surroundings) throws IOException {
    VDJCLibraryRegistry.resetDefaultRegistry();
    VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
    reg.registerLibraries(source, "lib");
    List<VDJCLibraryData> result = new ArrayList<>();
    for (VDJCLibrary lib : reg.getLoadedLibraries()) {
        VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
        for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) fragmentsBuilder.addRegion(fragment);
        for (VDJCGene gene : lib.getGenes()) {
            if (!gene.getData().getBaseSequence().isPureOriginalSequence())
                throw new IllegalArgumentException("Don't support mutated sequences yet.");
            URI uri = gene.getData().getBaseSequence().getOrigin();
            Range region = gene.getPartitioning().getContainingRegion();
            region = region.expand(surroundings);
            NucleotideSequence seq;
            try {
                seq = gene.getSequenceProvider().getRegion(region);
            } catch (SequenceProviderIndexOutOfBoundsException e) {
                region = e.getAvailableRange();
                if (region == null)
                    throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?");
                seq = gene.getSequenceProvider().getRegion(region);
            }
            fragmentsBuilder.addRegion(uri, region, seq);
        }
        result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
    }
    VDJCDataUtils.writeToFile(result, destination, true);
    log.info("{} compiled successfully.", source);
}
Also used : VDJCDataUtils(io.repseq.dto.VDJCDataUtils) ArrayList(java.util.ArrayList) Range(com.milaboratory.core.Range) URI(java.net.URI) KnownSequenceFragmentData(io.repseq.dto.KnownSequenceFragmentData) SequenceProviderIndexOutOfBoundsException(com.milaboratory.core.sequence.provider.SequenceProviderIndexOutOfBoundsException) VDJCLibraryData(io.repseq.dto.VDJCLibraryData) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCLibrary(io.repseq.core.VDJCLibrary) VDJCGene(io.repseq.core.VDJCGene) VDJCLibraryRegistry(io.repseq.core.VDJCLibraryRegistry)

Example 53 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.

the class ExportCloneSequencesAction method go.

@Override
public void go(ActionHelper helper) throws Exception {
    Chains chains = params.getChains();
    GeneFeature geneFeature = params.getGeneFeature();
    RandomGenerator random = new Well19937c(1232434);
    try (GRepertoireReader input = new GRepertoireReader(createBufferedReader(params.getInput()));
        FastaWriter<NucleotideSequence> output = createSingleFastaWriter(params.getOutput())) {
        List<DescriptionExtractor> extractors = params.getExtractors(input.getLibrary());
        long i = 0;
        for (GClone clone : CUtils.it(input)) {
            int f = params.factor == null ? 1 : randomizedRound(clone.abundance * params.factor, random);
            for (int j = 0; j < f; j++) for (Map.Entry<String, GGene> e : clone.genes.entrySet()) if (chains.contains(e.getKey())) {
                StringBuilder descriptionLine = new StringBuilder("GClone");
                for (DescriptionExtractor extractor : extractors) descriptionLine.append("|").append(extractor.extract(clone, e.getValue(), e.getKey()));
                output.write(new FastaRecord<>(i++, descriptionLine.toString(), e.getValue().getFeature(geneFeature)));
            }
        }
    }
}
Also used : GeneFeature(io.repseq.core.GeneFeature) Chains(io.repseq.core.Chains) Well19937c(org.apache.commons.math3.random.Well19937c) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 54 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.

the class FromFastaActionAbstract method go.

@Override
public void go(ActionHelper helper) throws Exception {
    Pattern functionalityRegexp = params.getFunctionalityRegexp();
    GeneType geneType = params.getGeneType();
    Map<String, VDJCGeneData> genes = new HashMap<>();
    Path libraryPath = Paths.get(params.getOutputJSON()).toAbsolutePath();
    // Parsing -P or --gene-feature parameters
    Map<ReferencePoint, Integer> points = new HashMap<>();
    if (params.geneFeature != null) {
        GeneFeature gf = params.getGeneFeature();
        points.put(gf.getFirstPoint(), 0);
        points.put(gf.getLastPoint(), -1);
    } else
        for (Map.Entry<String, String> p : params.points.entrySet()) {
            ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
            if (anchorPoint == null)
                throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
            int position = Integer.decode(p.getValue());
            points.put(anchorPoint, position);
        }
    // Check
    for (Map.Entry<ReferencePoint, Integer> entry : points.entrySet()) if (entry.getKey().getGeneType() != null && entry.getKey().getGeneType() != geneType)
        throw new IllegalArgumentException("Incompatible anchor point and gene type: " + entry.getKey() + " / " + geneType);
    try (FastaReader reader = new FastaReader<>(params.getInput(), null);
        SequenceStorage storage = params.doEmbedSequences() ? new EmbeddedWriter() : params.getOutputFasta() == null ? new ExistingFileWriter(libraryPath, Paths.get(params.getInput()).toAbsolutePath()) : new FastaSequenceStorage(libraryPath, Paths.get(params.getOutputFasta()).toAbsolutePath())) {
        for (FastaReader.RawFastaRecord record : CUtils.it((OutputPortCloseable<FastaReader.RawFastaRecord>) reader.asRawRecordsPort())) {
            StringWithMapping swm = StringWithMapping.removeSymbol(record.sequence, params.getPaddingCharacter());
            NucleotideSequence seq = new NucleotideSequence(swm.getModifiedString());
            if (seq.containsWildcards()) {
                System.out.println("Sequence dropped because contain wildcards: " + record.description);
                continue;
            }
            String[] fields = record.description.split("\\|");
            String geneName = fields[params.nameIndex];
            boolean functionality = true;
            if (params.functionalityIndex != null)
                functionality = functionalityRegexp.matcher(fields[params.functionalityIndex]).matches();
            SortedMap<ReferencePoint, Long> anchorPoints = new TreeMap<>();
            for (Map.Entry<String, String> p : params.patterns.entrySet()) {
                ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
                if (anchorPoint == null)
                    throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
                if (anchorPoint.getGeneType() != null && anchorPoint.getGeneType() != geneType)
                    throw new IllegalArgumentException("Incompatible anchor point and gene type: " + anchorPoint + " / " + geneType);
                Pattern pattern = Pattern.compile(p.getValue());
                int position = -1;
                for (boolean stops : new boolean[] { false, true }) for (int f = 0; f < 3; f++) {
                    if (position != -1)
                        continue;
                    TranslationParameters tp = TranslationParameters.withoutIncompleteCodon(f);
                    AminoAcidSequence aa = AminoAcidSequence.translate(seq, tp);
                    if (!stops && aa.containStops())
                        continue;
                    String str = aa.toString();
                    Matcher matcher = pattern.matcher(str);
                    if (matcher.find()) {
                        int aaPosition = matcher.start(1);
                        position = AminoAcidSequence.convertAAPositionToNt(aaPosition, seq.size(), tp);
                    }
                }
                if (position == -1)
                    continue;
                anchorPoints.put(anchorPoint, (long) position);
            }
            for (Map.Entry<ReferencePoint, Integer> p : points.entrySet()) {
                // AA patterns have priority over positional anchor points
                if (anchorPoints.containsKey(p.getKey()))
                    continue;
                // Converting position using
                int position = swm.convertPosition(p.getValue());
                // Can't be converted (e.g. position of padding symbol) skipping
                if (position == -1)
                    continue;
                anchorPoints.put(p.getKey(), (long) position);
            }
            if (genes.containsKey(geneName)) {
                if (params.getIgnoreDuplicates()) {
                    System.out.println("Ignored: Duplicate records for " + geneName);
                    continue;
                } else
                    throw new IllegalArgumentException("Duplicate records for " + geneName);
            }
            BaseSequence baseSequence = storage.storeSequence(seq, geneName, record.description);
            VDJCGeneData gene = new VDJCGeneData(baseSequence, geneName, geneType, functionality, new Chains(params.chain), new TreeMap<String, SortedSet<String>>(), anchorPoints).addMetaValue(KnownVDJCGeneMetaFields.COMMENTS, record.description);
            genes.put(geneName, gene);
        }
        VDJCLibraryData library = new VDJCLibraryData(params.taxonId, params.speciesNames, new ArrayList<>(genes.values()), new TreeMap<String, SortedSet<String>>(), storage.getBase()).addMetaValue(KnownVDJCLibraryMetaFields.COMMENTS, "Imported from: " + params.getInput());
        VDJCDataUtils.writeToFile(new VDJCLibraryData[] { library }, params.getOutputJSON(), false);
    }
}
Also used : FastaReader(com.milaboratory.core.io.sequence.fasta.FastaReader) Matcher(java.util.regex.Matcher) AminoAcidSequence(com.milaboratory.core.sequence.AminoAcidSequence) Path(java.nio.file.Path) Pattern(java.util.regex.Pattern) StringWithMapping(io.repseq.util.StringWithMapping) TranslationParameters(com.milaboratory.core.sequence.TranslationParameters) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence)

Example 55 with NucleotideSequence

use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.

the class BasicGGeneModel method create.

@Override
public GGeneGenerator create(RandomGenerator random, VDJCLibrary library) {
    final VDJCGenesGenerator vdjcGenesGenerator = vdjcGenesModel.create(random, library);
    List<VDJCGene> vGenes = vdjcGenesGenerator.genes(GeneType.Variable);
    List<VDJCGene> dGenes = vdjcGenesGenerator.genes(GeneType.Diversity);
    List<VDJCGene> jGenes = vdjcGenesGenerator.genes(GeneType.Joining);
    List<VDJCGene> cGenes = vdjcGenesGenerator.genes(GeneType.Constant);
    final VDJTrimmingGenerator trimmingGenerator = trimmingModel.create(random, vGenes, dGenes, jGenes, cGenes);
    final InsertGenerator vInsertGenerator = vInsertModel.create(random, true, vGenes, dGenes, jGenes, cGenes);
    final InsertGenerator djInsertGenerator = dGenes.isEmpty() ? null : djInsertModel.create(random, false, vGenes, dGenes, jGenes, cGenes);
    return new GGeneGenerator() {

        @Override
        public GGene generate() {
            VDJCGenes vdjcGenes = vdjcGenesGenerator.sample();
            VDJTrimming trimming = trimmingGenerator.sample(vdjcGenes);
            assert !vdjcGenes.isDDefined() || (vdjcGenes.isDDefined() && djInsertGenerator != null);
            NucleotideSequence vInsert;
            NucleotideSequence djInsert;
            if (vdjcGenes.isDDefined()) {
                assert djInsertGenerator != null;
                GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, NucleotideSequence.EMPTY);
                vInsert = vInsertGenerator.generate(tempGene);
                djInsert = djInsertGenerator.generate(tempGene);
            } else {
                GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, null);
                vInsert = vInsertGenerator.generate(tempGene);
                djInsert = null;
            }
            return new GGene(null, vdjcGenes, trimming, vInsert, djInsert);
        }
    };
}
Also used : VDJCGenes(io.repseq.gen.VDJCGenes) VDJTrimming(io.repseq.gen.VDJTrimming) GGene(io.repseq.gen.GGene) NucleotideSequence(com.milaboratory.core.sequence.NucleotideSequence) VDJCGene(io.repseq.core.VDJCGene)

Aggregations

NucleotideSequence (com.milaboratory.core.sequence.NucleotideSequence)81 Test (org.junit.Test)32 Range (com.milaboratory.core.Range)19 VDJCGene (io.repseq.core.VDJCGene)15 GeneType (io.repseq.core.GeneType)14 NSequenceWithQuality (com.milaboratory.core.sequence.NSequenceWithQuality)13 GeneFeature (io.repseq.core.GeneFeature)9 VDJCAlignments (com.milaboratory.mixcr.basictypes.VDJCAlignments)8 VDJCHit (com.milaboratory.mixcr.basictypes.VDJCHit)8 ReferencePoint (io.repseq.core.ReferencePoint)7 VDJCLibrary (io.repseq.core.VDJCLibrary)7 ArrayList (java.util.ArrayList)7 VDJCAlignerParameters (com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters)6 AlignmentHit (com.milaboratory.core.alignment.batch.AlignmentHit)5 PairedRead (com.milaboratory.core.io.sequence.PairedRead)5 AminoAcidSequence (com.milaboratory.core.sequence.AminoAcidSequence)5 Path (java.nio.file.Path)5 Alignment (com.milaboratory.core.alignment.Alignment)4 Pattern (java.util.regex.Pattern)4 AlignmentHelper (com.milaboratory.core.alignment.AlignmentHelper)3