use of com.milaboratory.core.sequence.NucleotideSequence in project mixcr by milaboratory.
the class VDJCAlignerAbstract method init.
@Override
protected void init() {
DAlignerParameters dAlignerParameters = parameters.getDAlignerParameters();
List<VDJCGene> dGenes = genesToAlign.get(GeneType.Diversity);
if (dAlignerParameters != null && dGenes.size() != 0)
singleDAligner = new SingleDAligner(dAlignerParameters, genesToAlign.get(GeneType.Diversity));
vAligner = createKAligner(GeneType.Variable);
jAligner = createKAligner(GeneType.Joining);
cAligner = createKAligner(GeneType.Constant);
Chains chains = new Chains();
for (VDJCGene gene : getUsedGenes()) chains = chains.merge(gene.getChains());
filters = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.VJC_REFERENCE) {
HashMap<String, BitArray> f = new HashMap<>();
for (final String chain : chains) {
BatchAlignerWithBaseWithFilter<NucleotideSequence, VDJCGene, AlignmentHit<NucleotideSequence, VDJCGene>> aligner = getAligner(geneType);
if (aligner != null)
f.put(chain, aligner.createFilter(new Filter<VDJCGene>() {
@Override
public boolean accept(VDJCGene object) {
return object.getChains().contains(chain);
}
}));
}
filters.put(geneType, f);
}
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class CompileAction method compile.
public static void compile(Path source, Path destination, int surroundings) throws IOException {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(source, "lib");
List<VDJCLibraryData> result = new ArrayList<>();
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) fragmentsBuilder.addRegion(fragment);
for (VDJCGene gene : lib.getGenes()) {
if (!gene.getData().getBaseSequence().isPureOriginalSequence())
throw new IllegalArgumentException("Don't support mutated sequences yet.");
URI uri = gene.getData().getBaseSequence().getOrigin();
Range region = gene.getPartitioning().getContainingRegion();
region = region.expand(surroundings);
NucleotideSequence seq;
try {
seq = gene.getSequenceProvider().getRegion(region);
} catch (SequenceProviderIndexOutOfBoundsException e) {
region = e.getAvailableRange();
if (region == null)
throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?");
seq = gene.getSequenceProvider().getRegion(region);
}
fragmentsBuilder.addRegion(uri, region, seq);
}
result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
}
VDJCDataUtils.writeToFile(result, destination, true);
log.info("{} compiled successfully.", source);
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class ExportCloneSequencesAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
Chains chains = params.getChains();
GeneFeature geneFeature = params.getGeneFeature();
RandomGenerator random = new Well19937c(1232434);
try (GRepertoireReader input = new GRepertoireReader(createBufferedReader(params.getInput()));
FastaWriter<NucleotideSequence> output = createSingleFastaWriter(params.getOutput())) {
List<DescriptionExtractor> extractors = params.getExtractors(input.getLibrary());
long i = 0;
for (GClone clone : CUtils.it(input)) {
int f = params.factor == null ? 1 : randomizedRound(clone.abundance * params.factor, random);
for (int j = 0; j < f; j++) for (Map.Entry<String, GGene> e : clone.genes.entrySet()) if (chains.contains(e.getKey())) {
StringBuilder descriptionLine = new StringBuilder("GClone");
for (DescriptionExtractor extractor : extractors) descriptionLine.append("|").append(extractor.extract(clone, e.getValue(), e.getKey()));
output.write(new FastaRecord<>(i++, descriptionLine.toString(), e.getValue().getFeature(geneFeature)));
}
}
}
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class FromFastaActionAbstract method go.
@Override
public void go(ActionHelper helper) throws Exception {
Pattern functionalityRegexp = params.getFunctionalityRegexp();
GeneType geneType = params.getGeneType();
Map<String, VDJCGeneData> genes = new HashMap<>();
Path libraryPath = Paths.get(params.getOutputJSON()).toAbsolutePath();
// Parsing -P or --gene-feature parameters
Map<ReferencePoint, Integer> points = new HashMap<>();
if (params.geneFeature != null) {
GeneFeature gf = params.getGeneFeature();
points.put(gf.getFirstPoint(), 0);
points.put(gf.getLastPoint(), -1);
} else
for (Map.Entry<String, String> p : params.points.entrySet()) {
ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
if (anchorPoint == null)
throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
int position = Integer.decode(p.getValue());
points.put(anchorPoint, position);
}
// Check
for (Map.Entry<ReferencePoint, Integer> entry : points.entrySet()) if (entry.getKey().getGeneType() != null && entry.getKey().getGeneType() != geneType)
throw new IllegalArgumentException("Incompatible anchor point and gene type: " + entry.getKey() + " / " + geneType);
try (FastaReader reader = new FastaReader<>(params.getInput(), null);
SequenceStorage storage = params.doEmbedSequences() ? new EmbeddedWriter() : params.getOutputFasta() == null ? new ExistingFileWriter(libraryPath, Paths.get(params.getInput()).toAbsolutePath()) : new FastaSequenceStorage(libraryPath, Paths.get(params.getOutputFasta()).toAbsolutePath())) {
for (FastaReader.RawFastaRecord record : CUtils.it((OutputPortCloseable<FastaReader.RawFastaRecord>) reader.asRawRecordsPort())) {
StringWithMapping swm = StringWithMapping.removeSymbol(record.sequence, params.getPaddingCharacter());
NucleotideSequence seq = new NucleotideSequence(swm.getModifiedString());
if (seq.containsWildcards()) {
System.out.println("Sequence dropped because contain wildcards: " + record.description);
continue;
}
String[] fields = record.description.split("\\|");
String geneName = fields[params.nameIndex];
boolean functionality = true;
if (params.functionalityIndex != null)
functionality = functionalityRegexp.matcher(fields[params.functionalityIndex]).matches();
SortedMap<ReferencePoint, Long> anchorPoints = new TreeMap<>();
for (Map.Entry<String, String> p : params.patterns.entrySet()) {
ReferencePoint anchorPoint = ReferencePoint.getPointByName(p.getKey());
if (anchorPoint == null)
throw new IllegalArgumentException("Unknown anchor point: " + p.getKey());
if (anchorPoint.getGeneType() != null && anchorPoint.getGeneType() != geneType)
throw new IllegalArgumentException("Incompatible anchor point and gene type: " + anchorPoint + " / " + geneType);
Pattern pattern = Pattern.compile(p.getValue());
int position = -1;
for (boolean stops : new boolean[] { false, true }) for (int f = 0; f < 3; f++) {
if (position != -1)
continue;
TranslationParameters tp = TranslationParameters.withoutIncompleteCodon(f);
AminoAcidSequence aa = AminoAcidSequence.translate(seq, tp);
if (!stops && aa.containStops())
continue;
String str = aa.toString();
Matcher matcher = pattern.matcher(str);
if (matcher.find()) {
int aaPosition = matcher.start(1);
position = AminoAcidSequence.convertAAPositionToNt(aaPosition, seq.size(), tp);
}
}
if (position == -1)
continue;
anchorPoints.put(anchorPoint, (long) position);
}
for (Map.Entry<ReferencePoint, Integer> p : points.entrySet()) {
// AA patterns have priority over positional anchor points
if (anchorPoints.containsKey(p.getKey()))
continue;
// Converting position using
int position = swm.convertPosition(p.getValue());
// Can't be converted (e.g. position of padding symbol) skipping
if (position == -1)
continue;
anchorPoints.put(p.getKey(), (long) position);
}
if (genes.containsKey(geneName)) {
if (params.getIgnoreDuplicates()) {
System.out.println("Ignored: Duplicate records for " + geneName);
continue;
} else
throw new IllegalArgumentException("Duplicate records for " + geneName);
}
BaseSequence baseSequence = storage.storeSequence(seq, geneName, record.description);
VDJCGeneData gene = new VDJCGeneData(baseSequence, geneName, geneType, functionality, new Chains(params.chain), new TreeMap<String, SortedSet<String>>(), anchorPoints).addMetaValue(KnownVDJCGeneMetaFields.COMMENTS, record.description);
genes.put(geneName, gene);
}
VDJCLibraryData library = new VDJCLibraryData(params.taxonId, params.speciesNames, new ArrayList<>(genes.values()), new TreeMap<String, SortedSet<String>>(), storage.getBase()).addMetaValue(KnownVDJCLibraryMetaFields.COMMENTS, "Imported from: " + params.getInput());
VDJCDataUtils.writeToFile(new VDJCLibraryData[] { library }, params.getOutputJSON(), false);
}
}
use of com.milaboratory.core.sequence.NucleotideSequence in project repseqio by repseqio.
the class BasicGGeneModel method create.
@Override
public GGeneGenerator create(RandomGenerator random, VDJCLibrary library) {
final VDJCGenesGenerator vdjcGenesGenerator = vdjcGenesModel.create(random, library);
List<VDJCGene> vGenes = vdjcGenesGenerator.genes(GeneType.Variable);
List<VDJCGene> dGenes = vdjcGenesGenerator.genes(GeneType.Diversity);
List<VDJCGene> jGenes = vdjcGenesGenerator.genes(GeneType.Joining);
List<VDJCGene> cGenes = vdjcGenesGenerator.genes(GeneType.Constant);
final VDJTrimmingGenerator trimmingGenerator = trimmingModel.create(random, vGenes, dGenes, jGenes, cGenes);
final InsertGenerator vInsertGenerator = vInsertModel.create(random, true, vGenes, dGenes, jGenes, cGenes);
final InsertGenerator djInsertGenerator = dGenes.isEmpty() ? null : djInsertModel.create(random, false, vGenes, dGenes, jGenes, cGenes);
return new GGeneGenerator() {
@Override
public GGene generate() {
VDJCGenes vdjcGenes = vdjcGenesGenerator.sample();
VDJTrimming trimming = trimmingGenerator.sample(vdjcGenes);
assert !vdjcGenes.isDDefined() || (vdjcGenes.isDDefined() && djInsertGenerator != null);
NucleotideSequence vInsert;
NucleotideSequence djInsert;
if (vdjcGenes.isDDefined()) {
assert djInsertGenerator != null;
GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, NucleotideSequence.EMPTY);
vInsert = vInsertGenerator.generate(tempGene);
djInsert = djInsertGenerator.generate(tempGene);
} else {
GGene tempGene = new GGene(null, vdjcGenes, trimming, NucleotideSequence.EMPTY, null);
vInsert = vInsertGenerator.generate(tempGene);
djInsert = null;
}
return new GGene(null, vdjcGenes, trimming, vInsert, djInsert);
}
};
}
Aggregations