use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.
the class Main method main.
public static void main(String[] args) throws Exception {
Signal.handle(new Signal("PIPE"), new SignalHandler() {
@Override
public void handle(Signal signal) {
System.exit(0);
}
});
if (System.getProperty("localOnly") == null) {
Path cachePath = Paths.get(System.getProperty("user.home"), ".repseqio", "cache");
SequenceResolvers.initDefaultResolver(cachePath);
}
// Setting up main helper
JCommanderBasedMain main = new JCommanderBasedMain("repseqio", new ListAction(), new FilterAction(), new MergeAction(), new CompileAction(), new GenerateClonesAction(), new NormalizeCloneAbundancesAction(), new ExportCloneSequencesAction(), new FastaAction(), new TsvAction(), new InferAnchorPointsAction(), new DebugAction(), new FormatAction(), new StatAction(), new FromFastaAction(), new FromPaddedFastaAction());
main.setVersionInfoCallback(new Runnable() {
@Override
public void run() {
VersionInfo milib = VersionInfo.getVersionInfoForArtifact("milib");
VersionInfo repseqio = VersionInfo.getVersionInfoForArtifact("repseqio");
StringBuilder builder = new StringBuilder();
builder.append("RepSeq.IO.CLI v").append(repseqio.getVersion()).append(" (built ").append(repseqio.getTimestamp()).append("; rev=").append(repseqio.getRevision()).append("; branch=").append(repseqio.getBranch()).append("; host=").append(repseqio.getHost()).append(")").append("\n");
builder.append("MiLib v").append(milib.getVersion()).append(" (rev=").append(milib.getRevision()).append("; branch=").append(milib.getBranch()).append(")").append("\n");
builder.append("Built-in libraries:\n");
VDJCLibraryRegistry reg = VDJCLibraryRegistry.createDefaultRegistry();
reg.loadAllLibraries("default");
for (VDJCLibrary lib : reg.getLoadedLibraries()) builder.append(lib.getLibraryId()).append("\n");
System.out.print(builder.toString());
}
});
main.main(args);
}
use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.
the class TsvAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(params.getOutputStream(), StandardCharsets.UTF_8))) {
writer.write("Gene\tChains\tFeature\tStart\tStop\tSource\tSequence\n");
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
for (GeneFeatureWithOriginalName feature : params.features) {
GeneFeature geneFeature = feature.feature;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
// Don't output start and end positions for composite gene features
Long start = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getFirstPoint());
Long end = geneFeature.isComposite() ? null : gene.getData().getAnchorPoints().get(geneFeature.getLastPoint());
NucleotideSequence nSequence = gene.getFeature(geneFeature);
List<String> tokens = Arrays.asList(gene.getGeneName(), gene.getChains().toString(), feature.originalName, // (so essentially 1-based inclusive). Report both as 1-based.
(start == null ? "" : params.isOneBased() ? String.valueOf(start + 1) : String.valueOf(start)), (end == null ? "" : String.valueOf(end)), gene.getData().getBaseSequence().getOrigin().toString(), nSequence.toString());
String delim = "";
for (String t : tokens) {
writer.write(delim);
writer.write(t);
delim = "\t";
}
writer.write('\n');
}
}
}
}
}
use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.
the class DebugAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(params.getInput());
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
GeneFeature cdr3FirstTriplet = new GeneFeature(ReferencePoint.CDR3Begin, 0, 3);
GeneFeature cdr3LastTriplet = new GeneFeature(ReferencePoint.CDR3End, -3, 0);
GeneFeature vIntronDonor = new GeneFeature(ReferencePoint.VIntronBegin, 0, 2);
GeneFeature vIntronAcceptor = new GeneFeature(ReferencePoint.VIntronEnd, -2, 0);
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
for (VDJCGene gene : lib.getGenes()) {
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
// First generate list of warning messages
List<String> warnings = new ArrayList<>();
if (gene.isFunctional() || params.getCheckAll()) {
NucleotideSequence l3;
switch(gene.getGeneType()) {
case Variable:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3FirstTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 start");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.C)
warnings.add("CDR3 does not start with C, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3Begin: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3Begin));
// Flag suspicious exon borders
// https://schneider.ncifcrf.gov/gallery/SequenceLogoSculpture.gif
NucleotideSequence vIntronDonorSeq = gene.getFeature(vIntronDonor);
if (vIntronDonorSeq != null && !vIntronDonorSeq.toString().equals("GT") && !vIntronDonorSeq.toString().equals("GC"))
warnings.add("Expected VIntron sequence to start with GT, was: " + vIntronDonorSeq.toString());
NucleotideSequence vIntronAcceptorSeq = gene.getFeature(vIntronAcceptor);
if (vIntronAcceptorSeq != null && !vIntronAcceptorSeq.toString().equals("AG"))
warnings.add("Expected VIntron sequence to end with AG, was: " + vIntronAcceptorSeq.toString());
ReferencePoints partitioning = gene.getPartitioning();
if (partitioning.isAvailable(GeneFeature.VTranscriptWithout5UTR)) {
// Iterating over all reading-frame bound anchor points of V gene
for (ReferencePoint anchorPoint : ReferencePoint.DefaultReferencePoints) {
if (anchorPoint.getGeneType() != GeneType.Variable || !anchorPoint.isTripletBoundary())
continue;
// And checking that they are in the same frame as Start (L1Begin)
int relativePosition = partitioning.getRelativePosition(GeneFeature.VTranscriptWithout5UTR, anchorPoint);
if (// Point is defined
relativePosition >= 0 && relativePosition % 3 != 0)
warnings.add("Expected " + anchorPoint + " to have position dividable by three inside VTranscriptWithout5UTR. " + "This may indicate an error in the L2 boundaries.");
}
}
break;
case Joining:
// Flag AA residues flanking CDR3
l3 = gene.getFeature(cdr3LastTriplet);
if (l3 == null)
warnings.add("Unable to find CDR3 end");
else if (l3.size() != 3)
warnings.add("Unable to translate sequence: " + gene.getName() + " / " + l3);
else if (AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.W && AminoAcidSequence.translate(l3).codeAt(0) != AminoAcidAlphabet.F)
warnings.add("CDR3 does not end with W or F, was: " + l3.toString() + " / " + AminoAcidSequence.translate(l3).toString() + " / CDR3End: " + gene.getData().getAnchorPoints().get(ReferencePoint.CDR3End));
break;
}
for (GeneFeature geneFeature : aaGeneFeatures.get(gene.getGeneType())) {
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, gene.getFeature(geneFeature));
if (aaSequence != null) {
// Flag if contains stop codon
if (aaSequence.numberOfStops() > 0)
warnings.add(GeneFeature.encode(geneFeature) + " contains a stop codon");
}
}
}
if (params.getProblemOnly() && warnings.isEmpty())
continue;
System.out.println(gene.getName() + " (" + (gene.isFunctional() ? "F" : "P") + ") " + gene.getChains() + " : " + lib.getTaxonId());
if (!warnings.isEmpty()) {
System.out.println();
System.out.println("WARNINGS: ");
for (String warning : warnings) {
System.out.println(warning);
}
System.out.println();
}
for (GeneFeature geneFeature : geneFeatures.get(gene.getGeneType())) {
System.out.println();
System.out.println(GeneFeature.encode(geneFeature));
NucleotideSequence nSequence = gene.getFeature(geneFeature);
AminoAcidSequence aaSequence = getAminoAcidSequence(gene, geneFeature, nSequence);
System.out.print("N: ");
if (nSequence == null)
System.out.println("Not Available");
else
System.out.println(nSequence);
if (GeneFeature.getFrameReference(geneFeature) != null) {
System.out.print("AA: ");
if (aaSequence == null)
System.out.println("Not Available");
else
System.out.println(aaSequence);
}
}
System.out.println("=========");
System.out.println();
}
}
}
use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.
the class FastaAction method go.
@Override
public void go(ActionHelper helper) throws Exception {
GeneFeature geneFeature = params.getGeneFeature();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
if (!"default".equals(params.getInput()))
reg.registerLibraries(params.getInput());
else
reg.loadAllLibraries("default");
Pattern chainPattern = params.chain == null ? null : Pattern.compile(params.chain);
Pattern namePattern = params.name == null ? null : Pattern.compile(params.name);
Long taxonFilter = params.taxonId;
if (taxonFilter == null && params.species != null)
taxonFilter = reg.resolveSpecies(params.species);
try (FastaWriter<NucleotideSequence> writer = CLIUtils.createSingleFastaWriter(params.getOutput())) {
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
if (taxonFilter != null && taxonFilter != lib.getTaxonId())
continue;
for (VDJCGene gene : lib.getGenes()) {
if (chainPattern != null) {
boolean y = false;
for (String s : gene.getChains()) if (y |= chainPattern.matcher(s).matches())
break;
if (!y)
continue;
}
if (namePattern != null && !namePattern.matcher(gene.getName()).matches())
continue;
NucleotideSequence featureSequence = gene.getFeature(geneFeature);
if (featureSequence == null)
continue;
writer.write(gene.getName() + "|" + (gene.isFunctional() ? "F" : "P") + "|taxonId=" + gene.getParentLibrary().getTaxonId(), featureSequence);
}
}
}
}
use of io.repseq.core.VDJCLibraryRegistry in project repseqio by repseqio.
the class CompileAction method compile.
public static void compile(Path source, Path destination, int surroundings) throws IOException {
VDJCLibraryRegistry.resetDefaultRegistry();
VDJCLibraryRegistry reg = VDJCLibraryRegistry.getDefault();
reg.registerLibraries(source, "lib");
List<VDJCLibraryData> result = new ArrayList<>();
for (VDJCLibrary lib : reg.getLoadedLibraries()) {
VDJCDataUtils.FragmentsBuilder fragmentsBuilder = new VDJCDataUtils.FragmentsBuilder();
for (KnownSequenceFragmentData fragment : lib.getData().getSequenceFragments()) fragmentsBuilder.addRegion(fragment);
for (VDJCGene gene : lib.getGenes()) {
if (!gene.getData().getBaseSequence().isPureOriginalSequence())
throw new IllegalArgumentException("Don't support mutated sequences yet.");
URI uri = gene.getData().getBaseSequence().getOrigin();
Range region = gene.getPartitioning().getContainingRegion();
region = region.expand(surroundings);
NucleotideSequence seq;
try {
seq = gene.getSequenceProvider().getRegion(region);
} catch (SequenceProviderIndexOutOfBoundsException e) {
region = e.getAvailableRange();
if (region == null)
throw new IllegalArgumentException("Wrong anchor points for " + gene.getName() + " ?");
seq = gene.getSequenceProvider().getRegion(region);
}
fragmentsBuilder.addRegion(uri, region, seq);
}
result.add(new VDJCLibraryData(lib.getTaxonId(), lib.getData().getSpeciesNames(), lib.getData().getGenes(), lib.getData().getMeta(), fragmentsBuilder.getFragments()));
}
VDJCDataUtils.writeToFile(result, destination, true);
log.info("{} compiled successfully.", source);
}
Aggregations