Example usage of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark (by sbl-sdsc):
the main method of the FullToReducedSequenceFile class.
/**
 * Converts a full MMTF Hadoop Sequence File to a reduced representation.
 *
 * @param args args[0] input directory (full),
 *             args[1] output directory (reduced)
 * @throws FileNotFoundException if the input directory cannot be read
 */
public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
        System.out.println("Usage: FullToReducedSequenceFile <path_to_full> <path_to_reduced>");
        System.exit(1);
    }
    String fullPath = args[0];
    String reducedPath = args[1];

    long start = System.nanoTime();

    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(FullToReducedSequenceFile.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // read PDB in MMTF format and convert each entry to its reduced form.
    // cache() keeps the converted RDD in memory so that the count() below
    // does not recompute the whole read+encode pipeline a second time after
    // writeSequenceFile() has already materialized it.
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader
            .readSequenceFile(fullPath, sc)
            .mapValues(ReducedEncoder::getReduced)
            .cache();

    MmtfWriter.writeSequenceFile(reducedPath, sc, pdb);
    System.out.println("# structures converted: " + pdb.count());

    long end = System.nanoTime();
    System.out.println("Time: " + (end - start) / 1E9 + " sec.");

    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark (by sbl-sdsc):
the main method of the AuthorSearchDemo class.
/**
 * Searches PDB entries by author name via the PDBj Mine 2 SQL service,
 * matching Doudna, J.A. either as a deposition (audit) author or as an
 * author of the primary citation.
 *
 * @param args not used
 * @throws IOException if the reduced MMTF Hadoop Sequence File cannot be read
 */
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(AuthorSearchDemo.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // query to find PDB structures for Doudna, J.A. as a deposition (audit)
    // author or as an author in the primary PDB citation
    String sqlQuery =
            "SELECT pdbid from audit_author "
          + "WHERE name LIKE 'Doudna%J.A.%' "
          + "UNION "
          + "SELECT pdbid from citation_author "
          + "WHERE citation_id = 'primary' AND name LIKE 'Doudna%J.A.%'";

    // read the reduced PDB archive and keep only entries matching the query
    JavaPairRDD<String, StructureDataInterface> matches =
            MmtfReader.readReducedSequenceFile(sc).filter(new PdbjMineSearch(sqlQuery));

    System.out.println("Number of entries matching query: " + matches.count());
    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark (by sbl-sdsc):
the main method of the CreateRepresentativeSet class.
/**
 * Creates a non-redundant representative set of protein chains using the
 * Pisces filter and saves it as an MMTF Hadoop Sequence File.
 *
 * @param args not used
 * @throws IOException if the reduced MMTF Hadoop Sequence File cannot be read
 */
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(CreateRepresentativeSet.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    // filter by representative protein chains at 40% sequence identity
    // and 2.5 A resolution using the Pisces filter. Any pair of protein
    // chains in the representative set will have <= 40% sequence identity.
    int sequenceIdentity = 40;
    double resolution = 2.5;

    // read PDB, split entries into polymer chains, and filter by Pisces filter
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader
            .readReducedSequenceFile(sc)
            .flatMapToPair(new StructureToPolymerChains())
            .filter(new Pisces(sequenceIdentity, resolution));

    System.out.println("# representative chains: " + pdb.count());

    // coalesce partitions to avoid saving many small files
    pdb = pdb.coalesce(12);

    // save representative set; derive the file-name suffix from the filter
    // parameters so the name stays consistent if the cutoffs above change
    String path = MmtfReader.getMmtfReducedPath();
    MmtfWriter.writeSequenceFile(
            path + "_representatives_i" + sequenceIdentity + "_r" + resolution, sc, pdb);

    sc.close();
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark (by sbl-sdsc):
the testGetAtomNames method of the ColumnarStructureTest class.
@Test
public void testGetAtomNames() {
    // take the first structure from the shared test RDD and view it columnwise
    StructureDataInterface structure = pdb.values().first();
    ColumnarStructure columns = new ColumnarStructure(structure, true);
    // the atom at flat index 900 is expected to be a CG2 carbon
    String[] atomNames = columns.getAtomNames();
    assertEquals("CG2", atomNames[900]);
}
Example usage of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark (by sbl-sdsc):
the testIsPolymer method of the ColumnarStructureTest class.
@Test
public void testIsPolymer() {
    // take the first structure from the shared test RDD and view it columnwise
    StructureDataInterface structure = pdb.values().first();
    ColumnarStructure columns = new ColumnarStructure(structure, true);
    boolean[] polymerFlags = columns.isPolymer();
    assertEquals(true, polymerFlags[100]);  // chain A (polymer residue)
    assertEquals(false, polymerFlags[901]); // BTN (biotin ligand)
    assertEquals(false, polymerFlags[917]); // HOH (water)
}
Aggregations