use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class CoordinationGeometryTest method test.
/**
 * Checks the q3, q4, q5 and q6 values that CoordinationGeometry reports for one
 * center atom and six neighbor atoms taken from the first structure in {@code pdb}.
 * Each value is compared with a hard-coded reference number using a delta of 0.0001.
 */
public void test() {
// pdb and getCoords are declared outside this snippet
StructureDataInterface structure = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(structure, true);
// center position: ZN A.101.ZN
// (the integer passed to getCoords is presumably an atom index into cs — confirm)
Point3d center = getCoords(cs, 459);
// six neighbor positions, filled in one by one below
Point3d[] neighbors = new Point3d[6];
// CYS A.7.SG
neighbors[0] = getCoords(cs, 28);
// CYS A.10.SG
neighbors[1] = getCoords(cs, 44);
// HIS A.31.ND1
neighbors[2] = getCoords(cs, 223);
// CYS A.34.SG
neighbors[3] = getCoords(cs, 245);
// CYS A.10.N
neighbors[4] = getCoords(cs, 45);
// HIS A.31.O
neighbors[5] = getCoords(cs, 220);
CoordinationGeometry geom = new CoordinationGeometry(center, neighbors);
// reference values are hard-coded; how they were obtained is not shown here
double q3Expected = 0.9730115379131878;
assertEquals(q3Expected, geom.q3(), 0.0001);
double q4Expected = 0.9691494056145086;
assertEquals(q4Expected, geom.q4(), 0.0001);
double q5Expected = 0.5126001729084566;
assertEquals(q5Expected, geom.q5(), 0.0001);
double q6Expected = 0.2723305441457363;
assertEquals(q6Expected, geom.q6(), 0.0001);
// NOTE(review): the listing ends here; the method's closing brace is not part of this snippet
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class KeywordSearch method main.
/**
 * Filters the reduced MMTF sequence file with a PdbjMineSearch built from a
 * keyword query for 'porin', prints the key of every matching entry and then
 * prints the number of matches.
 *
 * @param args command line arguments (not read in the visible code)
 * @throws IOException declared by the method; the throwing call is not identifiable from this snippet
 */
public static void main(String[] args) throws IOException {
// Spark context with master "local[*]", named after this class
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(KeywordSearch.class.getSimpleName());
JavaSparkContext sc = new JavaSparkContext(conf);
// query text handed to PdbjMineSearch; it selects pdbid from keyword_search('porin')
String sqlQuery = "SELECT pdbid from keyword_search('porin')";
// read PDB and filter by keyword search
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readReducedSequenceFile(sc).filter(new PdbjMineSearch(sqlQuery));
// print the key of each matching entry
pdb.keys().foreach(k -> System.out.println(k));
// NOTE(review): foreach above and count below are two separate Spark actions on an
// uncached RDD, so the read + filter is presumably evaluated twice — confirm
System.out.println("Number of entries matching query: " + pdb.count());
// NOTE(review): no sc.close()/sc.stop() is visible; the listing ends before the method's closing brace
use of org.rcsb.mmtf.api.StructureDataInterface in project mmtf-spark by sbl-sdsc.
the class KinaseSearch method main.
/**
 * Reads the reduced MMTF sequence file, splits the structures with
 * StructureToPolymerChains, filters the result with a PdbjMineSearch built from a
 * SIFTS taxonomy/enzyme SQL query, and prints the number of matches.
 *
 * @param args command line arguments (not read in the visible code)
 * @throws IOException declared by the method; the throwing call is not identifiable from this snippet
 */
public static void main(String[] args) throws IOException {
// Spark context with master "local[*]", named after this class
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(KinaseSearch.class.getSimpleName());
JavaSparkContext sc = new JavaSparkContext(conf);
// query for human protein-serine/threonine kinases using SIFTS data:
// joins pdb_chain_taxonomy with pdb_chain_enzyme on (pdbid, chain) and selects pdbid and chain
// NOTE(review): the ec_number literal in the WHERE clause is an empty string, which does not
// match the stated intent; an EC number (possibly 2.7.11.1) appears to have been lost — confirm
// against the original source
String sql = "SELECT t.pdbid, t.chain FROM sifts.pdb_chain_taxonomy AS t " + "JOIN sifts.pdb_chain_enzyme AS e ON (t.pdbid = e.pdbid AND t.chain = e.chain) " + "WHERE t.scientific_name = 'Homo sapiens' AND e.ec_number = ''";
// read PDB in MMTF format, split into polymer chains and search using
// PdbJMineSearch
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readReducedSequenceFile(sc).flatMapToPair(new StructureToPolymerChains()).filter(new PdbjMineSearch(sql));
System.out.println("Number of entries matching query: " + pdb.count());
// NOTE(review): no sc.close()/sc.stop() is visible; the listing ends before the method's closing brace
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class ArgLigandInteractions method main.
/**
 * Extracts interactions between ARG groups and other groups from PDB structures
 * (filtered by resolution and by ContainsLProteinChain), narrows them to ARG
 * side-chain nitrogens interacting with groups of type 'LGO', self-joins the
 * result, and prints the elapsed time.
 *
 * NOTE(review): this snippet is incomplete as listed — the if-block on
 * includeWaters has no body or closing brace, one statement is not valid Java,
 * and the method's closing brace is missing. Confirm against the original source.
 *
 * @param args command line options, parsed by getCommandLine (declared outside this snippet)
 * @throws IOException declared by the method; the throwing call is not identifiable from this snippet
 * @throws ParseException declared by the method; presumably raised while parsing the command line — confirm
 */
public static void main(String[] args) throws IOException, ParseException {
// timeStamp is only referenced by the commented-out file name construction further down
String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmm").format(Calendar.getInstance().getTime());
long start = System.nanoTime();
// process command line options (defaults are provided for all but output-path)
// NOTE(review): outputPath, minInteractions, maxInteractions, distanceCutoff and bFactorCutoff
// are read here but used only in commented-out code within the visible snippet
CommandLine cmd = getCommandLine(args);
String outputPath = cmd.getOptionValue("output-path");
String resolution = cmd.getOptionValue("resolution", "2");
String minInteractions = cmd.getOptionValue("min-interactions", "2");
String maxInteractions = cmd.getOptionValue("max-interactions", "4");
String distanceCutoff = cmd.getOptionValue("distance-cutoff", "3");
String bFactorCutoff = cmd.getOptionValue("b-factor-cutoff", "1.645");
boolean includeWaters = cmd.hasOption("include-waters");
// get path to MMTF Hadoop Sequence file
String path = MmtfReader.getMmtfFullPath();
// initialize Spark
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ArgLigandInteractions.class.getSimpleName());
JavaSparkContext sc = new JavaSparkContext(conf);
// read PDB structures and filter by resolution and only include proteins
// (resolution filter is constructed with 0.0 and the parsed "resolution" option)
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc).filter(new Resolution(0.0, Float.parseFloat(resolution))).filter(new ContainsLProteinChain(true));
// setup interaction criteria
InteractionFilter filter = new InteractionFilter();
// query side: ARG groups
filter.setQueryGroups(true, "ARG");
// query atoms: element "N"
// (the boolean first argument presumably selects include vs. exclude — confirm)
filter.setQueryElements(true, "N");
// target atoms: elements O, N and S
filter.setTargetElements(true, "O", "N", "S");
// target groups: called with false and the 20 standard amino acids
// (presumably this excludes them as targets — confirm)
filter.setTargetGroups(false, new HashSet<>(PolymerComposition.AMINO_ACIDS_20));
// exclude "uninteresting" ligands
// NOTE(review): prohibitedGroups is never filled or passed on in the visible code
Set<String> prohibitedGroups = new HashSet<>();
// NOTE(review): the body and closing brace of this if-block are missing from the listing;
// as written, everything below would be nested inside it
if (!includeWaters) {
// calculate interactions
Dataset<Row> data = GroupInteractionExtractor.getInteractions(pdb, filter);
// only consider interactions with ARG sidechain nitrogens
data = data.filter("atom0 = 'NE' OR atom0 = 'NH1' OR atom0 = 'NH2'");
// the interacting group should be an organic ligand (LGO)
data = data.filter("type1 = 'LGO'");
// NOTE(review): the next statement is not valid Java as listed; a call taking these column
// names (possibly data.select(...)) appears to have been lost in extraction — confirm
data ="pdbId", "atom0", "groupNum0", "chain0", "atom1", "group1", "groupNum1", "chain1", "distance1");
// self-join: pair rows that share pdbId, groupNum1 and chain1 but differ in atom0 and atom1
// NOTE(review): data2 is the same Dataset reference as data, not an aliased copy, and the
// join result "joint" is not used in the visible code
Dataset<Row> data2 = data;
Dataset<Row> joint = data.join(data2, (data.col("pdbId").equalTo(data2.col("pdbId"))).and(data.col("atom0").notEqual(data2.col("atom0"))).and(data.col("groupNum1").equalTo(data2.col("groupNum1")).and(data.col("chain1").equalTo(data2.col("chain1")).and(data.col("atom1").notEqual(data2.col("atom1"))))));;
// data ="pdbId",
// "atom0", "groupNum0", "chain0",
// "atom1", "groupNum1", "chain1", "distance1",
// "atom2", "groupNum2", "chain2", "distance2");
// // only consider interactions with ARG sidechain nitrogens
// data = data.filter("atom0 = 'NE' OR atom0 = 'NH1' OR atom0 = 'NH2'");
// // the interacting group should be an organic ligand (LGO)
// data = data.filter("type1 = 'LGO' AND type2 = 'LGO'").cache();
// // the two interacting atoms must come from the same group and chain
// data = data.filter("group1 = group2 AND groupNum1 = groupNum2 AND chain1 = chain2");
// Dataset<Row> data2 = data;
// Dataset<Row> joint = data.join(data2,
// data.col("pdbId").equalTo(data2.col("pdbId")).and
// (data.col("groupNum1").equalTo(data2.col("groupNum1")).and
// (data.col("chain1").equalTo(data2.col("chain1")))
// ));
// RelationalGroupedDataset groupBy = data.groupBy("pdbId", "groupNum0", "chain0", "group1", "groupNum1");
// groupBy.count().show(1000);
// show some results
// System.out.println("Hits(all): " + data.count());
// // save interactions to a .parquet file
// String waterTag = includeWaters ? "_w" : "";
// String filename = outputPath + "/arg_lig" + "_r" + resolution
// + "_d" + distanceCutoff
// + "_b" + bFactorCutoff + "_i" + minInteractions + maxInteractions + waterTag + "_" + timeStamp + ".parquet";
// System.out.println("Saving results to: " + filename);
// data.coalesce(1).write().mode("overwrite").format("parquet").save(filename);
// report elapsed time in whole seconds
// NOTE(review): no sc.close()/sc.stop() is visible in this snippet
long end = System.nanoTime();
System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
use of org.rcsb.mmtf.api.StructureDataInterface in project mm-dev by sbl-sdsc.
the class ImportMolFile method main.
/**
 * Downloads the chemical component "BTN" through Molmporter.downloadChemicalComponents
 * and prints each resulting structure with TraverseStructureHierarchy.printStructureData.
 *
 * @param args command line arguments (not read in the visible code)
 * @throws IOException declared by the method; the throwing call is not identifiable from this snippet
 */
public static void main(String[] args) throws IOException {
// disabled alternative: read a single .mol file from a local path and traverse it
// String fileName = "/Users/peter/Downloads/Pose_prediction/417-1-hciq4/3OOF-FXR_36-1.mol";
// Molmporter mp = new Molmporter();
// StructureDataInterface structure = mp.readFile(fileName);
// TraverseStructureHierarchy.demo(structure);
// instantiate Spark
// NOTE(review): the app name "ImportPdbFiles" differs from this class's name (ImportMolFile) — confirm it is intended
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("ImportPdbFiles");
JavaSparkContext sc = new JavaSparkContext(conf);
// ligand ids to download; a single entry here
List<String> ligandIds = Arrays.asList("BTN");
JavaPairRDD<String, StructureDataInterface> structures = Molmporter.downloadChemicalComponents(ligandIds, sc);
// t._2 is the StructureDataInterface value of each (String, StructureDataInterface) pair
structures.foreach(t -> TraverseStructureHierarchy.printStructureData(t._2));
// NOTE(review): no sc.close()/sc.stop() is visible; the listing ends before the method's closing brace