Usage example of edu.sdsc.mmtf.spark.interactions.InteractionFilter from the project mmtf-spark by sbl-sdsc:
the InteractionFilterTest class, method test3.
/**
 * Verifies that an inclusive target-group filter (flag = true) accepts exactly
 * the listed chemical component ids and rejects any other id.
 */
@Test
public void test3() {
    InteractionFilter filter = new InteractionFilter();
    filter.setTargetGroups(true, "HOH", "ZN");

    // listed groups are accepted; assertTrue/assertFalse is the idiomatic
    // JUnit form of assertEquals(true/false, ...)
    assertTrue(filter.isTargetGroup("ZN"));
    assertTrue(filter.isTargetGroup("HOH"));
    // an unlisted group must be rejected
    assertFalse(filter.isTargetGroup("MN"));
}
Usage example of edu.sdsc.mmtf.spark.interactions.InteractionFilter from the project mmtf-spark by sbl-sdsc:
the InteractionFilterTest class, method test1.
/**
 * Verifies that an inclusive query-group filter (flag = true) accepts exactly
 * the listed chemical component ids and rejects any other id.
 */
@Test
public void test1() {
    InteractionFilter filter = new InteractionFilter();
    filter.setQueryGroups(true, "HOH", "ZN");

    // listed groups are accepted; assertTrue/assertFalse is the idiomatic
    // JUnit form of assertEquals(true/false, ...)
    assertTrue(filter.isQueryGroup("ZN"));
    assertTrue(filter.isQueryGroup("HOH"));
    // an unlisted group must be rejected
    assertFalse(filter.isQueryGroup("MN"));
}
Usage example of edu.sdsc.mmtf.spark.interactions.InteractionFilter from the project mmtf-spark by sbl-sdsc:
the InteractionFilterTest class, method test4.
/**
 * Verifies that an exclusive target-group filter (flag = false) rejects exactly
 * the listed chemical component ids and accepts any other id.
 */
@Test
public void test4() {
    InteractionFilter filter = new InteractionFilter();
    filter.setTargetGroups(false, "HOH", "ZN");

    // listed groups are excluded; assertTrue/assertFalse is the idiomatic
    // JUnit form of assertEquals(true/false, ...)
    assertFalse(filter.isTargetGroup("ZN"));
    assertFalse(filter.isTargetGroup("HOH"));
    // an unlisted group must be accepted
    assertTrue(filter.isTargetGroup("MN"));
}
Usage example of edu.sdsc.mmtf.spark.interactions.InteractionFilter from the project mmtf-spark by sbl-sdsc:
the Metalnteractions class (name as it appears in the source), method main.
/**
 * Tabulates metal-coordination interactions in a non-redundant subset of the PDB.
 *
 * <p>Reads the full MMTF Hadoop sequence file, filters it to a PISCES
 * non-redundant subset, extracts interactions of common metal ions with
 * polar atoms within a distance cutoff, and prints summary statistics.
 *
 * @param args not used
 * @throws IOException if the MMTF Hadoop sequence file cannot be read
 */
public static void main(String[] args) throws IOException {
    String path = MmtfReader.getMmtfFullPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(Metalnteractions.class.getSimpleName());

    // try-with-resources guarantees the Spark context is closed even when an
    // exception is thrown mid-analysis (the original leaked it on failure);
    // JavaSparkContext implements java.io.Closeable.
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        // input parameters
        int sequenceIdentityCutoff = 30;    // % sequence identity for PISCES subset
        double resolution = 2.5;            // max. resolution in Angstroms
        int minInteractions = 4;            // coordination number lower bound
        int maxInteractions = 6;            // coordination number upper bound
        double distanceCutoff = 3.0;        // max. metal-ligand distance in Angstroms

        // chemical component codes of metals in different oxidation states
        String[] metals = { "V", "CR", "MN", "MN3", "FE", "FE2", "CO", "3CO", "NI", "3NI", "CU", "CU1", "CU3", "ZN", "MO", "4MO", "6MO" };

        // read PDB and create a non-redundant PISCES subset
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc)
                .filter(new Pisces(sequenceIdentityCutoff, resolution));

        // setup criteria for metal interactions
        InteractionFilter filter = new InteractionFilter();
        filter.setDistanceCutoff(distanceCutoff);
        filter.setMinInteractions(minInteractions);
        filter.setMaxInteractions(maxInteractions);
        filter.setQueryGroups(true, metals);

        // exclude non-polar interactions
        filter.setTargetElements(false, "H", "C", "P");

        // tabulate interactions in a dataframe; cached because it is used twice
        Dataset<Row> interactions = GroupInteractionExtractor.getInteractions(pdb, filter).cache();
        System.out.println("Metal interactions: " + interactions.count());

        // select interacting atoms and orientational order parameters (q4 - q6)
        // see {@link CoordinationGeometry}
        interactions = interactions.select("pdbId", "q4", "q5", "q6", "element0", "groupNum0", "chain0", "element1", "groupNum1", "chain1", "distance1", "element2", "groupNum2", "chain2", "distance2", "element3", "groupNum3", "chain3", "distance3", "element4", "groupNum4", "chain4", "distance4", "element5", "groupNum5", "chain5", "distance5", "element6", "groupNum6", "chain6", "distance6").cache();

        // show some example interactions
        interactions.dropDuplicates("pdbId").show(10);

        System.out.println("Unique interactions by metal:");
        interactions.groupBy("element0").count().sort("count").show();
    }
}
Usage example of edu.sdsc.mmtf.spark.interactions.InteractionFilter from the project mmtf-spark by sbl-sdsc:
the WaterInteractions class, method main.
/**
 * Finds bridging water interactions between protein atoms and ligands in the PDB
 * and writes the resulting interaction table to a parquet file.
 *
 * <p>Filter criteria (resolution, interaction counts, distance and normalized
 * B-factor cutoffs, inclusion of water-water contacts) are taken from the
 * command line, with defaults provided.
 *
 * @param args command line options (see {@code getCommandLine})
 * @throws IOException    if the MMTF Hadoop sequence file cannot be read
 * @throws ParseException if the command line cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    // timestamp used to make the output filename unique per run
    String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmm").format(Calendar.getInstance().getTime());
    long start = System.nanoTime();

    // process command line options (defaults are provided)
    CommandLine cmd = getCommandLine(args);
    String outputPath = cmd.getOptionValue("output-path");
    System.out.println("Output path: " + outputPath);
    String resolution = cmd.getOptionValue("resolution", "2");
    String minInteractions = cmd.getOptionValue("min-interactions", "2");
    String maxInteractions = cmd.getOptionValue("max-interactions", "4");
    String distanceCutoff = cmd.getOptionValue("distance-cutoff", "3");
    String bFactorCutoff = cmd.getOptionValue("b-factor-cutoff", "1.645");
    boolean includeWaters = cmd.hasOption("include-waters");

    // get path to MMTF Hadoop Sequence file
    String path = MmtfReader.getMmtfFullPath();

    // initialize Spark; try-with-resources guarantees the context is closed
    // even when an exception is thrown mid-analysis (the original leaked it
    // on failure). JavaSparkContext implements java.io.Closeable.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WaterInteractions.class.getSimpleName());
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        // read PDB structures, filter by resolution, and only include proteins
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc)
                .filter(new Resolution(0.0, Float.parseFloat(resolution)))
                .filter(new ContainsLProteinChain(true));

        // setup interaction criteria
        InteractionFilter filter = new InteractionFilter();
        filter.setDistanceCutoff(Float.parseFloat(distanceCutoff));
        filter.setNormalizedbFactorCutoff(Float.parseFloat(bFactorCutoff));
        filter.setMinInteractions(Integer.parseInt(minInteractions));
        filter.setMaxInteractions(Integer.parseInt(maxInteractions));
        filter.setQueryGroups(true, "HOH");

        // only use water oxygen
        filter.setQueryElements(true, "O");
        filter.setTargetElements(true, "O", "N", "S");

        // exclude "uninteresting" ligands
        Set<String> prohibitedGroups = new HashSet<>();
        prohibitedGroups.addAll(ExcludedLigandSets.ALL_GROUPS);
        if (!includeWaters) {
            prohibitedGroups.add("HOH");
        }
        filter.setProhibitedTargetGroups(prohibitedGroups);

        // calculate interactions
        Dataset<Row> data = GroupInteractionExtractor.getInteractions(pdb, filter);

        // keep only interactions with at least one organic ligand and one protein interaction;
        // cached because the dataset is used for show(), count(), and save()
        data = filterBridgingWaterInteractions(data, maxInteractions).cache();

        // show some results
        data.show(50);
        System.out.println("Hits(all): " + data.count());

        // save interactions to a .parquet file; filename encodes the run parameters
        String waterTag = includeWaters ? "_w" : "";
        String filename = outputPath + "/water_pl" + "_r" + resolution + "_d" + distanceCutoff + "_b" + bFactorCutoff + "_i" + minInteractions + maxInteractions + waterTag + "_" + timeStamp + ".parquet";
        System.out.println("Saving results to: " + filename);
        data.coalesce(1).write().mode("overwrite").format("parquet").save(filename);
    }

    long end = System.nanoTime();
    System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
}
Aggregations