use of edu.sdsc.mmtf.spark.filters.ContainsLProteinChain in project mmtf-spark by sbl-sdsc.
The class FilterByPolymerChainType, method main.
/**
 * Counts PDB entries that contain DNA- or RNA-linking polymer chains
 * but no L-protein chains and no D-saccharide chains, and prints the count.
 *
 * @param args not used
 * @throws FileNotFoundException if the MMTF reduced-file path cannot be resolved
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(FilterByPolymerChainType.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        long count = MmtfReader
                .readSequenceFile(path, sc) // read MMTF hadoop sequence file
                // retain entries that contain DNA- or RNA-linking chains
                .filter(new ContainsPolymerChainType(
                        ContainsPolymerChainType.DNA_LINKING,
                        ContainsPolymerChainType.RNA_LINKING))
                // exclude entries that also contain L-protein chains
                .filter(new NotFilter(new ContainsLProteinChain()))
                // exclude entries that also contain D-saccharide chains
                .filter(new NotFilter(new ContainsDSaccharideChain()))
                .count();
        System.out.println("# pure DNA and RNA entries: " + count);
    } finally {
        // ensure the Spark context is released even if the job fails
        sc.close();
    }
}
use of edu.sdsc.mmtf.spark.filters.ContainsLProteinChain in project mmtf-spark by sbl-sdsc.
The class FilterExclusivelyByLProteins, method main.
/**
 * Counts PDB entries that exclusively contain L-protein chains
 * (the exclusive flag requires every chain to be an L-protein chain)
 * and prints the count.
 *
 * @param args not used
 * @throws FileNotFoundException if the MMTF reduced-file path cannot be resolved
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(FilterExclusivelyByLProteins.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // exclusive = true: all chains must match, not just at least one
        boolean exclusive = true;
        long count = MmtfReader
                .readSequenceFile(path, sc) // read MMTF hadoop sequence file
                .filter(new ContainsLProteinChain(exclusive))
                .count();
        System.out.println("# L-proteins: " + count);
    } finally {
        // ensure the Spark context is released even if the job fails
        sc.close();
    }
}
use of edu.sdsc.mmtf.spark.filters.ContainsLProteinChain in project mm-dev by sbl-sdsc.
The class TestRosettaMmtf, method main.
/**
 * Test: reads an MMTF-Hadoop sequence file, sums the entity counts over all
 * structures, and reports the number of structures read plus elapsed time.
 *
 * @param args args[0] <path-to-mmtf-hadoop-sequence-file>
 *
 * @throws FileNotFoundException
 */
public static void main(String[] args) throws FileNotFoundException {
// instantiate Spark
// TODO set to local[1] !!!!
// NOTE(review): app name "TestSwissModelMmtf" does not match the class name
// TestRosettaMmtf — confirm which is intended.
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("TestSwissModelMmtf");
JavaSparkContext sc = new JavaSparkContext(conf);
long start = System.nanoTime();
// read all structures from the MMTF-Hadoop sequence file given as args[0]
JavaPairRDD<String, StructureDataInterface> structures = MmtfReader.readSequenceFile(args[0], sc);
// total: 639 structures
// Optional filters kept for experimentation (disabled):
// structures = structures.filter(new ContainsDnaChain()); // ?
// structures = structures.filter(new ContainsLProteinChain()); // 639?
// structures = structures.filter(new ContainsGroup("ZN")); // 0
// structures = structures.filter(new ContainsGroup("ATP")); //
// debug: print structure data
// structures.foreach(t -> TraverseStructureHierarchy.demo(t._2));
// structures.foreach(t -> System.out.println(t._1));
// sum of getNumEntities() over every structure in the RDD
System.out.println(structures.map(t -> t._2.getNumEntities()).reduce((a, b) -> a + b));
System.out.println("Number of structures read: " + structures.count());
long end = System.nanoTime();
// elapsed wall-clock time in seconds
System.out.println("Time: " + (end - start) / 1E9 + " sec.");
// close Spark
sc.close();
}
use of edu.sdsc.mmtf.spark.filters.ContainsLProteinChain in project mmtf-spark by sbl-sdsc.
The class FilterProteinDnaComplexes, method main.
/**
 * Counts PDB entries that contain both L-peptide chains and DNA chains
 * but no RNA chains (L-peptide/DNA complexes), and prints the count.
 *
 * @param args not used
 * @throws FileNotFoundException if the MMTF reduced-file path cannot be resolved
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(FilterProteinDnaComplexes.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        long count = MmtfReader
                .readSequenceFile(path, sc) // read MMTF hadoop sequence file
                // retain pdb entries that contain L-peptide chains
                .filter(new ContainsLProteinChain())
                // retain pdb entries that contain DNA chains
                .filter(new ContainsDnaChain())
                // filter out any RNA-containing entries
                .filter(new NotFilter(new ContainsRnaChain()))
                .count();
        System.out.println("# L-peptide/DNA complexes: " + count);
    } finally {
        // ensure the Spark context is released even if the job fails
        sc.close();
    }
}
use of edu.sdsc.mmtf.spark.filters.ContainsLProteinChain in project mmtf-spark by sbl-sdsc.
The class NotFilterExample, method main.
/**
 * Demonstrates NotFilter: counts PDB entries that contain L-protein chains
 * but no DNA chains, and prints the count.
 *
 * @param args not used
 * @throws FileNotFoundException if the MMTF reduced-file path cannot be resolved
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName(NotFilterExample.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        long count = MmtfReader
                .readSequenceFile(path, sc) // read MMTF hadoop sequence file
                // retain pdb entries that contain L-peptide chains
                // (non-exclusive: other chain types may also be present)
                .filter(new ContainsLProteinChain())
                // should not contain any DNA chains
                .filter(new NotFilter(new ContainsDnaChain()))
                .count();
        System.out.println("# PDB entries with L-protein and without DNA chains: " + count);
    } finally {
        // ensure the Spark context is released even if the job fails
        sc.close();
    }
}
Aggregations