Use of com.cerner.bunsen.codes.broadcast.BroadcastableValueSets in project bunsen by cerner.
The class ValueSetUdfs, method pushUdf.
/**
* Pushes an "in_valueset" UDF that uses the given {@link BroadcastableValueSets} for its content.
*
* @param spark the spark session
* @param valueSets the valuesets to use in the UDF
*/
public static synchronized void pushUdf(SparkSession spark, BroadcastableValueSets valueSets) {
JavaSparkContext ctx = new JavaSparkContext(spark.sparkContext());
Broadcast<BroadcastableValueSets> broadcast = ctx.broadcast(valueSets);
spark.udf().register("in_valueset", new InValuesetUdf(broadcast), DataTypes.BooleanType);
// Push the broadcast variable onto the stack so the previous value sets can be restored later
valueSetStack.push(broadcast);
}
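Once pushed, the "in_valueset" UDF is available to any Spark SQL query in the session. A minimal usage sketch, not from the Bunsen source: the "condition" table name and the "diabetes" reference key are assumptions for illustration.

// Hypothetical usage: "condition" is an assumed view of FHIR conditions, and
// "diabetes" is a reference key defined when the value sets were built.
Dataset<Row> diabetics = spark.sql(
    "SELECT subject.reference, code "
        + "FROM condition "
        + "WHERE in_valueset(code, 'diabetes')");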
Use of com.cerner.bunsen.codes.broadcast.BroadcastableValueSets in project bunsen by cerner.
The class ValueSetUdfsTest, method setUp.
/**
* Sets up Spark and loads test value sets.
*/
@BeforeClass
public static void setUp() throws IOException {
// Create a local spark session using an in-memory metastore.
// We must also use Hive and set the partition mode to non-strict to
// support dynamic partitions.
spark = SparkSession.builder()
    .master("local[2]")
    .appName("UdfsTest")
    .enableHiveSupport()
    .config("javax.jdo.option.ConnectionURL",
        "jdbc:derby:memory:metastore_db;create=true")
    .config("hive.exec.dynamic.partition.mode", "nonstrict")
    .config("spark.sql.warehouse.dir",
        Files.createTempDirectory("spark_warehouse").toString())
    .getOrCreate();
spark.sql("create database " + ConceptMaps.MAPPING_DATABASE);
Hierarchies withLoinc = Loinc.withLoincHierarchy(spark,
    Hierarchies.getEmpty(spark),
    "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV",
    "2.56");
Hierarchies withLoincAndSnomed = Snomed.withRelationships(spark,
    withLoinc,
    "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT",
    "20160901");
ValueSets withGender = ValueSets.getEmpty(spark)
    .withValueSetsFromDirectory("src/test/resources/xml/valuesets");
BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder()
    .addCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, "8462-4")
    .addCode("albumin", Loinc.LOINC_CODE_SYSTEM_URI, "14959-1")
    .addReference("married", "urn:cerner:bunsen:valueset:married_maritalstatus")
    .addDescendantsOf("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "LP14419-3",
        Loinc.LOINC_HIERARCHY_URI)
    .addDescendantsOf("diabetes", Snomed.SNOMED_CODE_SYSTEM_URI, "73211009",
        Snomed.SNOMED_HIERARCHY_URI)
    .addDescendantsOf("blood_disorder", Snomed.SNOMED_CODE_SYSTEM_URI, "266992002",
        Snomed.SNOMED_HIERARCHY_URI)
    .addDescendantsOf("disorder_history", Snomed.SNOMED_CODE_SYSTEM_URI, "312850006",
        Snomed.SNOMED_HIERARCHY_URI)
    .build(spark, withGender, withLoincAndSnomed);
ValueSetUdfs.pushUdf(spark, valueSets);
Dataset<Observation> loincObservations = spark.createDataset(
    ImmutableList.of(
        observation("leukocytes", "5821-4"), // "is a" LP14419-3
        observation("bp", "8462-4")), // Blood pressure
    encoders.of(Observation.class));
loincObservations.createOrReplaceTempView("test_loinc_obs");
// Conditions include a history of anemia, which has a cyclic ancestor
// in our test data. This ensures that such cycles can be loaded correctly.
Dataset<Condition> conditions = spark.createDataset(
    ImmutableList.of(
        condition("diabetes", "44054006"), // "is a" 73211009 (diabetes)
        condition("history_of_anemia", "275538002")), // "is a" 312850006 (history of disorder)
    encoders.of(Condition.class));
conditions.createOrReplaceTempView("test_snomed_cond");
Dataset<Patient> patients = spark.createDataset(
    ImmutableList.of(
        patient("married", "M"),
        patient("unmarried", "U")),
    encoders.of(Patient.class));
patients.createOrReplaceTempView("test_valueset_patient");
}
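With setUp complete, a test can exercise the UDF against these temp views. A hypothetical test sketch, not part of the excerpt, built on the fixtures above; it assumes the observation helper sets the resource id field.

@Test
public void leukocytesValueSetMatchesDescendants() {
  // Of the two observations loaded in setUp, only code 5821-4
  // descends from LP14419-3, so exactly one row should match.
  Dataset<Row> results = spark.sql(
      "SELECT id FROM test_loinc_obs WHERE in_valueset(code, 'leukocytes')");
  Assert.assertEquals(1, results.count());
}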