use of com.cloudera.cdk.data.DatasetRepository in project cdk-examples by cloudera.
the class CreateHCatalogUserDatasetGeneric method run.
/**
 * Creates the "users" dataset in the repository opened from "repo:hive" and
 * writes 100 generated user records to it.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 after all records have been written and the writer closed
 * @throws Exception if any repository, dataset or writer call fails
 */
@Override
public int run(String[] args) throws Exception {
    // Candidate values for the "favoriteColor" field; one is picked at random per record.
    final String[] palette = { "green", "blue", "pink", "brown", "yellow" };
    final Random picker = new Random();

    // Open the HCatalog-backed repository (managed Hive tables).
    DatasetRepository hiveRepo = DatasetRepositories.open("repo:hive");

    // Describe the dataset with the Avro schema loaded from the user.avsc resource, then create it.
    DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder();
    DatasetDescriptor userDescriptor = descriptorBuilder.schemaUri("resource:user.avsc").build();
    Dataset<GenericRecord> userDataset = hiveRepo.create("users", userDescriptor);

    // The writer is closed in the finally block even if opening or writing fails.
    DatasetWriter<GenericRecord> out = userDataset.newWriter();
    try {
        out.open();
        GenericRecordBuilder recordBuilder = new GenericRecordBuilder(userDescriptor.getSchema());
        int n = 0;
        while (n < 100) {
            recordBuilder.set("username", "user-" + n);
            recordBuilder.set("creationDate", System.currentTimeMillis());
            recordBuilder.set("favoriteColor", palette[picker.nextInt(palette.length)]);
            GenericRecord user = recordBuilder.build();
            out.write(user);
            n++;
        }
    } finally {
        out.close();
    }
    return 0;
}
use of com.cloudera.cdk.data.DatasetRepository in project cdk-examples by cloudera.
the class CreateProductDatasetPojo method run.
/**
 * Creates the "products" dataset in the repository opened from
 * "repo:hdfs:/tmp/data" and writes one {@code Product} per hard-coded name.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 after all products have been written and the writer closed
 * @throws Exception if any repository, dataset or writer call fails
 */
@Override
public int run(String[] args) throws Exception {
    // Open the filesystem repository rooted at /tmp/data.
    DatasetRepository fsRepo = DatasetRepositories.open("repo:hdfs:/tmp/data");

    // Build the descriptor from the Product class, then create the dataset.
    DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder();
    DatasetDescriptor productDescriptor = descriptorBuilder.schema(Product.class).build();
    Dataset<Product> productDataset = fsRepo.create("products", productDescriptor);

    // The writer is closed in the finally block even if opening or writing fails.
    DatasetWriter<Product> out = productDataset.newWriter();
    try {
        out.open();
        String[] productNames = { "toaster", "teapot", "butter dish" };
        // Each product's id is its position in the array, starting at 0.
        for (int id = 0; id < productNames.length; id++) {
            Product item = new Product();
            item.setName(productNames[id]);
            item.setId(id);
            out.write(item);
        }
    } finally {
        out.close();
    }
    return 0;
}
use of com.cloudera.cdk.data.DatasetRepository in project cdk-examples by cloudera.
the class DeleteHCatalogUserDataset method run.
/**
 * Deletes the "users" dataset from the repository opened from "repo:hive".
 *
 * @param args command-line arguments; not read by this method
 * @return 0 if the repository's delete call returns true, otherwise 1
 * @throws Exception if opening the repository or deleting the dataset fails
 */
@Override
public int run(String[] args) throws Exception {
    // Open the HCatalog-backed repository (managed Hive tables).
    DatasetRepository hiveRepo = DatasetRepositories.open("repo:hive");
    // Translate the boolean delete result into an exit code.
    if (hiveRepo.delete("users")) {
        return 0;
    }
    return 1;
}
use of com.cloudera.cdk.data.DatasetRepository in project cdk-examples by cloudera.
the class DeleteProductDataset method run.
/**
 * Deletes the "products" dataset from the repository opened from
 * "repo:hdfs:/tmp/data".
 *
 * @param args command-line arguments; not read by this method
 * @return 0 if the repository's delete call returns true, otherwise 1
 * @throws Exception if opening the repository or deleting the dataset fails
 */
@Override
public int run(String[] args) throws Exception {
    // Open the filesystem repository rooted at /tmp/data.
    DatasetRepository fsRepo = DatasetRepositories.open("repo:hdfs:/tmp/data");
    // Translate the boolean delete result into an exit code.
    if (fsRepo.delete("products")) {
        return 0;
    }
    return 1;
}
use of com.cloudera.cdk.data.DatasetRepository in project cdk-examples by cloudera.
the class DeleteUserDataset method run.
/**
 * Deletes the "users" dataset from the repository opened from
 * "repo:hdfs:/tmp/data".
 *
 * @param args command-line arguments; not read by this method
 * @return 0 if the repository's delete call returns true, otherwise 1
 * @throws Exception if opening the repository or deleting the dataset fails
 */
@Override
public int run(String[] args) throws Exception {
    // Open the filesystem repository rooted at /tmp/data.
    DatasetRepository fsRepo = DatasetRepositories.open("repo:hdfs:/tmp/data");
    // Translate the boolean delete result into an exit code.
    if (fsRepo.delete("users")) {
        return 0;
    }
    return 1;
}
Aggregations