use of com.google.cloud.solutions.autotokenize.AutoTokenizeMessages.FlatRecord in project auto-data-tokenize by GoogleCloudPlatform.
the class ContactsFlatRecordSampleGenerator method buildContactRecords.
/**
 * Builds {@code count} randomly populated contact records.
 *
 * <p>Every record holds one {@code $.name} value, between 0 and 9 values keyed
 * {@code $.contacts[n].contact.number} and between 0 and 4 values keyed {@code $.emails[n]}.
 * Each value key is also registered in the flat-key schema map against its index-free path
 * ({@code $.name}, {@code $.contacts.contact.number}, {@code $.emails}).
 *
 * @param count number of records to generate; a non-positive count yields an empty list
 * @return an immutable list containing the generated records, in generation order
 */
public ImmutableList<FlatRecord> buildContactRecords(int count) {
  ImmutableList.Builder<FlatRecord> recordListBuilder = ImmutableList.builder();
  // A single generator serves the whole call instead of allocating a fresh one for every draw.
  final Random random = new Random();

  for (int i = 0; i < count; i++) {
    HashMap<String, Value> valuesMap = Maps.newHashMap();
    HashMap<String, String> flatKeyMap = Maps.newHashMap();

    valuesMap.put("$.name", Value.newBuilder().setStringValue(randomName()).build());
    flatKeyMap.put("$.name", "$.name");

    // 0..9 contact numbers per record.
    final int numbers = random.nextInt(10);
    for (int n = 0; n < numbers; n++) {
      String key = "$.contacts[" + n + "].contact.number";
      flatKeyMap.put(key, "$.contacts.contact.number");
      // NOTE(review): the argument 10 is presumably the number of digits -- confirm against
      // randomPhoneNumber.
      valuesMap.put(key, Value.newBuilder().setStringValue(randomPhoneNumber(10)).build());
    }

    // 0..4 email entries per record.
    final int emails = random.nextInt(5);
    for (int n = 0; n < emails; n++) {
      String key = "$.emails[" + n + "]";
      flatKeyMap.put(key, "$.emails");
      // NOTE(review): email values are produced by randomName(), so they may not be
      // email-shaped -- confirm this is intended.
      valuesMap.put(key, Value.newBuilder().setStringValue(randomName()).build());
    }

    recordListBuilder.add(
        flatRecordBuilder().putAllValues(valuesMap).putAllFlatKeySchema(flatKeyMap).build());
  }

  return recordListBuilder.build();
}
use of com.google.cloud.solutions.autotokenize.AutoTokenizeMessages.FlatRecord in project auto-data-tokenize by GoogleCloudPlatform.
the class BatchAndDlpDeIdRecordsTest method expand_valid.
/**
 * Runs {@code BatchAndDlpDeIdRecords} over {@code CONTACT_RECORDS} with a stubbed DLP client and
 * checks the output against the expected records loaded from the two text-proto resources,
 * using {@code FlatRecordCheckerWithoutRecordIds} for the comparison.
 */
@Test
public void expand_valid() {
  ImmutableList<String> expectedRecordFiles =
      ImmutableList.of(
          "avro_records/contacts_schema/jane_doe_contact_number_base64_avro_record.textpb",
          "avro_records/contacts_schema/john_doe_contact_number_base64_avro_record.textpb");
  ImmutableList<FlatRecord> expectedBase64EncodedContacts =
      TestResourceLoader.classPath().forProto(FlatRecord.class).loadAllTextFiles(expectedRecordFiles);

  final String dlpProjectId = "dlp-test-project";
  ImmutableList<String> tokenizedColumns = ImmutableList.of("$.contacts.[\"contact\"].number");

  // Stubbed DLP client, so the pipeline never calls the real service.
  StubbingDlpClientFactory stubClientFactory =
      new StubbingDlpClientFactory(
          new Base64EncodingDlpStub(
              PartialBatchAccumulator.RECORD_ID_COLUMN_NAME,
              tokenizedColumns,
              dlpProjectId,
              testDlpRegion));

  PCollection<FlatRecord> contactRecords = testPipeline.apply(Create.of(CONTACT_RECORDS));
  PCollection<FlatRecord> tokenizedRecords =
      contactRecords.apply(
          BatchAndDlpDeIdRecords.withEncryptConfig(NUMBER_TOKENIZE_CONFIG)
              .withDlpProjectId(dlpProjectId)
              .withDlpRegion(testDlpRegion)
              .withDlpClientFactory(stubClientFactory));

  PAssert.that(tokenizedRecords)
      .satisfies(new FlatRecordCheckerWithoutRecordIds(expectedBase64EncodedContacts));

  testPipeline.run().waitUntilFinish();
}
use of com.google.cloud.solutions.autotokenize.AutoTokenizeMessages.FlatRecord in project auto-data-tokenize by GoogleCloudPlatform.
the class PartialBatchAccumulatorTest method batch_arrayFields_deidConfigContainsOnlyFieldReference.
/**
 * Flattens a two-level-array Avro record, batches it, and checks that the field ids listed in
 * the batch's de-identify config are exactly the expected index-free field references.
 */
@Test
public void batch_arrayFields_deidConfigContainsOnlyFieldReference() {
  // Each configured column gets the same transform.
  DlpEncryptConfig.Builder encryptConfig = DlpEncryptConfig.newBuilder();
  for (String columnId :
      ImmutableList.of(
          "$.multi_level_arrays.simple_field1",
          "$.multi_level_arrays.level1_array.level1_array_record.level2_simple_field",
          "$.multi_level_arrays.level1_array.level1_array_record.level2_array")) {
    encryptConfig.addTransforms(
        ColumnTransform.newBuilder()
            .setColumnId(columnId)
            .setTransform(CRYPTO_UNWRAPPED_TRANSFORM));
  }
  PartialBatchAccumulator accumulator = PartialBatchAccumulator.withConfig(encryptConfig.build());

  FlatRecord flattened =
      RecordFlattener.forGenericRecord()
          .flatten(
              TestResourceLoader.classPath()
                  .forAvro()
                  .withSchemaFile(
                      "avro_records/records_with_two_levels_of_arrays/two_level_arrays_schema.avsc")
                  .loadRecord(
                      "avro_records/records_with_two_levels_of_arrays/simple_two_level_array_record.json"));
  FlatRecord.Builder identified = flattened.toBuilder();
  identified.setRecordId(UUID.randomUUID().toString());
  accumulator.addElement(identified.build());

  BatchPartialColumnDlpTable batch = accumulator.makeBatch();

  // Gather the field ids of every field transformation in the de-identify config.
  ImmutableList.Builder<FieldId> collectedFields = ImmutableList.builder();
  for (FieldTransformation transformation :
      batch.get().getDeidentifyConfig().getRecordTransformations().getFieldTransformationsList()) {
    collectedFields.addAll(transformation.getFieldsList());
  }
  ImmutableList<FieldId> deidConfigTokenizeFields = collectedFields.build();

  ImmutableList<String> expectedColumns =
      ImmutableList.of(
          "$.simple_field1",
          "$.level1_array.[\"level1_array_record\"].level2_simple_field.string",
          "$.level1_array.[\"level1_array_record\"].level2_array.string");
  assertThat(deidConfigTokenizeFields)
      .containsExactlyElementsIn(DeidentifyColumns.fieldIdsFor(expectedColumns));
}
use of com.google.cloud.solutions.autotokenize.AutoTokenizeMessages.FlatRecord in project auto-data-tokenize by GoogleCloudPlatform.
the class PartialBatchAccumulatorTest method batch_arrayFields_itemTableContainsFlattenedEntries.
/**
 * Flattens a two-level-array Avro record, batches it, and checks that the batch table's headers
 * are exactly the record-id column plus one indexed column per flattened entry.
 */
@Test
public void batch_arrayFields_itemTableContainsFlattenedEntries() {
  // Each configured column gets the same transform.
  DlpEncryptConfig.Builder encryptConfig = DlpEncryptConfig.newBuilder();
  for (String columnId :
      ImmutableList.of(
          "$.multi_level_arrays.simple_field1",
          "$.multi_level_arrays.level1_array.level1_array_record.level2_simple_field",
          "$.multi_level_arrays.level1_array.level1_array_record.level2_array")) {
    encryptConfig.addTransforms(
        ColumnTransform.newBuilder()
            .setColumnId(columnId)
            .setTransform(CRYPTO_UNWRAPPED_TRANSFORM));
  }
  PartialBatchAccumulator accumulator = PartialBatchAccumulator.withConfig(encryptConfig.build());

  FlatRecord flattened =
      RecordFlattener.forGenericRecord()
          .flatten(
              TestResourceLoader.classPath()
                  .forAvro()
                  .withSchemaFile(
                      "avro_records/records_with_two_levels_of_arrays/two_level_arrays_schema.avsc")
                  .loadRecord(
                      "avro_records/records_with_two_levels_of_arrays/simple_two_level_array_record.json"));
  FlatRecord.Builder identified = flattened.toBuilder();
  identified.setRecordId(UUID.randomUUID().toString());
  accumulator.addElement(identified.build());

  BatchPartialColumnDlpTable batch = accumulator.makeBatch();

  ImmutableList<String> expectedHeaders =
      ImmutableList.of(
          "__AUTOTOKENIZE__RECORD_ID__",
          "$.simple_field1",
          "$.level1_array[0].[\"level1_array_record\"].level2_simple_field.string",
          "$.level1_array[1].[\"level1_array_record\"].level2_array[1].string",
          "$.level1_array[0].[\"level1_array_record\"].level2_array[0].string",
          "$.level1_array[0].[\"level1_array_record\"].level2_array[1].string",
          "$.level1_array[1].[\"level1_array_record\"].level2_simple_field.string",
          "$.level1_array[1].[\"level1_array_record\"].level2_array[0].string");
  assertThat(batch.get().getTable().getHeadersList())
      .containsExactlyElementsIn(DeidentifyColumns.fieldIdsFor(expectedHeaders));
}
use of com.google.cloud.solutions.autotokenize.AutoTokenizeMessages.FlatRecord in project auto-data-tokenize by GoogleCloudPlatform.
the class PartialBatchAccumulatorTest method addElement_exceedsSize_returnsFalse.
/**
 * Adds the same record repeatedly until {@code addElement} returns {@code false}, then checks
 * that a further add is also rejected and that the batch table's serialized size plus one more
 * value exceeds {@code PartialBatchAccumulator.MAX_DLP_PAYLOAD_SIZE_BYTES}.
 */
@Test
public void addElement_exceedsSize_returnsFalse() {
  ColumnTransform nameTransform =
      ColumnTransform.newBuilder()
          .setColumnId("$.name")
          .setTransform(CRYPTO_UNWRAPPED_TRANSFORM)
          .build();
  PartialBatchAccumulator accumulator =
      PartialBatchAccumulator.withConfig(
          NUMBER_TOKENIZE_CONFIG.toBuilder().addTransforms(nameTransform).build());

  Value testValue = get1KByteString();
  FlatRecord testRecord =
      FlatRecord.newBuilder()
          .setRecordId("!24")
          .putFlatKeySchema("$.name", "$.name")
          .putValues("$.name", testValue)
          .build();

  // Keep adding until the accumulator rejects the record, i.e. it is full.
  boolean accepted;
  do {
    accepted = accumulator.addElement(testRecord);
  } while (accepted);

  assertThat(accumulator.addElement(testRecord)).isFalse();

  int batchTableBytes = accumulator.makeBatch().get().getTable().getSerializedSize();
  int oneMoreValueBytes = testValue.getSerializedSize();
  assertThat(batchTableBytes + oneMoreValueBytes)
      .isGreaterThan(PartialBatchAccumulator.MAX_DLP_PAYLOAD_SIZE_BYTES);
}
Aggregations