Search in sources :

Example 1 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class DeIdentification method deidentifyWithDateShift.

// [END dlp_reidentify_fpe]
// [START dlp_deidentify_date_shift]
/**
 * Date-shifts selected columns of a CSV file via the DLP API and writes the result as CSV.
 *
 * <p>The first line of the input file is used as the table header; every following line is
 * converted to a table row with {@code convertCsvRowToTableRow}. Any exception raised while
 * processing (including invalid argument combinations) is caught and reported on standard output
 * as {@code "Error in deidentifyWithDateShift: <message>"}; it is not rethrown.
 *
 * @param inputCsvPath The path to the CSV file to deidentify
 * @param outputCsvPath Path to the output CSV file. NOTE(review): originally documented as
 *     optional, but it is dereferenced unconditionally, so a null value ends up in the error
 *     report above.
 * @param dateFields The list of (date) fields in the CSV file to date shift
 * @param lowerBoundDays The maximum number of days to shift a date backward
 * @param upperBoundDays The maximum number of days to shift a date forward
 * @param contextFieldId (Optional) The column to determine date shift, default: a random shift
 *     amount
 * @param wrappedKey (Optional) The base64-encoded encrypted ('wrapped') AES-256 key to use when
 *     shifting dates
 * @param keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256
 *     key
 * @param projectId ID of Google Cloud project to run the API under.
 */
private static void deidentifyWithDateShift(Path inputCsvPath, Path outputCsvPath, String[] dateFields, int lowerBoundDays, int upperBoundDays, String contextFieldId, String wrappedKey, String keyName, String projectId) throws Exception {
    // instantiate a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        // Set the maximum days to shift a day backward (lowerbound), forward (upperbound)
        DateShiftConfig.Builder dateShiftConfigBuilder = DateShiftConfig.newBuilder().setLowerBoundDays(lowerBoundDays).setUpperBoundDays(upperBoundDays);
        // If contextFieldId, keyName or wrappedKey is set: all three arguments must be valid
        if (contextFieldId != null && keyName != null && wrappedKey != null) {
            dateShiftConfigBuilder.setContext(FieldId.newBuilder().setName(contextFieldId).build());
            KmsWrappedCryptoKey kmsWrappedCryptoKey = KmsWrappedCryptoKey.newBuilder().setCryptoKeyName(keyName).setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))).build();
            dateShiftConfigBuilder.setCryptoKey(CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build());
        } else if (contextFieldId != null || keyName != null || wrappedKey != null) {
            throw new IllegalArgumentException("You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!");
        }

        // Read and parse the CSV file; try-with-resources closes the reader even when parsing fails
        List<Table.Row> rows = new ArrayList<>();
        List<FieldId> headers;
        try (BufferedReader br = new BufferedReader(new FileReader(inputCsvPath.toFile()))) {
            String headerLine = br.readLine();
            if (headerLine == null) {
                // An empty file has no header line; fail with a clear message instead of an NPE
                throw new IllegalArgumentException("Input CSV file is empty: " + inputCsvPath);
            }
            // convert csv header to FieldId
            headers = Arrays.stream(headerLine.split(",")).map(header -> FieldId.newBuilder().setName(header).build()).collect(Collectors.toList());
            String line;
            while ((line = br.readLine()) != null) {
                // convert csv rows to Table.Row
                rows.add(convertCsvRowToTableRow(line));
            }
        }

        // Build the request: one date-shift transformation applied to all requested columns
        Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build();
        List<FieldId> dateFieldIds = Arrays.stream(dateFields).map(field -> FieldId.newBuilder().setName(field).build()).collect(Collectors.toList());
        DateShiftConfig dateShiftConfig = dateShiftConfigBuilder.build();
        FieldTransformation fieldTransformation = FieldTransformation.newBuilder().addAllFields(dateFieldIds).setPrimitiveTransformation(PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build()).build();
        DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder().setRecordTransformations(RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build()).build();
        ContentItem tableItem = ContentItem.newBuilder().setTable(table).build();
        DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setDeidentifyConfig(deidentifyConfig).setItem(tableItem).build();

        // Execute the deidentification request
        DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request);

        // Write out the response as a CSV file
        List<FieldId> outputHeaderFields = response.getItem().getTable().getHeadersList();
        List<Table.Row> outputRows = response.getItem().getTable().getRowsList();
        List<String> outputHeaders = outputHeaderFields.stream().map(FieldId::getName).collect(Collectors.toList());
        // FileWriter creates the file when it does not exist yet (and truncates it otherwise);
        // closing via try-with-resources also flushes the buffered output.
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputCsvPath.toFile()))) {
            // write out headers
            bufferedWriter.append(String.join(",", outputHeaders) + "\n");
            // write out each row
            for (Table.Row outputRow : outputRows) {
                String row = outputRow.getValuesList().stream().map(value -> value.getStringValue()).collect(Collectors.joining(","));
                bufferedWriter.append(row + "\n");
            }
        }
        System.out.println("Successfully saved date-shift output to: " + outputCsvPath.getFileName());
    } catch (Exception e) {
        System.out.println("Error in deidentifyWithDateShift: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) Date(com.google.type.Date) CryptoKey(com.google.privacy.dlp.v2.CryptoKey) DefaultParser(org.apache.commons.cli.DefaultParser) Path(java.nio.file.Path) Value(com.google.privacy.dlp.v2.Value) DateShiftConfig(com.google.privacy.dlp.v2.DateShiftConfig) InfoTypeTransformation(com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation) FieldTransformation(com.google.privacy.dlp.v2.FieldTransformation) ContentItem(com.google.privacy.dlp.v2.ContentItem) Collectors(java.util.stream.Collectors) ByteString(com.google.protobuf.ByteString) ReidentifyContentRequest(com.google.privacy.dlp.v2.ReidentifyContentRequest) DateTimeParseException(java.time.format.DateTimeParseException) List(java.util.List) ParseException(org.apache.commons.cli.ParseException) LocalDate(java.time.LocalDate) RecordTransformations(com.google.privacy.dlp.v2.RecordTransformations) FfxCommonNativeAlphabet(com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet) CharacterMaskConfig(com.google.privacy.dlp.v2.CharacterMaskConfig) Options(org.apache.commons.cli.Options) KmsWrappedCryptoKey(com.google.privacy.dlp.v2.KmsWrappedCryptoKey) PrimitiveTransformation(com.google.privacy.dlp.v2.PrimitiveTransformation) HelpFormatter(org.apache.commons.cli.HelpFormatter) ArrayList(java.util.ArrayList) CryptoReplaceFfxFpeConfig(com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig) ServiceOptions(com.google.cloud.ServiceOptions) DeidentifyConfig(com.google.privacy.dlp.v2.DeidentifyConfig) CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) Option(org.apache.commons.cli.Option) DeidentifyContentResponse(com.google.privacy.dlp.v2.DeidentifyContentResponse) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) InfoTypeTransformations(com.google.privacy.dlp.v2.InfoTypeTransformations) BaseEncoding(com.google.common.io.BaseEncoding) CommandLineParser(org.apache.commons.cli.CommandLineParser) 
BufferedWriter(java.io.BufferedWriter) Table(com.google.privacy.dlp.v2.Table) FileWriter(java.io.FileWriter) ReidentifyContentResponse(com.google.privacy.dlp.v2.ReidentifyContentResponse) SurrogateType(com.google.privacy.dlp.v2.CustomInfoType.SurrogateType) InfoType(com.google.privacy.dlp.v2.InfoType) DeidentifyContentRequest(com.google.privacy.dlp.v2.DeidentifyContentRequest) File(java.io.File) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) ProjectName(com.google.privacy.dlp.v2.ProjectName) Paths(java.nio.file.Paths) OptionGroup(org.apache.commons.cli.OptionGroup) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) CustomInfoType(com.google.privacy.dlp.v2.CustomInfoType) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) BufferedWriter(java.io.BufferedWriter) DeidentifyConfig(com.google.privacy.dlp.v2.DeidentifyConfig) FileReader(java.io.FileReader) FieldTransformation(com.google.privacy.dlp.v2.FieldTransformation) DeidentifyContentRequest(com.google.privacy.dlp.v2.DeidentifyContentRequest) Table(com.google.privacy.dlp.v2.Table) DateTimeParseException(java.time.format.DateTimeParseException) ParseException(org.apache.commons.cli.ParseException) DateShiftConfig(com.google.privacy.dlp.v2.DateShiftConfig) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) FieldId(com.google.privacy.dlp.v2.FieldId) BufferedReader(java.io.BufferedReader) KmsWrappedCryptoKey(com.google.privacy.dlp.v2.KmsWrappedCryptoKey) File(java.io.File) ContentItem(com.google.privacy.dlp.v2.ContentItem) DeidentifyContentResponse(com.google.privacy.dlp.v2.DeidentifyContentResponse)

Example 2 with Table

use of com.google.bigtable.admin.v2.Table in project beam by apache.

the class BigtableWriteIT method testE2EBigtableWrite.

/**
 * End-to-end check of {@code BigtableIO.write()}: generates 1000 key/value pairs, writes them
 * through a pipeline into a freshly created table, then verifies the table's column families and
 * that the stored data matches what was generated.
 */
@Test
public void testE2EBigtableWrite() throws Exception {
    final String tableName = bigtableOptions.getInstanceName().toTableNameStr(tableId);
    final String instanceName = bigtableOptions.getInstanceName().toString();
    final int numRows = 1000;
    final List<KV<ByteString, ByteString>> testData = generateTableData(numRows);
    createEmptyTable(instanceName, tableId);

    // Turns a sequence index into the row key plus a single SetCell mutation for that row.
    DoFn<Long, KV<ByteString, Iterable<Mutation>>> indexToMutations = new DoFn<Long, KV<ByteString, Iterable<Mutation>>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            int index = c.element().intValue();
            Mutation.SetCell.Builder setCell = Mutation.SetCell.newBuilder()
                    .setValue(testData.get(index).getValue())
                    .setFamilyName(COLUMN_FAMILY_NAME);
            Mutation mutation = Mutation.newBuilder().setSetCell(setCell).build();
            Iterable<Mutation> mutations = ImmutableList.of(mutation);
            c.output(KV.of(testData.get(index).getKey(), mutations));
        }
    };

    Pipeline p = Pipeline.create(options);
    p.apply(GenerateSequence.from(0).to(numRows))
            .apply(ParDo.of(indexToMutations))
            .apply(BigtableIO.write().withBigtableOptions(bigtableOptions).withTableId(tableId));
    p.run();

    // The table must contain exactly one column family, carrying the expected name
    Table table = getTable(tableName);
    assertThat(table.getColumnFamiliesMap().keySet(), Matchers.hasSize(1));
    assertThat(table.getColumnFamiliesMap(), Matchers.hasKey(COLUMN_FAMILY_NAME));

    // The stored rows must equal the generated test data, in any order
    List<KV<ByteString, ByteString>> tableData = getTableData(tableName);
    assertThat(tableData, Matchers.containsInAnyOrder(testData.toArray()));
}
Also used : Table(com.google.bigtable.admin.v2.Table) ByteString(com.google.protobuf.ByteString) KV(org.apache.beam.sdk.values.KV) Mutation(com.google.bigtable.v2.Mutation) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 3 with Table

use of biblemulticonverter.schema.usx3.Table in project BibleMultiConverter by schierlm.

the class USX3 method doImportBook.

/**
 * Imports a single USX file as a {@link ParatextBook}.
 *
 * <p>The file is validated against the schema returned by {@code getSchema()}, unmarshalled with
 * JAXB, and its book-level elements (paragraphs, tables, chapters, notes, sidebars) are
 * translated into Paratext book content in document order.
 *
 * @param inputFile the file to import
 * @return the imported book, or {@code null} if the file name does not end with ".usx"
 *     (ignoring case) or the book code is not a known Paratext ID
 * @throws Exception if validation or parsing fails, or if an unsupported element is encountered
 */
@Override
protected ParatextBook doImportBook(File inputFile) throws Exception {
    if (!inputFile.getName().toLowerCase().endsWith(".usx"))
        return null;
    ValidateXML.validateFileBeforeParsing(getSchema(), inputFile);
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    XMLInputFactory xif = XMLInputFactory.newFactory();
    Usx doc;
    // XMLStreamReader.close() does not close the underlying input source, so the file stream
    // is managed separately to make sure the file handle is released even if parsing fails.
    try (FileInputStream in = new FileInputStream(inputFile)) {
        XMLStreamReader xsr = xif.createXMLStreamReader(in);
        Unmarshaller u = ctx.createUnmarshaller();
        u.setListener(unmarshallerLocationListener);
        unmarshallerLocationListener.setXMLStreamReader(inputFile.getName(), xsr);
        doc = (Usx) u.unmarshal(xsr);
        xsr.close();
    }
    ParatextBook.ParatextID id = ParatextBook.ParatextID.fromIdentifier(doc.getBook().getCode().toUpperCase());
    if (id == null) {
        System.out.println("WARNING: Skipping book with unknown ID: " + doc.getBook().getCode());
        return null;
    }
    ParatextBook result = new ParatextBook(id, doc.getBook().getContent());
    // Character content container that following inline elements (page breaks, notes) are
    // appended to; null means the next such element has to open a new container first.
    ParatextCharacterContent charContent = null;
    for (Object o : doc.getParaOrTableOrChapter()) {
        if (o instanceof Para) {
            Para para = (Para) o;
            if (BOOK_HEADER_ATTRIBUTE_TAGS.contains(para.getStyle().value())) {
                // Book header paragraphs become book attributes; only plain text is allowed
                String value = "";
                for (Object oo : para.getContent()) {
                    if (oo instanceof String) {
                        value += ((String) oo).replaceAll("[ \r\n\t]+", " ");
                    } else {
                        throw new RuntimeException("Unsupported content in attribute: " + oo.getClass());
                    }
                }
                result.getAttributes().put(para.getStyle().value(), value);
                charContent = null;
            } else if (para.getStyle() == ParaStyle.PB) {
                // Page breaks do not start a paragraph; they are added to the current content
                if (charContent == null) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                }
                charContent.getContent().add(new ParatextCharacterContent.AutoClosingFormatting(ParatextCharacterContent.AutoClosingFormattingKind.PAGE_BREAK, false));
            } else if (PARA_STYLE_UNSUPPORTED.contains(para.getStyle())) {
                // skip
                charContent = null;
            } else {
                result.getContent().add(new ParatextBook.ParagraphStart(PARA_STYLE_MAP.get(para.getStyle())));
                charContent = null;
                if (!para.getContent().isEmpty()) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                    parseCharContent(para.getContent(), charContent);
                }
            }
        } else if (o instanceof Table) {
            Table table = (Table) o;
            // Each table row is represented as a TABLE_ROW paragraph followed by its cells
            for (Row row : table.getRow()) {
                result.getContent().add(new ParatextBook.ParagraphStart(ParatextBook.ParagraphKind.TABLE_ROW));
                for (Object oo : row.getVerseOrCell()) {
                    if (oo instanceof Verse) {
                        Verse verse = (Verse) oo;
                        ParatextCharacterContent.ParatextCharacterContentPart verseStartOrEnd = handleVerse(verse);
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        charContent.getContent().add(verseStartOrEnd);
                    } else if (oo instanceof Cell) {
                        Cell cell = (Cell) oo;
                        result.getContent().add(new ParatextBook.TableCellStart(cell.getStyle().value()));
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        parseCharContent(cell.getContent(), charContent);
                    } else {
                        // Report the offending row element (oo), not the enclosing table (o)
                        throw new IOException("Unsupported table row element: " + oo.getClass().getName());
                    }
                }
            }
            charContent = null;
        } else if (o instanceof Chapter) {
            Chapter chapter = (Chapter) o;
            if (chapter.getSid() != null) {
                // Assume start chapter
                result.getContent().add(new ParatextBook.ChapterStart(new ChapterIdentifier(result.getId(), chapter.getNumber().intValue())));
            } else if (chapter.getEid() != null) {
                // Assume end chapter
                ChapterIdentifier location = ChapterIdentifier.fromLocationString(chapter.getEid());
                if (location == null) {
                    throw new IOException("Invalid chapter eid found: " + chapter.getEid());
                }
                result.getContent().add(new ParatextBook.ChapterEnd(location));
            } else {
                throw new IOException("Invalid chapter found, both sid and eid are undefined: " + chapter);
            }
            charContent = null;
        } else if (o instanceof Note) {
            // Book-level notes are attached to the current character content
            if (charContent == null) {
                charContent = new ParatextCharacterContent();
                result.getContent().add(charContent);
            }
            Note note = (Note) o;
            ParatextCharacterContent.FootnoteXref nx = new ParatextCharacterContent.FootnoteXref(NOTE_STYLE_MAP.get(note.getStyle()), note.getCaller());
            charContent.getContent().add(nx);
            parseCharContent(note.getContent(), nx);
        } else if (o instanceof Sidebar) {
            System.out.println("WARNING: Skipping sidebar (study bible content)");
            charContent = null;
        } else {
            throw new IOException("Unsupported book level element: " + o.getClass().getName());
        }
    }
    return result;
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Cell(biblemulticonverter.schema.usx3.Cell) Table(biblemulticonverter.schema.usx3.Table) Para(biblemulticonverter.schema.usx3.Para) Chapter(biblemulticonverter.schema.usx3.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Note(biblemulticonverter.schema.usx3.Note) Usx(biblemulticonverter.schema.usx3.Usx) Row(biblemulticonverter.schema.usx3.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) XMLInputFactory(javax.xml.stream.XMLInputFactory) Verse(biblemulticonverter.schema.usx3.Verse) Sidebar(biblemulticonverter.schema.usx3.Sidebar)

Example 4 with Table

use of biblemulticonverter.schema.usx3.Table in project BibleMultiConverter by schierlm.

the class USX3 method doExportBook.

/**
 * Exports a {@link ParatextBook} as a USX 3.0 XML file.
 *
 * <p>Book attributes are written first as one paragraph each; the book content is then walked
 * with a visitor that rebuilds chapters, paragraphs, tables, rows and cells. Unless the system
 * property {@code biblemulticonverter.skipxmlvalidation} is {@code true}, the output is
 * validated against the schema returned by {@code getSchema()} while marshalling.
 *
 * @param book the book to export
 * @param outFile the file to write; it is encoded as UTF-8
 * @throws Exception if marshalling or writing the file fails
 */
@Override
protected void doExportBook(ParatextBook book, File outFile) throws Exception {
    ObjectFactory of = new ObjectFactory();
    Usx usx = of.createUsx();
    usx.setVersion("3.0");
    usx.setBook(of.createBook());
    usx.getBook().setStyle("id");
    usx.getBook().setCode(book.getId().getIdentifier());
    usx.getBook().setContent(book.getBibleName());
    for (Map.Entry<String, String> attr : book.getAttributes().entrySet()) {
        Para para = new Para();
        para.setStyle(ParaStyle.fromValue(attr.getKey()));
        para.getContent().add(attr.getValue());
        usx.getParaOrTableOrChapter().add(para);
    }
    book.accept(new ParatextBook.ParatextBookContentVisitor<IOException>() {

        // Content list of the paragraph or table cell/row currently being filled;
        // null until a paragraph has been started after a chapter boundary.
        List<Object> currentContent = null;

        // Table that consecutive TABLE_ROW paragraphs are collected into; null outside tables.
        Table currentTable = null;

        @Override
        public void visitChapterStart(ChapterIdentifier location) throws IOException {
            Chapter ch = new Chapter();
            ch.setStyle("c");
            ch.setSid(location.toString());
            ch.setNumber(BigInteger.valueOf(location.chapter));
            usx.getParaOrTableOrChapter().add(ch);
            currentContent = null;
            currentTable = null;
        }

        @Override
        public void visitChapterEnd(ChapterIdentifier location) throws IOException {
            Chapter ch = new Chapter();
            ch.setEid(location.toString());
            usx.getParaOrTableOrChapter().add(ch);
            currentContent = null;
            currentTable = null;
        }

        @Override
        public void visitParagraphStart(ParatextBook.ParagraphKind kind) throws IOException {
            if (kind == ParatextBook.ParagraphKind.TABLE_ROW) {
                // The first row of a run of rows opens a new table
                if (currentTable == null) {
                    currentTable = new Table();
                    usx.getParaOrTableOrChapter().add(currentTable);
                }
                Row row = new Row();
                row.setStyle("tr");
                currentTable.getRow().add(row);
                currentContent = row.getVerseOrCell();
            } else {
                Para para = new Para();
                para.setStyle(PARA_KIND_MAP.get(kind));
                usx.getParaOrTableOrChapter().add(para);
                currentContent = para.getContent();
                currentTable = null;
            }
        }

        @Override
        public void visitTableCellStart(String tag) throws IOException {
            if (currentTable == null) {
                System.out.println("WARNING: Table cell outside of table");
                return;
            }
            // Cells always belong to the most recently started row
            Row currentRow = currentTable.getRow().get(currentTable.getRow().size() - 1);
            Cell cell = new Cell();
            cell.setAlign(tag.contains("r") ? CellAlign.END : CellAlign.START);
            cell.setStyle(CellStyle.fromValue(tag));
            currentRow.getVerseOrCell().add(cell);
            currentContent = cell.getContent();
        }

        @Override
        public void visitParatextCharacterContent(ParatextCharacterContent content) throws IOException {
            // Content without an open paragraph gets an implicit default paragraph
            if (currentContent == null)
                visitParagraphStart(ParatextBook.ParagraphKind.PARAGRAPH_P);
            content.accept(new USX3.USXCharacterContentVisitor(currentContent));
        }
    });
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    Marshaller m = ctx.createMarshaller();
    if (!Boolean.getBoolean("biblemulticonverter.skipxmlvalidation"))
        m.setSchema(getSchema());
    // Encode explicitly as UTF-8 instead of relying on FileWriter's platform default charset,
    // so the bytes on disk agree with the "UTF-8" handed to the XML writer (presumably the
    // encoding it declares in the document - TODO confirm). try-with-resources guarantees the
    // file is flushed and closed even when marshalling fails.
    try (java.io.Writer out = new java.io.OutputStreamWriter(new java.io.FileOutputStream(outFile), java.nio.charset.StandardCharsets.UTF_8)) {
        m.marshal(usx, new UnifiedScriptureXMLWriter(out, "UTF-8"));
    }
}
Also used : Marshaller(javax.xml.bind.Marshaller) Table(biblemulticonverter.schema.usx3.Table) Para(biblemulticonverter.schema.usx3.Para) UnifiedScriptureXMLWriter(biblemulticonverter.format.paratext.utilities.UnifiedScriptureXMLWriter) FileWriter(java.io.FileWriter) Chapter(biblemulticonverter.schema.usx3.Chapter) JAXBContext(javax.xml.bind.JAXBContext) IOException(java.io.IOException) ObjectFactory(biblemulticonverter.schema.usx3.ObjectFactory) Usx(biblemulticonverter.schema.usx3.Usx) Row(biblemulticonverter.schema.usx3.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) Map(java.util.Map) EnumMap(java.util.EnumMap) Cell(biblemulticonverter.schema.usx3.Cell)

Aggregations

ChapterIdentifier (biblemulticonverter.format.paratext.model.ChapterIdentifier)2 Cell (biblemulticonverter.schema.usx3.Cell)2 Chapter (biblemulticonverter.schema.usx3.Chapter)2 Para (biblemulticonverter.schema.usx3.Para)2 Row (biblemulticonverter.schema.usx3.Row)2 Table (biblemulticonverter.schema.usx3.Table)2 Usx (biblemulticonverter.schema.usx3.Usx)2 ByteString (com.google.protobuf.ByteString)2 FileWriter (java.io.FileWriter)2 IOException (java.io.IOException)2 JAXBContext (javax.xml.bind.JAXBContext)2 UnifiedScriptureXMLWriter (biblemulticonverter.format.paratext.utilities.UnifiedScriptureXMLWriter)1 Note (biblemulticonverter.schema.usx3.Note)1 ObjectFactory (biblemulticonverter.schema.usx3.ObjectFactory)1 Sidebar (biblemulticonverter.schema.usx3.Sidebar)1 Verse (biblemulticonverter.schema.usx3.Verse)1 Table (com.google.bigtable.admin.v2.Table)1 Mutation (com.google.bigtable.v2.Mutation)1 ServiceOptions (com.google.cloud.ServiceOptions)1 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)1