Search in sources :

Example 1 with Table

use of com.google.bigtable.admin.v2.Table in project beam by apache.

the class BigtableWriteIT method testE2EBigtableWrite.

/**
 * End-to-end check of {@code BigtableIO.write()}: runs a pipeline that writes one single-cell
 * mutation per generated test row, then verifies that the target table has exactly one column
 * family named {@code COLUMN_FAMILY_NAME} and that its contents match the generated data.
 */
@Test
public void testE2EBigtableWrite() throws Exception {
    final String tableName = bigtableOptions.getInstanceName().toTableNameStr(tableId);
    final String instanceName = bigtableOptions.getInstanceName().toString();
    final int numRows = 1000;
    final List<KV<ByteString, ByteString>> testData = generateTableData(numRows);
    createEmptyTable(instanceName, tableId);

    Pipeline pipeline = Pipeline.create(options);

    // Maps each sequence number to the key of the test row at that index, paired with a single
    // SetCell mutation carrying that row's value.
    DoFn<Long, KV<ByteString, Iterable<Mutation>>> toRowMutations = new DoFn<Long, KV<ByteString, Iterable<Mutation>>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            KV<ByteString, ByteString> expectedRow = testData.get(c.element().intValue());
            Mutation.SetCell.Builder cell = Mutation.SetCell.newBuilder()
                    .setValue(expectedRow.getValue())
                    .setFamilyName(COLUMN_FAMILY_NAME);
            Iterable<Mutation> mutations = ImmutableList.of(Mutation.newBuilder().setSetCell(cell).build());
            c.output(KV.of(expectedRow.getKey(), mutations));
        }
    };

    pipeline
            .apply(GenerateSequence.from(0).to(numRows))
            .apply(ParDo.of(toRowMutations))
            .apply(BigtableIO.write().withBigtableOptions(bigtableOptions).withTableId(tableId));
    pipeline.run();

    // The table must expose exactly one column family, and it must be the expected one.
    Table writtenTable = getTable(tableName);
    assertThat(writtenTable.getColumnFamiliesMap().keySet(), Matchers.hasSize(1));
    assertThat(writtenTable.getColumnFamiliesMap(), Matchers.hasKey(COLUMN_FAMILY_NAME));

    // Every generated row must be present in the table, in any order.
    List<KV<ByteString, ByteString>> actualRows = getTableData(tableName);
    assertThat(actualRows, Matchers.containsInAnyOrder(testData.toArray()));
}
Also used : Table(com.google.bigtable.admin.v2.Table) ByteString(com.google.protobuf.ByteString) KV(org.apache.beam.sdk.values.KV) Mutation(com.google.bigtable.v2.Mutation) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 2 with Table

use of biblemulticonverter.schema.usx3.Table in project BibleMultiConverter by schierlm.

the class USX3 method doImportBook.

/**
 * Imports a single USX file as a {@link ParatextBook}.
 *
 * <p>The file is validated against the schema, unmarshalled via JAXB and then walked element by
 * element (paragraphs, tables, chapters, notes, sidebars) to build the book content.
 *
 * @param inputFile the file to import
 * @return the imported book, or {@code null} if the file name does not end in {@code .usx}
 *     (case-insensitively) or the book code is not a known Paratext ID
 * @throws Exception if validation or unmarshalling fails, or if the document contains an element
 *     this importer does not support
 */
@Override
protected ParatextBook doImportBook(File inputFile) throws Exception {
    // Locale.ROOT keeps the case conversion independent of the user's default locale.
    if (!inputFile.getName().toLowerCase(java.util.Locale.ROOT).endsWith(".usx"))
        return null;
    ValidateXML.validateFileBeforeParsing(getSchema(), inputFile);
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    XMLInputFactory xif = XMLInputFactory.newFactory();
    Unmarshaller u = ctx.createUnmarshaller();
    u.setListener(unmarshallerLocationListener);
    Usx doc;
    // XMLStreamReader.close() does not close the underlying input source, so the file stream is
    // closed explicitly (also when parsing fails).
    try (FileInputStream in = new FileInputStream(inputFile)) {
        XMLStreamReader xsr = xif.createXMLStreamReader(in);
        try {
            unmarshallerLocationListener.setXMLStreamReader(inputFile.getName(), xsr);
            doc = (Usx) u.unmarshal(xsr);
        } finally {
            xsr.close();
        }
    }
    String bookCode = doc.getBook().getCode();
    // A locale-sensitive toUpperCase() would turn "i" into a dotted capital I under Turkish/Azeri
    // locales, making otherwise valid book codes unrecognizable.
    ParatextBook.ParatextID id = ParatextBook.ParatextID.fromIdentifier(bookCode.toUpperCase(java.util.Locale.ROOT));
    if (id == null) {
        System.out.println("WARNING: Skipping book with unknown ID: " + bookCode);
        return null;
    }
    ParatextBook result = new ParatextBook(id, doc.getBook().getContent());
    // Character content container that book-level notes and page breaks are appended to;
    // null means the next such element has to open a new container first.
    ParatextCharacterContent charContent = null;
    for (Object o : doc.getParaOrTableOrChapter()) {
        if (o instanceof Para) {
            Para para = (Para) o;
            if (BOOK_HEADER_ATTRIBUTE_TAGS.contains(para.getStyle().value())) {
                // Header paragraphs become book attributes; only plain text is allowed in them.
                StringBuilder value = new StringBuilder();
                for (Object oo : para.getContent()) {
                    if (oo instanceof String) {
                        value.append(((String) oo).replaceAll("[ \r\n\t]+", " "));
                    } else {
                        throw new RuntimeException("Unsupported content in attribute: " + oo.getClass());
                    }
                }
                result.getAttributes().put(para.getStyle().value(), value.toString());
                charContent = null;
            } else if (para.getStyle() == ParaStyle.PB) {
                if (charContent == null) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                }
                charContent.getContent().add(new ParatextCharacterContent.AutoClosingFormatting(ParatextCharacterContent.AutoClosingFormattingKind.PAGE_BREAK, false));
            } else if (PARA_STYLE_UNSUPPORTED.contains(para.getStyle())) {
                // skip
                charContent = null;
            } else {
                result.getContent().add(new ParatextBook.ParagraphStart(PARA_STYLE_MAP.get(para.getStyle())));
                charContent = null;
                if (!para.getContent().isEmpty()) {
                    charContent = new ParatextCharacterContent();
                    result.getContent().add(charContent);
                    parseCharContent(para.getContent(), charContent);
                }
            }
        } else if (o instanceof Table) {
            Table table = (Table) o;
            for (Row row : table.getRow()) {
                result.getContent().add(new ParatextBook.ParagraphStart(ParatextBook.ParagraphKind.TABLE_ROW));
                for (Object oo : row.getVerseOrCell()) {
                    if (oo instanceof Verse) {
                        Verse verse = (Verse) oo;
                        ParatextCharacterContent.ParatextCharacterContentPart verseStartOrEnd = handleVerse(verse);
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        charContent.getContent().add(verseStartOrEnd);
                    } else if (oo instanceof Cell) {
                        Cell cell = (Cell) oo;
                        result.getContent().add(new ParatextBook.TableCellStart(cell.getStyle().value()));
                        charContent = new ParatextCharacterContent();
                        result.getContent().add(charContent);
                        parseCharContent(cell.getContent(), charContent);
                    } else {
                        // Report the offending row element itself, not the enclosing table.
                        throw new IOException("Unsupported table row element: " + oo.getClass().getName());
                    }
                }
            }
            charContent = null;
        } else if (o instanceof Chapter) {
            Chapter chapter = (Chapter) o;
            if (chapter.getSid() != null) {
                // Assume start chapter
                result.getContent().add(new ParatextBook.ChapterStart(new ChapterIdentifier(result.getId(), chapter.getNumber().intValue())));
            } else if (chapter.getEid() != null) {
                // Assume end chapter
                ChapterIdentifier location = ChapterIdentifier.fromLocationString(chapter.getEid());
                if (location == null) {
                    throw new IOException("Invalid chapter eid found: " + chapter.getEid());
                }
                result.getContent().add(new ParatextBook.ChapterEnd(location));
            } else {
                throw new IOException("Invalid chapter found, both sid and eid are undefined: " + chapter);
            }
            charContent = null;
        } else if (o instanceof Note) {
            if (charContent == null) {
                charContent = new ParatextCharacterContent();
                result.getContent().add(charContent);
            }
            Note note = (Note) o;
            ParatextCharacterContent.FootnoteXref nx = new ParatextCharacterContent.FootnoteXref(NOTE_STYLE_MAP.get(note.getStyle()), note.getCaller());
            charContent.getContent().add(nx);
            parseCharContent(note.getContent(), nx);
        } else if (o instanceof Sidebar) {
            System.out.println("WARNING: Skipping sidebar (study bible content)");
            charContent = null;
        } else {
            throw new IOException("Unsupported book level element: " + o.getClass().getName());
        }
    }
    return result;
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Cell(biblemulticonverter.schema.usx3.Cell) Table(biblemulticonverter.schema.usx3.Table) Para(biblemulticonverter.schema.usx3.Para) Chapter(biblemulticonverter.schema.usx3.Chapter) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Note(biblemulticonverter.schema.usx3.Note) Usx(biblemulticonverter.schema.usx3.Usx) Row(biblemulticonverter.schema.usx3.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) XMLInputFactory(javax.xml.stream.XMLInputFactory) Verse(biblemulticonverter.schema.usx3.Verse) Sidebar(biblemulticonverter.schema.usx3.Sidebar)

Example 3 with Table

use of biblemulticonverter.schema.usx3.Table in project BibleMultiConverter by schierlm.

the class USX3 method doExportBook.

/**
 * Exports a {@link ParatextBook} as a USX 3.0 XML file.
 *
 * <p>Book attributes are written first as paragraphs, then the book content is visited and
 * converted to chapters, paragraphs and tables. The resulting document is marshalled via JAXB;
 * the schema is set on the marshaller unless the system property
 * {@code biblemulticonverter.skipxmlvalidation} is {@code true}.
 *
 * @param book the book to export
 * @param outFile the file to write; it is encoded as UTF-8
 * @throws Exception if building or marshalling the document, or writing the file, fails
 */
@Override
protected void doExportBook(ParatextBook book, File outFile) throws Exception {
    ObjectFactory of = new ObjectFactory();
    Usx usx = of.createUsx();
    usx.setVersion("3.0");
    usx.setBook(of.createBook());
    usx.getBook().setStyle("id");
    usx.getBook().setCode(book.getId().getIdentifier());
    usx.getBook().setContent(book.getBibleName());
    for (Map.Entry<String, String> attr : book.getAttributes().entrySet()) {
        Para para = new Para();
        para.setStyle(ParaStyle.fromValue(attr.getKey()));
        para.getContent().add(attr.getValue());
        usx.getParaOrTableOrChapter().add(para);
    }
    book.accept(new ParatextBook.ParatextBookContentVisitor<IOException>() {

        // Content list of the paragraph, table row or table cell that character content is
        // currently appended to; null until a container has been opened.
        List<Object> currentContent = null;

        // Table that consecutive TABLE_ROW paragraphs are collected into; null outside a table.
        Table currentTable = null;

        @Override
        public void visitChapterStart(ChapterIdentifier location) throws IOException {
            Chapter ch = new Chapter();
            ch.setStyle("c");
            ch.setSid(location.toString());
            ch.setNumber(BigInteger.valueOf(location.chapter));
            usx.getParaOrTableOrChapter().add(ch);
            currentContent = null;
            currentTable = null;
        }

        @Override
        public void visitChapterEnd(ChapterIdentifier location) throws IOException {
            Chapter ch = new Chapter();
            ch.setEid(location.toString());
            usx.getParaOrTableOrChapter().add(ch);
            currentContent = null;
            currentTable = null;
        }

        @Override
        public void visitParagraphStart(ParatextBook.ParagraphKind kind) throws IOException {
            if (kind == ParatextBook.ParagraphKind.TABLE_ROW) {
                // The first row opens a new table; following rows are added to the same table.
                if (currentTable == null) {
                    currentTable = new Table();
                    usx.getParaOrTableOrChapter().add(currentTable);
                }
                Row row = new Row();
                row.setStyle("tr");
                currentTable.getRow().add(row);
                currentContent = row.getVerseOrCell();
            } else {
                Para para = new Para();
                para.setStyle(PARA_KIND_MAP.get(kind));
                usx.getParaOrTableOrChapter().add(para);
                currentContent = para.getContent();
                currentTable = null;
            }
        }

        @Override
        public void visitTableCellStart(String tag) throws IOException {
            if (currentTable == null) {
                System.out.println("WARNING: Table cell outside of table");
                return;
            }
            // currentTable always holds at least one row: it is only created together with one.
            Row currentRow = currentTable.getRow().get(currentTable.getRow().size() - 1);
            Cell cell = new Cell();
            cell.setAlign(tag.contains("r") ? CellAlign.END : CellAlign.START);
            cell.setStyle(CellStyle.fromValue(tag));
            currentRow.getVerseOrCell().add(cell);
            currentContent = cell.getContent();
        }

        @Override
        public void visitParatextCharacterContent(ParatextCharacterContent content) throws IOException {
            // Character content without an open container is wrapped in a default paragraph.
            if (currentContent == null)
                visitParagraphStart(ParatextBook.ParagraphKind.PARAGRAPH_P);
            content.accept(new USX3.USXCharacterContentVisitor(currentContent));
        }
    });
    JAXBContext ctx = JAXBContext.newInstance(ObjectFactory.class.getPackage().getName());
    Marshaller m = ctx.createMarshaller();
    if (!Boolean.getBoolean("biblemulticonverter.skipxmlvalidation"))
        m.setSchema(getSchema());
    // Encode the bytes as UTF-8 explicitly so they match the "UTF-8" encoding passed to the XML
    // writer (FileWriter would use the platform default charset), and close the file reliably.
    try (java.io.Writer out = new java.io.OutputStreamWriter(new java.io.FileOutputStream(outFile), java.nio.charset.StandardCharsets.UTF_8)) {
        m.marshal(usx, new UnifiedScriptureXMLWriter(out, "UTF-8"));
    }
}
Also used : Marshaller(javax.xml.bind.Marshaller) Table(biblemulticonverter.schema.usx3.Table) Para(biblemulticonverter.schema.usx3.Para) UnifiedScriptureXMLWriter(biblemulticonverter.format.paratext.utilities.UnifiedScriptureXMLWriter) FileWriter(java.io.FileWriter) Chapter(biblemulticonverter.schema.usx3.Chapter) JAXBContext(javax.xml.bind.JAXBContext) IOException(java.io.IOException) ObjectFactory(biblemulticonverter.schema.usx3.ObjectFactory) Usx(biblemulticonverter.schema.usx3.Usx) Row(biblemulticonverter.schema.usx3.Row) ChapterIdentifier(biblemulticonverter.format.paratext.model.ChapterIdentifier) Map(java.util.Map) EnumMap(java.util.EnumMap) Cell(biblemulticonverter.schema.usx3.Cell)

Example 4 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class DeIdentification method deidentifyWithDateShift.

// [END dlp_reidentify_fpe]
// [START dlp_deidentify_date_shift]
/**
 * Date-shifts the given date columns of a CSV file via the DLP API and writes the de-identified
 * table to an output CSV file.
 *
 * <p>Any exception raised while processing is caught and reported on standard output instead of
 * being propagated to the caller.
 *
 * <p>NOTE(review): {@code outputCsvPath} is documented as optional, but this method dereferences
 * it unconditionally — confirm that callers always supply it.
 *
 * @param inputCsvPath The path to the CSV file to deidentify
 * @param outputCsvPath (Optional) path to the output CSV file
 * @param dateFields The list of (date) fields in the CSV file to date shift
 * @param lowerBoundDays The maximum number of days to shift a date backward
 * @param upperBoundDays The maximum number of days to shift a date forward
 * @param contextFieldId (Optional) The column to determine date shift, default : a random shift
 *     amount
 * @param wrappedKey (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
 * @param keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256
 *     key
 * @param projectId ID of Google Cloud project to run the API under.
 */
private static void deidentifyWithDateShift(Path inputCsvPath, Path outputCsvPath, String[] dateFields, int lowerBoundDays, int upperBoundDays, String contextFieldId, String wrappedKey, String keyName, String projectId) throws Exception {
    // instantiate a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        // Set the maximum days to shift a day backward (lowerbound), forward (upperbound)
        DateShiftConfig.Builder dateShiftConfigBuilder = DateShiftConfig.newBuilder().setLowerBoundDays(lowerBoundDays).setUpperBoundDays(upperBoundDays);
        // If contextFieldId, keyName or wrappedKey is set: all three arguments must be valid
        if (contextFieldId != null && keyName != null && wrappedKey != null) {
            dateShiftConfigBuilder.setContext(FieldId.newBuilder().setName(contextFieldId).build());
            KmsWrappedCryptoKey kmsWrappedCryptoKey = KmsWrappedCryptoKey.newBuilder().setCryptoKeyName(keyName).setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))).build();
            dateShiftConfigBuilder.setCryptoKey(CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build());
        } else if (contextFieldId != null || keyName != null || wrappedKey != null) {
            throw new IllegalArgumentException("You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!");
        }
        // Read and parse the CSV file; try-with-resources closes the reader even if parsing fails
        List<Table.Row> rows = new ArrayList<>();
        List<FieldId> headers;
        try (BufferedReader br = new BufferedReader(new FileReader(inputCsvPath.toFile()))) {
            String headerLine = br.readLine();
            if (headerLine == null) {
                // readLine() returns null on an empty file; fail with a message naming the file
                throw new IllegalArgumentException("Input CSV file is empty: " + inputCsvPath);
            }
            // convert csv header to FieldId
            headers = Arrays.stream(headerLine.split(",")).map(header -> FieldId.newBuilder().setName(header).build()).collect(Collectors.toList());
            String line;
            while ((line = br.readLine()) != null) {
                // convert csv rows to Table.Row
                rows.add(convertCsvRowToTableRow(line));
            }
        }
        Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build();
        List<FieldId> dateFieldIds = Arrays.stream(dateFields).map(field -> FieldId.newBuilder().setName(field).build()).collect(Collectors.toList());
        DateShiftConfig dateShiftConfig = dateShiftConfigBuilder.build();
        FieldTransformation fieldTransformation = FieldTransformation.newBuilder().addAllFields(dateFieldIds).setPrimitiveTransformation(PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build()).build();
        DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder().setRecordTransformations(RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build()).build();
        ContentItem tableItem = ContentItem.newBuilder().setTable(table).build();
        DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setDeidentifyConfig(deidentifyConfig).setItem(tableItem).build();
        // Execute the deidentification request
        DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request);
        // Write out the response as a CSV file
        List<FieldId> outputHeaderFields = response.getItem().getTable().getHeadersList();
        List<Table.Row> outputRows = response.getItem().getTable().getRowsList();
        List<String> outputHeaders = outputHeaderFields.stream().map(FieldId::getName).collect(Collectors.toList());
        // FileWriter creates the file when it does not exist yet, so no separate creation step
        // is needed; try-with-resources flushes and closes the writer even if a write fails.
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputCsvPath.toFile()))) {
            // write out headers
            bufferedWriter.append(String.join(",", outputHeaders) + "\n");
            // write out each row
            for (Table.Row outputRow : outputRows) {
                String row = outputRow.getValuesList().stream().map(value -> value.getStringValue()).collect(Collectors.joining(","));
                bufferedWriter.append(row + "\n");
            }
        }
        System.out.println("Successfully saved date-shift output to: " + outputCsvPath.getFileName());
    } catch (Exception e) {
        System.out.println("Error in deidentifyWithDateShift: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) Date(com.google.type.Date) CryptoKey(com.google.privacy.dlp.v2.CryptoKey) DefaultParser(org.apache.commons.cli.DefaultParser) Path(java.nio.file.Path) Value(com.google.privacy.dlp.v2.Value) DateShiftConfig(com.google.privacy.dlp.v2.DateShiftConfig) InfoTypeTransformation(com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation) FieldTransformation(com.google.privacy.dlp.v2.FieldTransformation) ContentItem(com.google.privacy.dlp.v2.ContentItem) Collectors(java.util.stream.Collectors) ByteString(com.google.protobuf.ByteString) ReidentifyContentRequest(com.google.privacy.dlp.v2.ReidentifyContentRequest) DateTimeParseException(java.time.format.DateTimeParseException) List(java.util.List) ParseException(org.apache.commons.cli.ParseException) LocalDate(java.time.LocalDate) RecordTransformations(com.google.privacy.dlp.v2.RecordTransformations) FfxCommonNativeAlphabet(com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet) CharacterMaskConfig(com.google.privacy.dlp.v2.CharacterMaskConfig) Options(org.apache.commons.cli.Options) KmsWrappedCryptoKey(com.google.privacy.dlp.v2.KmsWrappedCryptoKey) PrimitiveTransformation(com.google.privacy.dlp.v2.PrimitiveTransformation) HelpFormatter(org.apache.commons.cli.HelpFormatter) ArrayList(java.util.ArrayList) CryptoReplaceFfxFpeConfig(com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig) ServiceOptions(com.google.cloud.ServiceOptions) DeidentifyConfig(com.google.privacy.dlp.v2.DeidentifyConfig) CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) Option(org.apache.commons.cli.Option) DeidentifyContentResponse(com.google.privacy.dlp.v2.DeidentifyContentResponse) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) InfoTypeTransformations(com.google.privacy.dlp.v2.InfoTypeTransformations) BaseEncoding(com.google.common.io.BaseEncoding) CommandLineParser(org.apache.commons.cli.CommandLineParser) 
BufferedWriter(java.io.BufferedWriter) Table(com.google.privacy.dlp.v2.Table) FileWriter(java.io.FileWriter) ReidentifyContentResponse(com.google.privacy.dlp.v2.ReidentifyContentResponse) SurrogateType(com.google.privacy.dlp.v2.CustomInfoType.SurrogateType) InfoType(com.google.privacy.dlp.v2.InfoType) DeidentifyContentRequest(com.google.privacy.dlp.v2.DeidentifyContentRequest) File(java.io.File) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) ProjectName(com.google.privacy.dlp.v2.ProjectName) Paths(java.nio.file.Paths) OptionGroup(org.apache.commons.cli.OptionGroup) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) CustomInfoType(com.google.privacy.dlp.v2.CustomInfoType) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) BufferedWriter(java.io.BufferedWriter) DeidentifyConfig(com.google.privacy.dlp.v2.DeidentifyConfig) FileReader(java.io.FileReader) FieldTransformation(com.google.privacy.dlp.v2.FieldTransformation) DeidentifyContentRequest(com.google.privacy.dlp.v2.DeidentifyContentRequest) Table(com.google.privacy.dlp.v2.Table) DateTimeParseException(java.time.format.DateTimeParseException) ParseException(org.apache.commons.cli.ParseException) DateShiftConfig(com.google.privacy.dlp.v2.DateShiftConfig) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) FieldId(com.google.privacy.dlp.v2.FieldId) BufferedReader(java.io.BufferedReader) KmsWrappedCryptoKey(com.google.privacy.dlp.v2.KmsWrappedCryptoKey) File(java.io.File) ContentItem(com.google.privacy.dlp.v2.ContentItem) DeidentifyContentResponse(com.google.privacy.dlp.v2.DeidentifyContentResponse)

Example 5 with Table

use of io.bootique.jdbc.junit5.Table in project bootique-jdbc by bootique.

the class CsvDataSetBuilderTest method createTable.

/**
 * Initializes the shared {@code table} fixture: a table named {@code t1} with five columns
 * {@code c1}..{@code c5} (VARCHAR, INTEGER, VARBINARY, BIGINT, DECIMAL), backed by a mocked
 * {@code DbConnector}.
 */
@BeforeAll
public static void createTable() {
    DbColumnMetadata[] columns = {
        new DbColumnMetadata("c1", Types.VARCHAR, false, true),
        new DbColumnMetadata("c2", Types.INTEGER, false, true),
        new DbColumnMetadata("c3", Types.VARBINARY, false, true),
        new DbColumnMetadata("c4", Types.BIGINT, false, true),
        new DbColumnMetadata("c5", Types.DECIMAL, false, true)
    };
    TableFQName tableName = new TableFQName(null, null, "t1");
    DbTableMetadata metadata = new DbTableMetadata(tableName, columns);
    DbConnector connector = mock(DbConnector.class);
    table = new Table(connector, metadata);
}
Also used : DbTableMetadata(io.bootique.jdbc.junit5.metadata.DbTableMetadata) Table(io.bootique.jdbc.junit5.Table) DbColumnMetadata(io.bootique.jdbc.junit5.metadata.DbColumnMetadata) TableFQName(io.bootique.jdbc.junit5.metadata.TableFQName) BeforeAll(org.junit.jupiter.api.BeforeAll)

Aggregations

Test (org.junit.Test)43 Table (com.google.bigtable.admin.v2.Table)20 Table (com.google.privacy.dlp.v2.Table)20 ByteString (com.google.protobuf.ByteString)18 Table (org.molgenis.emx2.Table)18 Row (org.molgenis.emx2.Row)14 ColumnFamily (com.google.bigtable.admin.v2.ColumnFamily)13 AbstractMessage (com.google.protobuf.AbstractMessage)13 Schema (org.molgenis.emx2.Schema)12 MolgenisException (org.molgenis.emx2.MolgenisException)10 HashMap (java.util.HashMap)9 List (java.util.List)8 Collectors (java.util.stream.Collectors)8 ColumnType (org.molgenis.emx2.ColumnType)8 IOException (java.io.IOException)6 TableMetadata (org.molgenis.emx2.TableMetadata)6 InstanceName (com.google.bigtable.admin.v2.InstanceName)5 Path (java.nio.file.Path)5 ArrayList (java.util.ArrayList)5 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)4