Use of com.google.cloud.automl.v1.Dataset in project java-datalabeling by googleapis.
Class LabelTextIT, method setUp.
/**
 * Prepares the Data Labeling resources used by the tests in this class.
 *
 * <p>Standard output is redirected to an in-memory buffer first. The method then creates a
 * dataset, looks it up by display name, imports text items from {@code DATASET_GCS_SOURCE_URI},
 * creates an instruction and an annotation spec set, and looks both of those up by display name.
 * The matches are stored in {@code dataset}, {@code instruction} and {@code annotationSpecSet},
 * which are declared outside this method.
 *
 * <p>Any exception raised along the way is caught and only its stack trace is printed.
 */
@Before
public void setUp() {
  System.setOut(new PrintStream(new ByteArrayOutputStream()));
  try (DataLabelingServiceClient labelingClient = DataLabelingServiceClient.create()) {
    // Create the dataset
    CreateDataset.createDataset(PROJECT_ID, datasetName);
    ProjectName projectName = ProjectName.of(PROJECT_ID);

    // Get the Dataset: the last listed entry with the expected display name wins.
    ListDatasetsRequest listDatasets =
        ListDatasetsRequest.newBuilder().setParent(projectName.toString()).build();
    ListDatasetsPagedResponse datasetPages = labelingClient.listDatasets(listDatasets);
    for (Dataset candidate : datasetPages.getPage().iterateAll()) {
      if (!candidate.getDisplayName().equals("LABEL_TEXT_DATASET_NAME")) {
        continue;
      }
      dataset = candidate;
    }

    // Import the texts
    GcsSource.Builder csvSourceBuilder = GcsSource.newBuilder();
    csvSourceBuilder.setInputUri(DATASET_GCS_SOURCE_URI);
    csvSourceBuilder.setMimeType("text/csv");
    GcsSource csvSource = csvSourceBuilder.build();

    // DataTypes: AUDIO, IMAGE, VIDEO, TEXT
    InputConfig importConfig =
        InputConfig.newBuilder().setDataType(DataType.TEXT).setGcsSource(csvSource).build();
    ImportDataRequest importRequest =
        ImportDataRequest.newBuilder()
            .setName(dataset.getName())
            .setInputConfig(importConfig)
            .build();
    // get() blocks until the asynchronous import has finished.
    ImportDataOperationResponse importResult =
        labelingClient.importDataAsync(importRequest).get();
    System.out.printf("Imported items: %d\n", importResult.getImportCount());

    // Create the instruction
    CreateInstruction.createInstruction(PROJECT_ID, INSTRUCTION_GCS_SOURCE_URI);
    // Create the annotation spec set
    CreateAnnotationSpecSet.createAnnotationSpecSet(PROJECT_ID);

    // Get the instruction
    ListInstructionsRequest listInstructions =
        ListInstructionsRequest.newBuilder().setParent(projectName.toString()).build();
    ListInstructionsPagedResponse instructionPages =
        labelingClient.listInstructions(listInstructions);
    for (Instruction candidate : instructionPages.getPage().iterateAll()) {
      if (!candidate.getDisplayName().equals("YOUR_INSTRUCTION_DISPLAY_NAME")) {
        continue;
      }
      instruction = candidate;
    }

    // Get the annotation spec set
    ListAnnotationSpecSetsRequest listSpecSets =
        ListAnnotationSpecSetsRequest.newBuilder().setParent(projectName.toString()).build();
    ListAnnotationSpecSetsPagedResponse specSetPages =
        labelingClient.listAnnotationSpecSets(listSpecSets);
    for (AnnotationSpecSet candidate : specSetPages.getPage().iterateAll()) {
      if (!candidate.getDisplayName().equals("YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME")) {
        continue;
      }
      annotationSpecSet = candidate;
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Use of com.google.cloud.automl.v1.Dataset in project java-datalabeling by googleapis.
Class ITSystemTest, method beforeClass.
/**
 * Creates the client, one dataset and one annotation spec set before any test runs.
 *
 * <p>The fourth {@code "/"}-separated segment of each created resource name is stored in
 * {@code dataSetId} and {@code annotationSpecSetId} respectively.
 *
 * @throws Exception if the client cannot be created or a create call fails
 */
@BeforeClass
public static void beforeClass() throws Exception {
  client = DataLabelingServiceClient.create();

  // Create the dataset.
  Dataset createdDataset = client.createDataset(PARENT, DATASET);
  String[] datasetNameSegments = createdDataset.getName().split("/");
  dataSetId = datasetNameSegments[3];
  LOGGER.info("Dataset created successfully.");

  // Create the annotation spec set: one spec per label/description pair.
  Map<String, String> labelDescriptions = new HashMap<>();
  labelDescriptions.put(LABEL_1, DESCRIPTION1);
  labelDescriptions.put(LABEL_2, DESCRIPTION2);

  List<AnnotationSpec> specs = new ArrayList<>();
  for (Map.Entry<String, String> labelEntry : labelDescriptions.entrySet()) {
    specs.add(
        AnnotationSpec.newBuilder()
            .setDisplayName(labelEntry.getKey())
            .setDescription(labelEntry.getValue())
            .build());
  }

  AnnotationSpecSet specSetToCreate =
      AnnotationSpecSet.newBuilder()
          .setDisplayName(ANNOTATION_SPEC_SET)
          .setDescription(DESCRIPTION)
          .addAllAnnotationSpecs(specs)
          .build();
  AnnotationSpecSet createdSpecSet = client.createAnnotationSpecSet(PARENT, specSetToCreate);
  String[] specSetNameSegments = createdSpecSet.getName().split("/");
  annotationSpecSetId = specSetNameSegments[3];
  LOGGER.info("AnnotationSpecSet created successfully.");
}
Use of com.google.cloud.automl.v1.Dataset in project java-datalabeling by googleapis.
Class ITSystemTest, method getDatasetTest.
/**
 * Fetches the dataset identified by {@code dataSetId} and checks that its display name and
 * description match the expected constants.
 */
@Test
public void getDatasetTest() {
  // DatasetName.format builds the resource name string from the project and dataset ids.
  Dataset fetched = client.getDataset(DatasetName.format(PROJECT_ID, dataSetId));

  assertEquals(DATASET_DISPLAY_NAME, fetched.getDisplayName());
  assertEquals(DESCRIPTION, fetched.getDescription());
}
Use of com.google.cloud.automl.v1.Dataset in project geonetwork-microservices by geonetwork.
Class DcatConverter, method convert.
/**
 * Convert an index document into a DCAT object.
 *
 * <p>The {@code IndexRecordFieldNames.source} field of {@code doc} is deserialized into an
 * {@code IndexRecord}. A DCAT {@code Dataset} is then built from it and wrapped as the primary
 * topic of the returned {@code CatalogRecord}.
 *
 * <p>When the record has no main language, {@code defaultLanguage} is used for the catalog
 * record language URI.
 *
 * @param doc index document whose {@code IndexRecordFieldNames.source} field holds the record
 * @return the catalog record, or {@code null} when the source JSON cannot be parsed or mapped
 */
public CatalogRecord convert(JsonNode doc) {
  CatalogRecord catalogRecord = null;
  try {
    IndexRecord record = new ObjectMapper()
        .enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY)
        .readValue(doc.get(IndexRecordFieldNames.source).toString(), IndexRecord.class);

    // TODO: Define strategy to build IRI
    // Built from the first non-null resource identifier; null when there is none.
    final String resourceIdentifierUri = record.getResourceIdentifier().stream()
        .filter(Objects::nonNull)
        .findFirst()
        .map(id -> "local:" + id.getCode())
        .orElse(null);

    // TODO: Need language mapper
    String language =
        record.getMainLanguage() == null ? defaultLanguage : record.getMainLanguage();
    String languageUpperCase = language.toUpperCase();

    // TODO: Add multilingual support
    // TODO .resource("https://creativecommons.org/publicdomain/zero/1.0/deed")
    // NOTE(review): getCodelists().get(topic) is dereferenced without a null check, unlike the
    // maintenanceAndUpdateFrequency lookup below - confirm the topic codelist is always present.
    // NOTE(review): theme(...) is called twice; confirm the builder accumulates both lists
    // rather than keeping only the second.
    DatasetBuilder datasetBuilder = Dataset.builder()
        .identifier(record.getResourceIdentifier().stream()
            .map(c -> c.getCode())
            .collect(Collectors.toList()))
        .title(listOfNullable(record.getResourceTitle().get(defaultText)))
        .description(listOfNullable(record.getResourceAbstract().get(defaultText)))
        .landingPage(listOfNullable(DcatDocument.builder()
            .foafDocument(FoafDocument.builder()
                .about(formatterConfiguration.buildLandingPageLink(
                    record.getMetadataIdentifier()))
                .title(record.getResourceTitle().get(defaultText))
                .build())
            .build()))
        .provenance(record.getResourceLineage().stream()
            .map(l -> Provenance.builder()
                .provenanceStatement(ProvenanceStatement.builder()
                    .label(l.get(defaultText))
                    .build())
                .build())
            .collect(Collectors.toList()))
        .type(record.getResourceType().stream()
            .map(t -> new RdfResource(null, "dcat:" + RESSOURCE_TYPE_MAPPING.get(t), null))
            .collect(Collectors.toList()))
        .modified(toDate(record.getChangeDate()))
        .theme(record.getCodelists().get(topic).stream()
            .map(t -> Subject.builder()
                .skosConcept(SkosConcept.builder()
                    .prefLabel(t.getProperties().get(defaultText))
                    .build())
                .build())
            .collect(Collectors.toList()))
        .theme(record.getTag().stream()
            .map(t -> Subject.builder()
                .skosConcept(SkosConcept.builder()
                    .prefLabel(t.get(defaultText))
                    .build())
                .build())
            .collect(Collectors.toList()));

    // Resource dates by type; a "revision" date is passed to modified(), which was already
    // called above with the record change date.
    record.getResourceDate().stream()
        .filter(d -> "creation".equals(d.getType()))
        .forEach(d -> datasetBuilder.created(toDate(d.getDate())));
    record.getResourceDate().stream()
        .filter(d -> "publication".equals(d.getType()))
        .forEach(d -> datasetBuilder.issued(toDate(d.getDate())));
    record.getResourceDate().stream()
        .filter(d -> "revision".equals(d.getType()))
        .forEach(d -> datasetBuilder.modified(toDate(d.getDate())));

    // TODO: Convert to meter ?
    datasetBuilder.spatialResolutionInMeters(record.getResolutionScaleDenominator().stream()
        .map(BigDecimal::new)
        .collect(Collectors.toList()));

    // INSPIRE
    if (record.getSpecificationConformance().size() > 0) {
      // https://github.com/SEMICeu/iso-19139-to-dcat-ap/blob/master/iso-19139-to-dcat-ap.xsl#L837-L840
      datasetBuilder.wasUsedBy(record.getSpecificationConformance().stream()
          .map(c -> DcatActivity.builder()
              .activity(ProvActivity.builder()
                  .used(new RdfResource(null, resourceIdentifierUri, null))
                  .qualifiedAssociation(ProvQualifiedAssociation.builder()
                      .hadPlan(ProvHadPlan.builder()
                          .wasDerivedFrom(
                              new RdfResource("Resource", null, null, c.getTitle(), null))
                          .build())
                      .build())
                  .generated(ProvGenerated.builder()
                      .type(new RdfResource(
                          "http://inspire.ec.europa.eu/metadata-codelist/DegreeOfConformity/"
                              + INSPIRE_DEGREE_OF_CONFORMITY.get(c.getPass()),
                          null))
                      .build())
                  .build())
              .build())
          .collect(Collectors.toList()));
    }

    if (record.getResourceLanguage() != null) {
      // TODO: Where to put resource language ?
      datasetBuilder.language(record.getResourceLanguage().stream()
          .map(l -> new RdfResource(
              null,
              "http://publications.europa.eu/resource/authority/language/" + l.toUpperCase(),
              null))
          .collect(Collectors.toList()));
    }

    // Only the first maintenance/update frequency entry is used.
    ArrayList<Codelist> updateFrequencyList =
        record.getCodelists().get(Codelists.maintenanceAndUpdateFrequency);
    if (updateFrequencyList != null && updateFrequencyList.size() > 0) {
      datasetBuilder.accrualPeriodicity(new RdfResource(
          null,
          ACCRUAL_PERIODICITY_URI_PREFIX + ACCRUAL_PERIODICITY_TO_ISO.get(
              updateFrequencyList.get(0).getProperties().get(CommonField.key)),
          null));
    }

    // <dct:spatial rdf:parseType="Resource">
    datasetBuilder.spatial(record.getGeometries().stream()
        .map(g -> DctSpatial.builder()
            .location(DctLocation.builder().geometry(g).build())
            .build())
        .collect(Collectors.toList()));

    // Each bound of a temporal range is set only when it is non-empty.
    datasetBuilder.temporal(record.getResourceTemporalExtentDateRange().stream()
        .map(range -> {
          DctPeriodOfTimeBuilder periodOfTime = DctPeriodOfTime.builder();
          if (StringUtils.isNotEmpty(range.getGte())) {
            periodOfTime.startDate(toDate(range.getGte()));
          }
          if (StringUtils.isNotEmpty(range.getLte())) {
            periodOfTime.endDate(toDate(range.getLte()));
          }
          return DctTemporal.builder().periodOfTime(periodOfTime.build()).build();
        })
        .collect(Collectors.toList()));

    // NOTE(review): distribution(...) is invoked once per link with a single-element list;
    // confirm the builder appends rather than replaces, otherwise only the last link is kept.
    record.getLinks().stream().forEach(link -> {
      DcatDistributionBuilder dcatDistributionBuilder = DcatDistribution.builder()
          .title(listOfNullable(link.getName()))
          .description(listOfNullable(link.getDescription()))
          .representationTechnique(Subject.builder()
              .skosConcept(SkosConcept.builder().prefLabel(link.getProtocol()).build())
              .build());
      // TODO: depending on function/protocol build page/accessUrl/downloadUrl
      dcatDistributionBuilder.accessUrl(link.getUrl());
      datasetBuilder.distribution(listOfNullable(DcatDistributionContainer.builder()
          .distribution(dcatDistributionBuilder.build())
          .build()));
    });

    datasetBuilder.contactPoint(record.getContactForResource().stream()
        .map(contact -> DcatContactPoint.builder()
            .contact(VcardContact.builder()
                .title(contact.getOrganisation())
                .role(contact.getRole())
                .hasEmail(contact.getEmail())
                .build())
            .build())
        .collect(Collectors.toList()));

    Dataset dcatDataset = datasetBuilder.build();

    // Use the defaulted language here: calling record.getMainLanguage().toUpperCase() directly
    // throws a NullPointerException for records without a main language.
    catalogRecord = CatalogRecord.builder()
        .identifier(listOfNullable(record.getMetadataIdentifier()))
        .created(toDate(record.getCreateDate()))
        .modified(toDate(record.getChangeDate()))
        .language(listOfNullable(new RdfResource(
            null,
            "http://publications.europa.eu/resource/authority/language/" + languageUpperCase)))
        .primaryTopic(listOfNullable(new ResourceContainer(dcatDataset, null)))
        .build();
  } catch (JsonMappingException e) {
    e.printStackTrace();
  } catch (JsonProcessingException e) {
    e.printStackTrace();
  }
  return catalogRecord;
}
Use of com.google.cloud.automl.v1.Dataset in project geonetwork-microservices by geonetwork.
Class DcatTest, method testJsonToDcat.
/**
 * Builds a sample DCAT dataset, marshals it to XML with JAXB, parses that XML as RDF/XML and
 * writes the resulting model to standard output as Turtle.
 *
 * <p>A marshalling failure is rethrown as an {@link AssertionError} carrying the original
 * exception, so the test fails with the real cause.
 *
 * @throws IOException if the RDF model cannot be parsed or written
 */
@Test
public void testJsonToDcat() throws IOException {
  String identifier = "1567-765175-6561756";
  Dataset dcatDataset = Dataset.builder()
      .title(List.of("Rivers on earth"))
      .description(List.of("Water drop ..."))
      .identifier(List.of(identifier))
      .subject(List.of(
          Subject.builder()
              .skosConcept(SkosConcept.builder()
                  .about("https://registry.org/hydrology")
                  .prefLabel("Hydrology")
                  .build())
              .build(),
          Subject.builder()
              .skosConcept(SkosConcept.builder().prefLabel("Earth observation").build())
              .build()))
      .landingPage(List.of(DcatDocument.builder()
          .foafDocument(FoafDocument.builder()
              .about("https://data/file.zip")
              .title("Download dataset")
              .description("CC0")
              .format("Shapefile")
              .build())
          .build()))
      .qualifiedRelation(List.of(DcatRelationship.builder()
          .relation(DcatRelation.builder().relation("http://...TODO").build())
          .build()))
      .spatialResolutionInMeters(List.of(new BigDecimal(25000)))
      .temporalResolution(List.of(Duration.ofDays(15)))
      .accessRights(List.of(DcatAccessRights.builder()
          .rightsStatement(RightsStatement.builder()
              .label("public access limited according to Article 13(1)(b) of the INSPIRE Directive")
              .about("https://registry.inspire/...")
              .build())
          .build()))
      .accrualPeriodicity(new RdfResource(
          null, ACCRUAL_PERIODICITY_URI_PREFIX + ACCRUAL_PERIODICITY_TO_ISO.get("daily"), null))
      .conformsTo(new RdfResource(null, "http://iso19115-3.schema.org", null))
      .created(new Date())
      .isReferencedBy(List.of(new RdfResource(null, "https://isReferencedBy", null)))
      .relation(List.of(new RdfResource(null, "https://relation", null)))
      .language(List.of(new RdfResource(
          null, "http://publications.europa.eu/resource/authority/language/FRE", null)))
      .type(List.of(Subject.builder()
          .skosConcept(SkosConcept.builder().prefLabel("dataset").build())
          .build()))
      .page(List.of(DcatDocument.builder()
          .foafDocument(FoafDocument.builder()
              .about("https://apps.titellus.net/ogcapi/collections/main/items/" + identifier)
              .build())
          .build()))
      .versionInfo("1.0")
      .qualifiedAttribution(List.of(DcatQualifiedAttribution.builder()
          .attribution(ProvAttribution.builder()
              .agent(new RdfResource(null, "http://agent", null))
              .hadRole(new RdfResource(null, "http://role/creator", null))
              .build())
          .build()))
      .comment(List.of("Comments ..."))
      .distribution(List.of(DcatDistributionContainer.builder()
          .distribution(DcatDistribution.builder()
              .accessUrl("https://sdi.eea.europa.eu/webdav/continental/europe/natural_areas/birds_directive/eea_v_3035_10_mio_art12-2008-2012_i_2008-2012_v01_r01/Art12-2008-2012_SHP")
              .build())
          .build()))
      .license(DcatLicenseDocumentContainer.builder()
          .license(DcatLicenseDocument.builder()
              .type(Subject.builder()
                  .skosConcept(SkosConcept.builder().prefLabel("CC0").build())
                  .build())
              .build())
          .build())
      .build();

  // POJO to XML
  String dcatXml;
  try {
    StringWriter sw = new StringWriter();
    JAXBContext context = JAXBContext.newInstance(SkosConcept.class, Dataset.class);
    Marshaller marshaller = context.createMarshaller();
    marshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
    marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
    marshaller.marshal(dcatDataset, sw);
    dcatXml = sw.toString();
  } catch (JAXBException e) {
    // Fail here with the real cause instead of a NullPointerException on dcatXml below.
    throw new AssertionError("Failed to marshal the DCAT dataset to XML", e);
  }

  // Encode explicitly as UTF-8 so the bytes do not depend on the platform default charset.
  org.eclipse.rdf4j.model.Model model = Rio.parse(
      new ByteArrayInputStream(dcatXml.getBytes(java.nio.charset.StandardCharsets.UTF_8)),
      "",
      RDFFormat.RDFXML);

  // Other serializations (RDFFormat.RDFXML, RDFFormat.JSONLD) can be written the same way.
  Rio.write(model, System.out, RDFFormat.TURTLE);
}
Aggregations