Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
From the class RegenerateDedupKeysTest, method simpleTest:
@Test
public void simpleTest() throws Exception {
    Job job = jobRegistry.getJob(Constants.JOB_ID_REGEN_DEDUP_KEYS);
    JobParameters jobParams = new JobParameters();
    jobLauncher.run(job, jobParams);
    HarvestedRecord rec1 = harvestedRecordDao.get(21L);
    HarvestedRecord rec2 = harvestedRecordDao.get(22L);
    Assert.assertNotNull(rec1);
    Assert.assertNotNull(rec2);
    Assert.assertEquals(rec1.getPublicationYear(), Long.valueOf(1993L));
    Assert.assertEquals(rec2.getPublicationYear(), Long.valueOf(1929L));
}
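The test assumes records 21 and 22 already exist in the test database; running the job apparently re-derives their dedup keys, including the publication year, from the stored raw records. As a minimal sketch, the same launch could also be written with Spring Batch's JobParametersBuilder; the "timestamp" parameter name below is hypothetical and serves only to make each job instance unique:

import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;

// Sketch only, not taken from the project: the same launch expressed with
// JobParametersBuilder; the "timestamp" key is a hypothetical uniquifier.
JobParameters sketchParams = new JobParametersBuilder()
        .addLong("timestamp", System.currentTimeMillis())
        .toJobParameters();
jobLauncher.run(jobRegistry.getJob(Constants.JOB_ID_REGEN_DEDUP_KEYS), sketchParams);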
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
From the class ImportRecordsJobTest, method testSimpleImportISO2709:
@Test
public void testSimpleImportISO2709() throws Exception {
    Job job = jobRegistry.getJob(Constants.JOB_ID_IMPORT);
    Map<String, JobParameter> params = new HashMap<>();
    params.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(300L));
    params.put(Constants.JOB_PARAM_IN_FILE, new JobParameter(testFileISO1));
    params.put(Constants.JOB_PARAM_FORMAT, new JobParameter("iso"));
    JobParameters jobParams = new JobParameters(params);
    jobLauncher.run(job, jobParams);
    HarvestedRecord insertedRecord = harvestedRecordDao.findByIdAndHarvestConfiguration("000000146", 300L);
    Assert.assertNotNull(insertedRecord);
}
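The three JOB_PARAM_* entries recur across the import tests, so a small helper could bundle them. A hedged sketch, assuming only the constants and classes already used above (the helper itself is not part of the project):

import java.util.HashMap;
import java.util.Map;
import org.springframework.batch.core.JobParameter;
import org.springframework.batch.core.JobParameters;

// Illustrative helper, not from the project: builds the parameters used by
// the import job above. Usage: importJobParams(300L, testFileISO1, "iso")
private JobParameters importJobParams(long confId, String inFile, String format) {
    Map<String, JobParameter> params = new HashMap<>();
    params.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(confId)); // autoboxed to Long
    params.put(Constants.JOB_PARAM_IN_FILE, new JobParameter(inFile));
    params.put(Constants.JOB_PARAM_FORMAT, new JobParameter(format));
    return new JobParameters(params);
}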
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
From the class OAIHarvestJobTest, method testDeleteRecord:
@Test
public void testDeleteRecord() throws Exception {
    reset(httpClient);
    InputStream response0 = this.getClass().getResourceAsStream("/sample/IdentifyNonstandardGranularity.xml");
    InputStream response1 = this.getClass().getResourceAsStream("/sample/ListRecordsNLKdeleted1.xml");
    InputStream response2 = this.getClass().getResourceAsStream("/sample/ListRecordsNLKdeleted2.xml");
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=Identify")).andReturn(response0);
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=ListRecords&metadataPrefix=marc21")).andReturn(response1);
    expect(httpClient.executeGet("http://oai.medvik.cz/medvik2cpk/oai?verb=ListRecords&resumptionToken=xaiutmvy00003")).andReturn(response2);
    replay(httpClient);
    final Long confID = 301L;
    Map<String, JobParameter> params = new HashMap<>();
    params.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(confID));
    JobExecution exec = jobExecutor.execute(Constants.JOB_ID_HARVEST, new JobParameters(params));
    Assert.assertEquals(exec.getExitStatus(), ExitStatus.COMPLETED);
    OAIHarvestConfiguration config = configDao.get(confID);
    HarvestedRecord record = recordDao.findByIdAndHarvestConfiguration("111111", config);
    Assert.assertNotNull(record, "Record not stored.");
    Assert.assertNotNull(record.getDeleted());
}
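Since the test stubs exactly three HTTP responses with EasyMock's expect/replay, a natural follow-up (not shown in the project snippet) is to verify that every expected request was actually issued:

import static org.easymock.EasyMock.verify;

// Optional check, assuming the EasyMock setup above: fails the test if any
// of the three stubbed executeGet(...) calls was never made.
verify(httpClient);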
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
From the class ImportRecordsWriter, method writeInner:
protected void writeInner(List<? extends List<Record>> items) throws Exception {
    for (List<Record> records : items) {
        for (Record currentRecord : records) {
            try {
                // serialize the MARC record to MARC XML
                ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                MarcWriter marcWriter = new MarcXmlWriter(outStream, true);
                marcWriter.setConverter(ISOCharConvertor.INSTANCE);
                marcWriter.write(currentRecord);
                marcWriter.close();
                // the recordId is needed before interception
                byte[] recordContent = outStream.toByteArray();
                MetadataRecord metadata = parseMetadata(recordContent);
                String recordId = metadata.getUniqueId();
                if (regexpExtractor != null) {
                    recordId = regexpExtractor.extract(recordId);
                }
                if (harvestConfiguration.isInterceptionEnabled()) {
                    MarcRecordInterceptor interceptor = marcInterceptorFactory.getInterceptor(harvestConfiguration, recordId, recordContent);
                    if (interceptor != null) {
                        byte[] recordContentNew = interceptor.intercept();
                        if (!Arrays.equals(recordContent, recordContentNew)) {
                            // record content was changed, parse metadata again
                            metadata = parseMetadata(recordContentNew);
                            // keep the intercepted content
                            recordContent = recordContentNew;
                        }
                    }
                }
                HarvestedRecord hr = harvestedRecordDao.findByIdAndHarvestConfiguration(recordId, configurationId);
                if (hr == null) {
                    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestConfiguration, recordId);
                    hr = new HarvestedRecord(id);
                    // TODO detect format
                    hr.setFormat("marc21-xml");
                    hr.setHarvestedFrom(harvestConfiguration);
                }
                hr.setUpdated(new Date());
                hr.setDeleted(null);
                hr.setRawRecord(recordContent);
                harvestedRecordDao.persist(hr);
                dedupKeysParser.parse(hr, metadata);
                if (harvestConfiguration.isFilteringEnabled() && !hr.getShouldBeProcessed()) {
                    logger.debug("Filtered record: " + hr.getUniqueId());
                    hr.setDeleted(new Date());
                }
                harvestedRecordDao.persist(hr);
                progress.incrementAndLogProgress();
            } catch (Exception e) {
                logger.warn("Error occurred in processing record");
                throw e;
            }
        }
    }
}
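For context, a writer like this is typically registered in a chunk-oriented Spring Batch step. A hypothetical wiring sketch, assuming ImportRecordsWriter is (or wraps) an ItemWriter<List<Record>> and that a matching reader exists; the step name and chunk size are assumptions:

import java.util.List;
import org.marc4j.marc.Record;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.item.ItemReader;
import org.springframework.context.annotation.Bean;

// Hypothetical configuration, not taken from the project: wires an
// ImportRecordsWriter into a chunk-oriented step.
@Bean
public Step importRecordsStep(StepBuilderFactory steps,
        ItemReader<List<Record>> reader,
        ImportRecordsWriter writer) {
    return steps.get("importRecordsStep")          // step name is an assumption
            .<List<Record>, List<Record>>chunk(20) // chunk size is an assumption
            .reader(reader)
            .writer(writer)
            .build();
}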
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
From the class DedupSkatKeysProcessor, method process:
@Override
public List<HarvestedRecord> process(List<Long> item) throws Exception {
    if (item == null || item.size() < 2) {
        return Collections.emptyList();
    }
    // the first id in the list identifies the Skat record
    HarvestedRecord skatRec = harvestedRecordDao.get(item.get(0));
    // the remaining ids identify ordinary records
    Set<HarvestedRecord> ordinaryRecords = new HashSet<>();
    item.subList(1, item.size()).forEach(i -> ordinaryRecords.add(harvestedRecordDao.get(i)));
    List<Title> expectedTitles = skatRec.getTitles();
    Set<HarvestedRecord> toBeMerged = new HashSet<>();
    // the merge decision is based on similarity of titles
    for (HarvestedRecord currentRec : ordinaryRecords) {
        for (Title currentTitle : currentRec.getTitles()) {
            for (Title expectedTitle : expectedTitles) {
                if (StringUtils.simmilarTitleMatch(currentTitle, expectedTitle, 70, 8)) {
                    toBeMerged.add(currentRec);
                }
            }
        }
    }
    if (toBeMerged.isEmpty()) {
        return Collections.emptyList();
    }
    List<Long> toBeMergedIds = new ArrayList<>();
    toBeMergedIds.add(skatRec.getId());
    toBeMerged.forEach(r -> toBeMergedIds.add(r.getId()));
    // pass the ids to be merged to the parent processor
    return super.process(toBeMergedIds);
}
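The contract implied by the code above is that the incoming list starts with the Skat record id, followed by candidate ids; the Skat id is also kept at the head of the list passed to the parent processor, presumably marking it as the preferred record (an assumption based only on the ordering built here). A usage sketch with hypothetical names (processor, skatRecordId, and the candidate ids are illustrative, not from the project):

import java.util.Arrays;
import java.util.List;

// Usage sketch: the first id must identify the Skat record; the remaining
// ids are the candidates tested for title similarity.
List<Long> group = Arrays.asList(skatRecordId, candidateId1, candidateId2);
List<HarvestedRecord> merged = processor.process(group);
// 'merged' is empty when no candidate title matches the Skat record's titles.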