6
6
import com .qbizm .kramerius .imp .jaxb .DigitalObject ;
7
7
import cz .incad .kramerius .StreamHeadersObserver ;
8
8
import cz .incad .kramerius .fedora .RepositoryAccess ;
9
+ import cz .incad .kramerius .fedora .impl .tmp .ContentFormat ;
10
+ import cz .incad .kramerius .fedora .impl .tmp .UnsupportedContentFormatException ;
9
11
import cz .incad .kramerius .fedora .om .repository .AkubraRepository ;
10
12
import cz .incad .kramerius .fedora .om .repository .RepositoryDatastream ;
11
13
import cz .incad .kramerius .fedora .om .repository .RepositoryException ;
33
35
34
36
import javax .annotation .Nullable ;
35
37
import javax .xml .bind .JAXBException ;
38
+ import javax .xml .parsers .DocumentBuilderFactory ;
36
39
import javax .xml .xpath .XPathExpressionException ;
37
40
import java .io .ByteArrayInputStream ;
38
41
import java .io .IOException ;
39
42
import java .io .InputStream ;
40
43
import java .io .StringReader ;
41
44
import java .nio .charset .Charset ;
45
+ import java .nio .charset .StandardCharsets ;
42
46
import java .time .LocalDateTime ;
43
47
import java .time .format .DateTimeParseException ;
44
48
import java .util .*;
@@ -91,30 +95,6 @@ public boolean isPidAvailable(String pid) throws IOException, RepositoryExceptio
91
95
boolean exists = this .repositoryApi .objectExists (pid );
92
96
return exists ;
93
97
}
94
- @ Override
95
- public InputStream getFoxml (String pid , boolean archive ) throws IOException {
96
- try {
97
- if (archive ){
98
- DigitalObject obj = manager .readObjectCloneFromStorage (pid );
99
- manager .resolveArchivedDatastreams (obj );
100
- return this .manager .marshallObject (obj );
101
- }else {
102
- return this .manager .retrieveObject (pid );
103
- }
104
- } catch (Exception e ) {
105
- throw new IOException (e );
106
- }
107
- }
108
- @ Override
109
- public org .dom4j .Document getFoxml (String pid ) throws RepositoryException , IOException {
110
- Lock readLock = AkubraDOManager .getReadLock (pid );
111
- try {
112
- RepositoryObject object = akubraRepositoryImpl .getObject (pid );
113
- return Utils .inputstreamToDocument (object .getFoxml (), true );
114
- } finally {
115
- readLock .unlock ();
116
- }
117
- }
118
98
//-------- get object property
119
99
@ Override
120
100
public String getProperty (String pid , String propertyName ) throws IOException , RepositoryException {
@@ -187,70 +167,6 @@ public List<String> getDatastreamNames(String pid) throws RepositoryException, I
187
167
readLock .unlock ();
188
168
}
189
169
}
190
- // TODO here we always use AkubraUtils.getStreamContent but we have also AkubraObject.AkubraDatastream for fetching stream content
191
- @ Override
192
- public InputStream getDataStream (String pid , String datastreamName ) throws IOException {
193
- try {
194
- pid = makeSureObjectPid (pid );
195
- if (this .accessLog != null && this .accessLog .isReportingAccess (pid , datastreamName )) {
196
- reportAccess (pid , datastreamName );
197
- }
198
- DigitalObject object = manager .readObjectFromStorage (pid );
199
- if (object != null ) {
200
- DatastreamVersionType stream = AkubraUtils .getLastStreamVersion (object , datastreamName );
201
- if (stream != null ) {
202
- return AkubraUtils .getStreamContent (stream , manager );
203
- } else {
204
- throw new IOException ("cannot find stream '" + datastreamName + "' for pid '" + pid + "'" );
205
- }
206
- } else {
207
- throw new IOException ("cannot find pid '" + pid + "'" );
208
- }
209
- } catch (Exception e ) {
210
- throw new IOException (e );
211
- }
212
- }
213
- // XML data stream
214
- @ Override
215
- public Document getStream (String pid , String streamName ) throws IOException {
216
- DigitalObject object = manager .readObjectFromStorage (pid );
217
- if (object != null ) {
218
- DatastreamVersionType stream = AkubraUtils .getLastStreamVersion (object , streamName );
219
- if (stream != null ) {
220
- if (stream .getXmlContent () != null ) {
221
- List <Element > elementList = stream .getXmlContent ().getAny ();
222
- if (!elementList .isEmpty ()) {
223
- return elementList .get (0 ).getOwnerDocument ();
224
- } else {
225
- throw new IOException ("Datastream not found: " + pid + " - " + streamName );
226
- }
227
- } else {
228
- throw new IOException ("Expected XML datastream: " + pid + " - " + streamName );
229
- }
230
- }
231
- throw new IOException ("Datastream not found: " + pid + " - " + streamName );
232
- }
233
- throw new IOException ("Object not found: " + pid );
234
- }
235
- @ Override
236
- public org .dom4j .Document getDatastreamXml (String pid , String dsId ) throws RepositoryException , IOException {
237
- Lock readLock = AkubraDOManager .getReadLock (pid );
238
- try {
239
- RepositoryObject object = akubraRepositoryImpl .getObject (pid );
240
- if (object .streamExists (dsId )) {
241
- org .dom4j .Document foxml = Utils .inputstreamToDocument (object .getFoxml (), true );
242
- org .dom4j .Element dcEl = (org .dom4j .Element ) Dom4jUtils .buildXpath (String .format ("/foxml:digitalObject/foxml:datastream[@ID='%s']" , dsId )).selectSingleNode (foxml );
243
- org .dom4j .Element detached = (org .dom4j .Element ) dcEl .detach ();
244
- org .dom4j .Document result = DocumentHelper .createDocument ();
245
- result .add (detached );
246
- return result ;
247
- } else {
248
- return null ;
249
- }
250
- } finally {
251
- readLock .unlock ();
252
- }
253
- }
254
170
@ Override
255
171
public List <Map <String , String >> getStreamsOfObject (String pid ) throws IOException {
256
172
try {
@@ -320,9 +236,120 @@ public Date getStreamLastmodifiedFlag(String pid, String streamName) throws IOEx
320
236
}
321
237
throw new IOException ("Object not found: " + pid );
322
238
}
323
- //------ get stream CONTENT
324
- // input data stream
239
+ //------------------------------------------------------------------------------------------------------------
240
+ // New generic content-access API (work in progress)
241
+ public <T > T getStreamContent (String pid , KnownDatastreams dsId , Class <T > returnType ) throws IOException , UnsupportedContentFormatException {
242
+ // Determine supported formats for the content
243
+ ContentFormat supportedFormat = determineSupportedFormat (id );
244
+ // Validate the requested format
245
+ if ((contentType == String .class && !supportedFormat .supportsString ()) ||
246
+ (contentType == InputStream .class && !supportedFormat .supportsStream ()) ||
247
+ (contentType == Document .class && !supportedFormat .supportsXml ())) {
248
+ throw new UnsupportedContentFormatException ("Format not supported for content ID: " + id );
249
+ }
250
+ // Retrieve content as bytes
251
+ byte [] rawContent = fetchContentFromStorage (id );
252
+ // Convert content to the requested format
253
+ if (contentType == String .class ) {
254
+ return contentType .cast (new String (rawContent , StandardCharsets .UTF_8 ));
255
+ } else if (contentType == InputStream .class ) {
256
+ return contentType .cast (new ByteArrayInputStream (rawContent ));
257
+ } else if (contentType == Document .class ) {
258
+ return contentType .cast (parseXml (rawContent ));
259
+ }
260
+ throw new IllegalArgumentException ("Unsupported content type: " + contentType );
261
+ }
262
+ public <T > T getFoxml (String pid , KnownDatastreams dsId , Class <T > returnType ) throws IOException , UnsupportedContentFormatException {
263
+ // Determine supported formats for the content
264
+ ContentFormat supportedFormat = determineSupportedFormat (id );
265
+ // Validate the requested format
266
+ if ((contentType == String .class && !supportedFormat .supportsString ()) ||
267
+ (contentType == InputStream .class && !supportedFormat .supportsStream ()) ||
268
+ (contentType == Document .class && !supportedFormat .supportsXml ())) {
269
+ throw new UnsupportedContentFormatException ("Format not supported for content ID: " + id );
270
+ }
271
+ // Retrieve content as bytes
272
+ byte [] rawContent = fetchContentFromStorage (id );
273
+ // Convert content to the requested format
274
+ if (contentType == String .class ) {
275
+ return contentType .cast (new String (rawContent , StandardCharsets .UTF_8 ));
276
+ } else if (contentType == InputStream .class ) {
277
+ return contentType .cast (new ByteArrayInputStream (rawContent ));
278
+ } else if (contentType == Document .class ) {
279
+ return contentType .cast (parseXml (rawContent ));
280
+ }
281
+ throw new IllegalArgumentException ("Unsupported content type: " + contentType );
282
+ }
283
+ // NOTE(review): body-less declaration placed among concrete methods; this only compiles if the
+ // method is declared abstract (in an abstract class) or moved to an interface — TODO confirm intent.
+ StreamContentHelper getStreamContentHelper ();
284
+
285
+ // TODO: we always use AkubraUtils.getStreamContent here, although AkubraObject.AkubraDatastream can also fetch stream content
286
+ private ContentFormat determineSupportedFormat (String id ) {
287
+ // Example logic to determine supported formats
288
+ if (id .startsWith ("streamOnly" )) {
289
+ return new ContentFormat (false , true , false );
290
+ } else {
291
+ return new ContentFormat (true , true , true );
292
+ }
293
+ }
294
+ private byte [] fetchContentFromStorage (String id ) {
295
+ // Mock: Fetch content as bytes from your storage
296
+ return ("<xml>Content for ID: " + id + "</xml>" ).getBytes (StandardCharsets .UTF_8 );
297
+ }
298
+ private Document parseXml (byte [] content ) throws IOException {
299
+ try {
300
+ DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance ();
301
+ return factory .newDocumentBuilder ().parse (new ByteArrayInputStream (content ));
302
+ } catch (Exception e ) {
303
+ throw new IOException ("Failed to parse XML" , e );
304
+ }
305
+ }
306
+
307
+ // TODO: merge into a single method that renders both content types; we could also add a dsId parameter and return the whole FOXML when it is null
308
+ // <--- AkubraObject.getFoXml
309
+ public org .dom4j .Document getDatastreamXml (String pid , String dsId ) throws RepositoryException , IOException {
310
+ Lock readLock = AkubraDOManager .getReadLock (pid );
311
+ try {
312
+ RepositoryObject object = akubraRepositoryImpl .getObject (pid );
313
+ if (object .streamExists (dsId )) {
314
+ org .dom4j .Document foxml = Utils .inputstreamToDocument (object .getFoxml (), true );
315
+ org .dom4j .Element dcEl = (org .dom4j .Element ) Dom4jUtils .buildXpath (String .format ("/foxml:digitalObject/foxml:datastream[@ID='%s']" , dsId )).selectSingleNode (foxml );
316
+ org .dom4j .Element detached = (org .dom4j .Element ) dcEl .detach ();
317
+ org .dom4j .Document result = DocumentHelper .createDocument ();
318
+ result .add (detached );
319
+ return result ;
320
+ } else {
321
+ return null ;
322
+ }
323
+ } finally {
324
+ readLock .unlock ();
325
+ }
326
+ }
327
+ @ Override
328
+ public InputStream getFoxml (String pid , boolean archive ) throws IOException {
329
+ try {
330
+ if (archive ){
331
+ DigitalObject obj = manager .readObjectCloneFromStorage (pid );
332
+ manager .resolveArchivedDatastreams (obj );
333
+ return this .manager .marshallObject (obj );
334
+ }else {
335
+ return this .manager .retrieveObject (pid );
336
+ }
337
+ } catch (Exception e ) {
338
+ throw new IOException (e );
339
+ }
340
+ }
325
341
@ Override
342
+ public org .dom4j .Document getFoxml (String pid ) throws RepositoryException , IOException {
343
+ Lock readLock = AkubraDOManager .getReadLock (pid );
344
+ try {
345
+ RepositoryObject object = akubraRepositoryImpl .getObject (pid );
346
+ return Utils .inputstreamToDocument (object .getFoxml (), true );
347
+ } finally {
348
+ readLock .unlock ();
349
+ }
350
+ }
351
+
352
+ // <--- AkubraObject.getStream.getContent (6x)
326
353
public InputStream getLatestVersionOfDatastream (String pid , String dsId ) throws RepositoryException , IOException {
327
354
Lock readLock = AkubraDOManager .getReadLock (pid );
328
355
try {
@@ -337,6 +364,62 @@ public InputStream getLatestVersionOfDatastream(String pid, String dsId) throws
337
364
readLock .unlock ();
338
365
}
339
366
}
367
+ // <-- DigitalObject, AkubraUtils.getLastStreamVersion (3x)
368
+ public InputStream getDataStream (String pid , String datastreamName ) throws IOException {
369
+ try {
370
+ pid = makeSureObjectPid (pid );
371
+ if (this .accessLog != null && this .accessLog .isReportingAccess (pid , datastreamName )) {
372
+ reportAccess (pid , datastreamName );
373
+ }
374
+ DigitalObject object = manager .readObjectFromStorage (pid );
375
+ if (object != null ) {
376
+ DatastreamVersionType stream = AkubraUtils .getLastStreamVersion (object , datastreamName );
377
+ if (stream != null ) {
378
+ return AkubraUtils .getStreamContent (stream , manager );
379
+ } else {
380
+ throw new IOException ("cannot find stream '" + datastreamName + "' for pid '" + pid + "'" );
381
+ }
382
+ } else {
383
+ throw new IOException ("cannot find pid '" + pid + "'" );
384
+ }
385
+ } catch (Exception e ) {
386
+ throw new IOException (e );
387
+ }
388
+ }
389
+
390
+ // getLatestVersionOfDatastream (4x)
391
+ public org .dom4j .Document getLatestVersionOfInlineXmlDatastream (String pid , String dsId ) throws RepositoryException , IOException {
392
+ InputStream is = getLatestVersionOfDatastream (pid , dsId );
393
+ return is == null ? null : Utils .inputstreamToDocument (is , true );
394
+ }
395
+ // getLatestVersionOfDatastream (1x)
396
+ public String getLatestVersionOfManagedTextDatastream (String pid , String dsId ) throws RepositoryException , IOException {
397
+ InputStream is = getLatestVersionOfDatastream (pid , dsId );
398
+ return is == null ? null : Utils .inputstreamToString (is );
399
+ }
400
+
401
+ // <-- DigitalObject, AkubraUtils.getLastStreamVersion (3x)
402
+ public Document getStream (String pid , String streamName ) throws IOException {
403
+ DigitalObject object = manager .readObjectFromStorage (pid );
404
+ if (object != null ) {
405
+ DatastreamVersionType stream = AkubraUtils .getLastStreamVersion (object , streamName );
406
+ if (stream != null ) {
407
+ if (stream .getXmlContent () != null ) {
408
+ List <Element > elementList = stream .getXmlContent ().getAny ();
409
+ if (!elementList .isEmpty ()) {
410
+ return elementList .get (0 ).getOwnerDocument ();
411
+ } else {
412
+ throw new IOException ("Datastream not found: " + pid + " - " + streamName );
413
+ }
414
+ } else {
415
+ throw new IOException ("Expected XML datastream: " + pid + " - " + streamName );
416
+ }
417
+ }
418
+ throw new IOException ("Datastream not found: " + pid + " - " + streamName );
419
+ }
420
+ throw new IOException ("Object not found: " + pid );
421
+ }
422
+
340
423
@ Override
341
424
public InputStream getImgFull (String pid ) throws IOException , RepositoryException {
342
425
this .accessLog .reportAccess (pid , KnownDatastreams .IMG_FULL .toString ());
@@ -385,22 +468,10 @@ public InputStream getSmallThumbnail(String pid) throws IOException {
385
468
public InputStream getImageFULL (String pid ) throws IOException {
386
469
return getDataStream (pid , FedoraUtils .IMG_FULL_STREAM );
387
470
}
388
- // text data stream
389
- @ Override
390
- public String getLatestVersionOfManagedTextDatastream (String pid , String dsId ) throws RepositoryException , IOException {
391
- InputStream is = getLatestVersionOfDatastream (pid , dsId );
392
- return is == null ? null : Utils .inputstreamToString (is );
393
- }
394
471
@ Override
395
472
public String getOcrText (String pid ) throws IOException , RepositoryException {
396
473
return getLatestVersionOfManagedTextDatastream (pid , KnownDatastreams .OCR_TEXT .toString ());
397
474
}
398
- // XML data stream
399
- @ Override
400
- public org .dom4j .Document getLatestVersionOfInlineXmlDatastream (String pid , String dsId ) throws RepositoryException , IOException {
401
- InputStream is = getLatestVersionOfDatastream (pid , dsId );
402
- return is == null ? null : Utils .inputstreamToDocument (is , true );
403
- }
404
475
@ Override
405
476
public org .dom4j .Document getRelsExt (String pid , boolean namespaceAware ) throws IOException , RepositoryException {
406
477
org .dom4j .Document doc = getLatestVersionOfInlineXmlDatastream (pid , KnownDatastreams .RELS_EXT .toString ());
@@ -458,6 +529,7 @@ public org.dom4j.Document getOcrAlto(String pid, boolean namespaceAware) throws
458
529
}
459
530
return doc ;
460
531
}
532
+ //----------------------------------------------------------------------------------------------------------------
461
533
// --- check stream exists
462
534
@ Override
463
535
public boolean isStreamAvailable (String pid , String dsId ) throws IOException , RepositoryException {
0 commit comments