@@ -50,6 +50,34 @@ public function __construct(array $configuration = [])
50
50
$ this ->excludeHosts = isset ($ configuration ["excludeHosts " ]) ? explode (", " , $ configuration ["excludeHosts " ]) : [];
51
51
}
52
52
53
+
54
+
55
/**
 * Check every URL of the given document and report the ones that cannot be reached.
 *
 * URLs are collected via getDocumentUrls(). A URL accepted by isExcluded() is
 * skipped without being probed; any other URL is probed with urlExists() and
 * reported through addError() when the probe fails.
 *
 * @param mixed $value The value to validate; it is passed unchanged to getDocumentUrls()
 * @return void
 */
protected function isValid($value): void
{
    $documentUrls = $this->getDocumentUrls($value);

    foreach ($documentUrls as $candidate) {
        // Excluded URLs are never probed.
        if ($this->isExcluded($candidate)) {
            continue;
        }

        if ($this->urlExists($candidate)) {
            continue;
        }

        $this->addError('URL "' . $candidate . '" could not be found.', 1737384167);
    }
}
63
+
64
/**
 * Retrieve all URLs of the document, except for the URLs of the file groups, where only one URL is returned per file group and host.
 *
 * The file group URLs come from getFileUrlAndRemoveFileGroups(); the remaining
 * URLs are extracted from the serialized XML with ValidationHelper::URL_REGEX.
 * Duplicates are removed and the result is re-indexed as a list.
 *
 * @param DOMDocument $document The document
 * @return array The array of URLs
 */
protected function getDocumentUrls(DOMDocument $document): array
{
    // Work on a copy so the file group removal below does not touch the original document.
    $tempDocument = clone $document;
    $urls = $this->getFileUrlAndRemoveFileGroups($tempDocument);

    // Get the URLs of the document without the file group nodes.
    $xml = $tempDocument->saveXML();
    if ($xml !== false) {
        preg_match_all('/' . ValidationHelper::URL_REGEX . '/i', $xml, $matches);

        // Append with array_merge(): the union operator (+) keeps the left-hand entry for
        // every key present in both arrays, which would discard regex matches whose
        // numeric index is already used in $urls.
        $urls = array_merge($urls, $matches[0] ?? []);
    }

    return array_values(array_unique($urls));
}
80
+
53
81
/**
54
82
* Get the representative file urls and remove file groups to prevent rechecking.
55
83
*
@@ -67,6 +95,7 @@ protected function getFileUrlAndRemoveFileGroups(DOMDocument $document): array
67
95
foreach ($ fileGroups as $ fileGroup ) {
68
96
$ fLocats = $ xpath ->query ('mets:file/mets:FLocat ' , $ fileGroup );
69
97
foreach ($ fLocats as $ fLocat ) {
98
+ // @phpstan-ignore-next-line
70
99
$ url = $ fLocat ->getAttribute ("xlink:href " );
71
100
$ host = parse_url ($ url , PHP_URL_HOST );
72
101
if (!in_array ($ host , $ hosts )) {
@@ -80,32 +109,6 @@ protected function getFileUrlAndRemoveFileGroups(DOMDocument $document): array
80
109
return $ urls ;
81
110
}
82
111
83
- protected function isValid ($ value ): void
84
- {
85
- foreach ($ this ->getDocumentUrls ($ value ) as $ url ) {
86
- if (!$ this ->isExcluded ($ url ) && !$ this ->urlExists ($ url )) {
87
- $ this ->addError ('URL " ' . $ url . '" could not be found. ' , 1737384167 );
88
- }
89
- }
90
- }
91
-
92
- /**
93
- * Retrieve all URLs of the document, except for the URLs of the file groups, where only one URL is returned per file group and host.
94
- *
95
- * @param DOMDocument $document The document
96
- * @return array The array of URLs
97
- */
98
- protected function getDocumentUrls (DOMDocument $ document ): array
99
- {
100
- $ tempDocument = clone $ document ; // do not modify original document
101
- $ urls = $ this ->getFileUrlAndRemoveFileGroups ($ tempDocument );
102
-
103
- // get the urls of document without file group nodes
104
- preg_match_all ('/ ' . ValidationHelper::URL_REGEX . '/i ' , $ tempDocument ->saveXML (), $ matches );
105
- $ urls += $ matches [0 ] ?? [];
106
- return array_unique ($ urls );
107
- }
108
-
109
112
private function urlExists ($ url ): bool
110
113
{
111
114
$ headers = @get_headers ($ url );
0 commit comments