Skip to content

Commit 1b2e64c

Browse files
Merge pull request #413 from openzim/no_crash_on_corrupted
[CHECK] Do not continue with other checks if integrity check fails.
2 parents d6c6a1b + 875e09a commit 1b2e64c

File tree

4 files changed

+78
-56
lines changed

4 files changed

+78
-56
lines changed

src/zimcheck/checks.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ void test_checksum(zim::Archive& archive, ErrorLogger& reporter) {
240240
}
241241
}
242242

243-
void test_integrity(const std::string& filename, ErrorLogger& reporter) {
243+
bool test_integrity(const std::string& filename, ErrorLogger& reporter) {
244244
reporter.infoMsg("[INFO] Verifying ZIM-archive structure integrity...");
245245
zim::IntegrityCheckList checks;
246246
checks.set(); // enable all checks (including checksum)
@@ -249,6 +249,7 @@ void test_integrity(const std::string& filename, ErrorLogger& reporter) {
249249
if (!result) {
250250
reporter.infoMsg(" [ERROR] ZIM file's low level structure is invalid");
251251
}
252+
return result;
252253
}
253254

254255

src/zimcheck/checks.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ class ErrorLogger {
109109

110110

111111
void test_checksum(zim::Archive& archive, ErrorLogger& reporter);
112-
void test_integrity(const std::string& filename, ErrorLogger& reporter);
112+
bool test_integrity(const std::string& filename, ErrorLogger& reporter);
113113
void test_metadata(const zim::Archive& archive, ErrorLogger& reporter);
114114
void test_favicon(const zim::Archive& archive, ErrorLogger& reporter);
115115
void test_mainpage(const zim::Archive& archive, ErrorLogger& reporter);

src/zimcheck/zimcheck.cpp

Lines changed: 59 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -266,70 +266,75 @@ int zimcheck (const std::vector<const char*>& args)
266266
error.infoMsg("[INFO] Zimcheck version is " + std::string(VERSION));
267267

268268
//Test 0: Low-level ZIM-file structure integrity checks
269-
if(enabled_tests.isEnabled(TestType::INTEGRITY))
270-
test_integrity(filename, error);
269+
bool should_run_full_test = true;
270+
if(enabled_tests.isEnabled(TestType::INTEGRITY)) {
271+
should_run_full_test = test_integrity(filename, error);
272+
} else {
273+
error.infoMsg("[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.");
274+
}
275+
271276

272-
// Does it make sense to do the other checks if the integrity
273-
// check fails?
274-
zim::Archive archive( filename );
275-
error.addInfo("file_uuid", stringify(archive.getUuid()));
277+
if (should_run_full_test) {
278+
zim::Archive archive( filename );
279+
error.addInfo("file_uuid", stringify(archive.getUuid()));
276280

277-
//Test 1: Internal Checksum
278-
if(enabled_tests.isEnabled(TestType::CHECKSUM)) {
279-
if ( enabled_tests.isEnabled(TestType::INTEGRITY) ) {
280-
error.infoMsg(
281-
"[INFO] Avoiding redundant checksum test"
282-
" (already performed by the integrity check)."
283-
);
284-
} else {
285-
test_checksum(archive, error);
281+
//Test 1: Internal Checksum
282+
if(enabled_tests.isEnabled(TestType::CHECKSUM)) {
283+
if ( enabled_tests.isEnabled(TestType::INTEGRITY) ) {
284+
error.infoMsg(
285+
"[INFO] Avoiding redundant checksum test"
286+
" (already performed by the integrity check)."
287+
);
288+
} else {
289+
test_checksum(archive, error);
290+
}
286291
}
287-
}
288292

289-
//Test 2: Metadata Entries:
290-
//The file is searched for the compulsory metadata entries.
291-
if(enabled_tests.isEnabled(TestType::METADATA))
292-
test_metadata(archive, error);
293+
//Test 2: Metadata Entries:
294+
//The file is searched for the compulsory metadata entries.
295+
if(enabled_tests.isEnabled(TestType::METADATA))
296+
test_metadata(archive, error);
293297

294-
//Test 3: Test for Favicon.
295-
if(enabled_tests.isEnabled(TestType::FAVICON))
296-
test_favicon(archive, error);
298+
//Test 3: Test for Favicon.
299+
if(enabled_tests.isEnabled(TestType::FAVICON))
300+
test_favicon(archive, error);
297301

298302

299-
//Test 4: Main Page Entry
300-
if(enabled_tests.isEnabled(TestType::MAIN_PAGE))
301-
test_mainpage(archive, error);
303+
//Test 4: Main Page Entry
304+
if(enabled_tests.isEnabled(TestType::MAIN_PAGE))
305+
test_mainpage(archive, error);
302306

303-
/* Now we want to avoid to loop on the tests but on the article.
304-
*
305-
* If we loop of the tests we will have :
306-
*
307-
* for (test: tests) {
308-
* for(article: articles) {
309-
* data = article->getData();
310-
* ...
311-
* }
312-
* }
313-
*
314-
* And so we will get several the data of an article (and so decompression and so).
315-
* By looping on the articles first, we have :
316-
*
317-
* for (article: articles) {
318-
* data = article->getData();
319-
* for (test: tests) {
320-
* ...
321-
* }
322-
* }
323-
*/
307+
/* Now we want to avoid to loop on the tests but on the article.
308+
*
309+
* If we loop of the tests we will have :
310+
*
311+
* for (test: tests) {
312+
* for(article: articles) {
313+
* data = article->getData();
314+
* ...
315+
* }
316+
* }
317+
*
318+
* And so we will get several the data of an article (and so decompression and so).
319+
* By looping on the articles first, we have :
320+
*
321+
* for (article: articles) {
322+
* data = article->getData();
323+
* for (test: tests) {
324+
* ...
325+
* }
326+
* }
327+
*/
324328

325-
if ( enabled_tests.isEnabled(TestType::URL_INTERNAL) ||
326-
enabled_tests.isEnabled(TestType::URL_EXTERNAL) ||
327-
enabled_tests.isEnabled(TestType::REDUNDANT) ||
328-
enabled_tests.isEnabled(TestType::EMPTY) )
329-
test_articles(archive, error, progress, enabled_tests, thread_count);
329+
if ( enabled_tests.isEnabled(TestType::URL_INTERNAL) ||
330+
enabled_tests.isEnabled(TestType::URL_EXTERNAL) ||
331+
enabled_tests.isEnabled(TestType::REDUNDANT) ||
332+
enabled_tests.isEnabled(TestType::EMPTY) )
333+
test_articles(archive, error, progress, enabled_tests, thread_count);
330334

331-
if ( enabled_tests.isEnabled(TestType::REDIRECT))
332-
test_redirect_loop(archive, error);
335+
if ( enabled_tests.isEnabled(TestType::REDIRECT))
336+
test_redirect_loop(archive, error);
337+
}
333338

334339
const bool overallStatus = error.overallStatus();
335340
error.addInfo("status", overallStatus);

test/zimcheck-test.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ TEST(zimcheck, checksum_goodzimfile)
274274
const std::string expected_output(
275275
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
276276
"[INFO] Zimcheck version is " VERSION "\n"
277+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
277278
"[INFO] Verifying Internal Checksum..." "\n"
278279
"[INFO] Overall Test Status: Pass" "\n"
279280
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -293,6 +294,7 @@ TEST(zimcheck, metadata_goodzimfile)
293294
const std::string expected_output(
294295
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
295296
"[INFO] Zimcheck version is " VERSION "\n"
297+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
296298
"[INFO] Checking metadata..." "\n"
297299
"[INFO] Overall Test Status: Pass" "\n"
298300
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -312,6 +314,7 @@ TEST(zimcheck, favicon_goodzimfile)
312314
const std::string expected_output(
313315
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
314316
"[INFO] Zimcheck version is " VERSION "\n"
317+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
315318
"[INFO] Searching for Favicon..." "\n"
316319
"[INFO] Overall Test Status: Pass" "\n"
317320
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -331,6 +334,7 @@ TEST(zimcheck, mainpage_goodzimfile)
331334
const std::string expected_output(
332335
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
333336
"[INFO] Zimcheck version is " VERSION "\n"
337+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
334338
"[INFO] Searching for main page..." "\n"
335339
"[INFO] Overall Test Status: Pass" "\n"
336340
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -350,6 +354,7 @@ TEST(zimcheck, article_content_goodzimfile)
350354
const std::string expected_output(
351355
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
352356
"[INFO] Zimcheck version is " VERSION "\n"
357+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
353358
"[INFO] Verifying Articles' content..." "\n"
354359
"[INFO] Overall Test Status: Pass" "\n"
355360
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -373,6 +378,7 @@ TEST(zimcheck, redundant_articles_goodzimfile)
373378
const std::string expected_output(
374379
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
375380
"[INFO] Zimcheck version is " VERSION "\n"
381+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
376382
"[INFO] Verifying Articles' content..." "\n"
377383
"[INFO] Searching for redundant articles..." "\n"
378384
" Verifying Similar Articles for redundancies..." "\n"
@@ -394,6 +400,7 @@ TEST(zimcheck, redirect_loop_goodzimfile)
394400
const std::string expected_output(
395401
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
396402
"[INFO] Zimcheck version is " VERSION "\n"
403+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
397404
"[INFO] Checking for redirect loops..." "\n"
398405
"[INFO] Overall Test Status: Pass" "\n"
399406
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
@@ -505,6 +512,7 @@ TEST(zimcheck, bad_checksum)
505512
const std::string expected_output(
506513
"[INFO] Checking zim file data/zimfiles/bad_checksum.zim" "\n"
507514
"[INFO] Zimcheck version is " VERSION "\n"
515+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
508516
"[INFO] Verifying Internal Checksum..." "\n"
509517
" [ERROR] Wrong Checksum in ZIM archive" "\n"
510518
"[ERROR] Invalid checksum:" "\n"
@@ -528,6 +536,7 @@ TEST(zimcheck, metadata_poorzimfile)
528536
const std::string expected_stdout(
529537
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
530538
"[INFO] Zimcheck version is " VERSION "\n"
539+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
531540
"[INFO] Checking metadata..." "\n"
532541
"[ERROR] Metadata errors:" "\n"
533542
" Missing mandatory metadata: Title" "\n"
@@ -553,6 +562,7 @@ TEST(zimcheck, favicon_poorzimfile)
553562
const std::string expected_stdout(
554563
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
555564
"[INFO] Zimcheck version is " VERSION "\n"
565+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
556566
"[INFO] Searching for Favicon..." "\n"
557567
"[ERROR] Favicon:" "\n"
558568
" Favicon is missing" "\n"
@@ -574,6 +584,7 @@ TEST(zimcheck, mainpage_poorzimfile)
574584
const std::string expected_stdout(
575585
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
576586
"[INFO] Zimcheck version is " VERSION "\n"
587+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
577588
"[INFO] Searching for main page..." "\n"
578589
"[ERROR] Missing mainpage:" "\n"
579590
" Main Page Index stored in Archive Header: 4294967295" "\n"
@@ -595,6 +606,7 @@ TEST(zimcheck, empty_items_poorzimfile)
595606
const std::string expected_stdout(
596607
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
597608
"[INFO] Zimcheck version is " VERSION "\n"
609+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
598610
"[INFO] Verifying Articles' content..." "\n"
599611
"[ERROR] Empty articles:" "\n"
600612
" Entry empty.html is empty" "\n"
@@ -616,6 +628,7 @@ TEST(zimcheck, internal_url_check_poorzimfile)
616628
const std::string expected_stdout(
617629
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
618630
"[INFO] Zimcheck version is " VERSION "\n"
631+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
619632
"[INFO] Verifying Articles' content..." "\n"
620633
"[ERROR] Invalid internal links found:" "\n"
621634
" The following links:" "\n"
@@ -642,6 +655,7 @@ TEST(zimcheck, external_url_check_poorzimfile)
642655
const std::string expected_stdout(
643656
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
644657
"[INFO] Zimcheck version is " VERSION "\n"
658+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
645659
"[INFO] Verifying Articles' content..." "\n"
646660
"[ERROR] Invalid external links found:" "\n"
647661
" http://a.io/pic.png is an external dependence in article external_image_http.html" "\n"
@@ -665,6 +679,7 @@ TEST(zimcheck, redundant_poorzimfile)
665679
const std::string expected_stdout(
666680
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
667681
"[INFO] Zimcheck version is " VERSION "\n"
682+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
668683
"[INFO] Verifying Articles' content..." "\n"
669684
"[INFO] Searching for redundant articles..." "\n"
670685
" Verifying Similar Articles for redundancies..." "\n"
@@ -688,6 +703,7 @@ TEST(zimcheck, redirect_loop_poorzimfile)
688703
const std::string expected_output(
689704
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
690705
"[INFO] Zimcheck version is " VERSION "\n"
706+
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
691707
"[INFO] Checking for redirect loops..." "\n"
692708
"[ERROR] Redirect loop(s) exist:" "\n"
693709
" Redirect loop exists from entry redirect_loop.html" "\n"

0 commit comments

Comments
 (0)