From cb98549a9d9b7574468520b0c5ed2e3bf56e543e Mon Sep 17 00:00:00 2001 From: Guergana Tzatchkova Date: Mon, 19 Feb 2024 22:03:37 +0700 Subject: [PATCH] Ignore already reviewed mismatches on reupload (#880) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: T329631 Co-authored-by: Hasan Akgün --- app/Jobs/ImportCSV.php | 83 +++++++++- tests/Feature/ImportCSVTest.php | 261 +++++++++++++++++++++++++++++++- 2 files changed, 334 insertions(+), 10 deletions(-) diff --git a/app/Jobs/ImportCSV.php b/app/Jobs/ImportCSV.php index ef926ce13..b895b4ba7 100644 --- a/app/Jobs/ImportCSV.php +++ b/app/Jobs/ImportCSV.php @@ -46,16 +46,58 @@ public function handle(CSVImportReader $reader) $filepath = Storage::disk('local') ->path('mismatch-files/' . $this->meta->filename); - DB::transaction(function () use ($reader, $filepath) { - $reader->lines($filepath)->each(function ($mismatchLine) { - $mismatch = Mismatch::make($mismatchLine); - if ($mismatch->type == null) { - $mismatch->type = 'statement'; + $mismatch_attrs = (new Mismatch())->getFillable(); + + DB::transaction(function () use ($reader, $filepath, $mismatch_attrs) { + $new_mismatches = []; + $where_clauses = []; + + $mismatches_per_upload_user = DB::table('mismatches') + ->select($mismatch_attrs) + ->join('import_meta', 'mismatches.import_id', '=', 'import_meta.id') + ->where('import_meta.user_id', '=', $this->meta->user->id); + + $reader->lines($filepath)->each(function ($mismatchLine) use ( + &$new_mismatches, + &$where_clauses + ) { + + $new_mismatch = $this->createMismatch($mismatchLine); + $new_mismatches[] = $new_mismatch; + $where_clause = [['review_status', '!=', 'pending']]; + foreach ($new_mismatch->getAttributes() as $key => $attribute) { + if ($key == 'review_status') { + continue; + } + $where_clause[] = [$key, $attribute]; + } + $where_clauses[] = $where_clause; + }); + + $mismatches_per_upload_user->where(function ($query) use ($where_clauses) { + foreach ($where_clauses as $where_clause) { + $query->orWhere(function ($query) use ($where_clause) { + $query->where($where_clause); + }); } - $mismatch->importMeta()->associate($this->meta); - $mismatch->save(); }); + $existing_mismatches = $mismatches_per_upload_user->get(); + + foreach ($new_mismatches as $new_mismatch) { + if ($existing_mismatches->doesntContain(function ($value) use ($new_mismatch) { + $metaAttrs = $new_mismatch->getAttributes(); + foreach ($metaAttrs as $attrKey => $attr) { + if ($attrKey != 'review_status' && $value->{$attrKey} != $attr) { + return false; + } + } + return true; + })) { + $this->saveMismatch($new_mismatch); + } + } + $this->meta->status = 'completed'; $this->meta->save(); }); @@ -64,7 +106,7 @@ public function handle(CSVImportReader $reader) /** * Handle a job failure. * - * @param \Throwable $exception + * @param \Throwable $exception * @return void */ public function failed(Throwable $exception) @@ -78,4 +120,29 @@ public function failed(Throwable $exception) $this->meta->status = 'failed'; $this->meta->save(); } + + + private function createMismatch($mismatch_data) + { + $new_mismatch = Mismatch::make($mismatch_data); + + if ($new_mismatch->type == null) { + $new_mismatch->type = 'statement'; + } + + return $new_mismatch; + } + + /** + * Save mismatch to database + * + * @param \Mismatch $new_mismatch + * @return void + */ + private function saveMismatch($new_mismatch) + { + // if review_status == pending -> save + $new_mismatch->importMeta()->associate($this->meta); + $new_mismatch->save(); + } } diff --git a/tests/Feature/ImportCSVTest.php b/tests/Feature/ImportCSVTest.php index 0b2629598..1012170f3 100644 --- a/tests/Feature/ImportCSVTest.php +++ b/tests/Feature/ImportCSVTest.php @@ -8,15 +8,16 @@ use App\Models\ImportMeta; use App\Jobs\ImportCSV; use App\Models\User; +use App\Models\Mismatch; use Throwable; class ImportCSVTest extends TestCase { use RefreshDatabase; /** - * Ensure import persists mismatches to database + * Ensure import persists mismatches to empty database */ - public function test_creates_mismatches(): void + public function test_creates_mismatches_in_empty_db(): void { $filename = 'creates-mismatches.csv'; $user = User::factory()->uploader()->create(); @@ -58,6 +59,193 @@ public function test_creates_mismatches(): void } } + /** + * Ensure reupload doesn't import duplicated row with reviewed state + */ + public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed(): void + { + $filename = 'creates-mismatches.csv'; + $user = User::factory()->uploader()->create(); + $reupload_import = ImportMeta::factory()->for($user)->create([ + 'filename' => $filename + ]); + + $already_in_db_import = ImportMeta::factory()->for($user); + + Mismatch::factory() + ->for($already_in_db_import)->create([ + 'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5', + 'item_id' => 'Q184746', + 'property_id' => 'P569', + 'meta_wikidata_value' => 'Q12138', + 'wikidata_value' => '3 April 1934', + 'external_value' => '1934-04-03', + 'external_url' => 'https://d-nb.info/gnd/119004453', + 'review_status' => 'both', + 'type' => 'statement' + ]); + + $header = config('imports.upload.column_keys'); + $lines = [ + ["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934" + ,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"], + ["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072" + ,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"] + ]; + + $content = join("\n", array_map(function (array $line) { + return join(',', $line); + }, array_merge([$header], $lines))); + + Storage::fake('local'); + Storage::put( + 'mismatch-files/' . $filename, + $content + ); + + $expected = array_map(function ($row) use ($header) { + return array_combine($header, $row); + }, $lines); + + ImportCSV::dispatch($reupload_import); + + $this->assertDatabaseCount('mismatches', count($lines)); + $this->assertDatabaseHas('mismatches', [ + 'import_id' => $reupload_import->id + ]); + + foreach ($expected as $mismatch) { + $this->assertDatabaseHas('mismatches', $mismatch); + } + } + + /** + * Ensure reupload imports duplicated row if db row has review status = 'pending' + */ + public function test_imports_mismatches_when_row_exists_and_review_status_is_pending(): void + { + $filename = 'creates-mismatches.csv'; + $user = User::factory()->uploader()->create(); + $reupload_import = ImportMeta::factory()->for($user)->create([ + 'filename' => $filename + ]); + + $already_in_db_import = ImportMeta::factory()->for($user)->create(); + + Mismatch::factory() + ->for($already_in_db_import)->create([ + 'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5', + 'item_id' => 'Q184746', + 'property_id' => 'P569', + 'meta_wikidata_value' => 'Q12138', + 'wikidata_value' => '3 April 1934', + 'external_value' => '1934-04-03', + 'external_url' => 'https://d-nb.info/gnd/119004453', + 'review_status' => 'pending', + 'type' => 'statement' + ]); + + $header = config('imports.upload.column_keys'); + $lines = [ + ["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934" + ,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"], + ["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072" + ,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"] + ]; + + $content = join("\n", array_map(function (array $line) { + return join(',', $line); + }, array_merge([$header], $lines))); + + Storage::fake('local'); + Storage::put( + 'mismatch-files/' . $filename, + $content + ); + + $expected = array_map(function ($row) use ($header) { + return array_combine($header, $row); + }, $lines); + + ImportCSV::dispatch($reupload_import); + + $this->assertDatabaseCount('mismatches', count($lines) + 1); + $this->assertDatabaseHas('mismatches', [ + 'import_id' => $already_in_db_import->id + ]); + + foreach ($expected as $mismatch) { + $this->assertDatabaseHas('mismatches', $mismatch); + } + } + + /** + * Ensure reupload imports duplicated rows if uploaded by different users + */ + public function test_imports_mismatch_row_from_different_users_only_when_not_pending_in_db(): void + { + $filename = 'creates-mismatches.csv'; + $user1 = User::factory()->uploader()->create(); + $user2 = User::factory()->uploader()->create(); + $reupload_import1 = ImportMeta::factory()->for($user1)->create([ + 'filename' => $filename + ]); + $reupload_import2 = ImportMeta::factory()->for($user2)->create([ + 'filename' => $filename + ]); + + $already_in_db_import = ImportMeta::factory()->for($user1)->create(); + + Mismatch::factory() + ->for($already_in_db_import)->create([ + 'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5', + 'item_id' => 'Q184746', + 'property_id' => 'P569', + 'meta_wikidata_value' => 'Q12138', + 'wikidata_value' => '3 April 1934', + 'external_value' => '1934-04-03', + 'external_url' => 'https://d-nb.info/gnd/119004453', + 'review_status' => 'both', + 'type' => 'statement' + ]); + + $header = config('imports.upload.column_keys'); + $lines = [ + ["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934" + ,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"], + ["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072" + ,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"] + ]; + + $content = join("\n", array_map(function (array $line) { + return join(',', $line); + }, array_merge([$header], $lines))); + + Storage::fake('local'); + Storage::put( + 'mismatch-files/' . $filename, + $content + ); + + $expected = array_map(function ($row) use ($header) { + return array_combine($header, $row); + }, $lines); + + ImportCSV::dispatch($reupload_import1); + ImportCSV::dispatch($reupload_import2); + + // total would be 5 but one is not imported because it's already reviewed in db + $this->assertDatabaseCount('mismatches', count($lines) * 2); + $this->assertDatabaseHas('mismatches', [ + 'import_id' => $reupload_import1->id, + 'import_id' => $reupload_import2->id, + ]); + + foreach ($expected as $mismatch) { + $this->assertDatabaseHas('mismatches', $mismatch); + } + } + /** * Ensure no change is commited to Database in case of error */ @@ -103,4 +291,73 @@ public function test_rolls_back_on_failure(): void ]); } } + + /** + * Ensure reupload doesn't import duplicated row with reviewed state and ignoring type column if it's empty + */ + public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed_and_type_is_empty_in_csv(): void + { + $filename = 'creates-mismatches.csv'; + $user1 = User::factory()->uploader()->create(); + $user2 = User::factory()->uploader()->create(); + $reupload_import1 = ImportMeta::factory()->for($user1)->create([ + 'filename' => $filename + ]); + $reupload_import2 = ImportMeta::factory()->for($user2)->create([ + 'filename' => $filename + ]); + + $already_in_db_import = ImportMeta::factory()->for($user1)->create(); + + Mismatch::factory() + ->for($already_in_db_import)->create([ + 'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5', + 'item_id' => 'Q184746', + 'property_id' => 'P569', + 'meta_wikidata_value' => 'Q12138', + 'wikidata_value' => '3 April 1934', + 'external_value' => '1934-04-03', + 'external_url' => 'https://d-nb.info/gnd/119004453', + 'review_status' => 'both', + 'type' => 'statement' + ]); + + $header = config('imports.upload.column_keys'); + $lines = [ + ["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934" + ,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453",""], + ["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072" + ,"","nonbinary","https://www.imdb.com/name/nm0328762/",""] + ]; + + $content = join("\n", array_map(function (array $line) { + return join(',', $line); + }, array_merge([$header], $lines))); + + Storage::fake('local'); + Storage::put( + 'mismatch-files/' . $filename, + $content + ); + + $expected = array_map(function ($row) use ($header) { + $expected_line = array_combine($header, $row); + $expected_line['type'] = 'statement'; + return $expected_line; + }, $lines); + + ImportCSV::dispatch($reupload_import1); + ImportCSV::dispatch($reupload_import2); + + // total would be 5 but one is not imported because it's already reviewed in db + $this->assertDatabaseCount('mismatches', count($lines) * 2); + $this->assertDatabaseHas('mismatches', [ + 'import_id' => $reupload_import1->id, + 'import_id' => $reupload_import2->id, + ]); + + foreach ($expected as $mismatch) { + $this->assertDatabaseHas('mismatches', $mismatch); + } + } }