Skip to content

Commit

Permalink
Ignore already reviewed mismatches on reupload (#880)
Browse files Browse the repository at this point in the history
Bug: T329631

Co-authored-by: Hasan Akgün <hasanakg@outlook.com>
  • Loading branch information
guergana and hasanakg authored Feb 19, 2024
1 parent 2c4e64b commit cb98549
Show file tree
Hide file tree
Showing 2 changed files with 334 additions and 10 deletions.
83 changes: 75 additions & 8 deletions app/Jobs/ImportCSV.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,58 @@ public function handle(CSVImportReader $reader)
$filepath = Storage::disk('local')
->path('mismatch-files/' . $this->meta->filename);

DB::transaction(function () use ($reader, $filepath) {
$reader->lines($filepath)->each(function ($mismatchLine) {
$mismatch = Mismatch::make($mismatchLine);
if ($mismatch->type == null) {
$mismatch->type = 'statement';
$mismatch_attrs = (new Mismatch())->getFillable();

DB::transaction(function () use ($reader, $filepath, $mismatch_attrs) {
$new_mismatches = [];
$where_clauses = [];

$mismatches_per_upload_user = DB::table('mismatches')
->select($mismatch_attrs)
->join('import_meta', 'mismatches.import_id', '=', 'import_meta.id')
->where('import_meta.user_id', '=', $this->meta->user->id);

$reader->lines($filepath)->each(function ($mismatchLine) use (
&$new_mismatches,
&$where_clauses
) {

$new_mismatch = $this->createMismatch($mismatchLine);
$new_mismatches[] = $new_mismatch;
$where_clause = [['review_status', '!=', 'pending']];
foreach ($new_mismatch->getAttributes() as $key => $attribute) {
if ($key == 'review_status') {
continue;
}
$where_clause[] = [$key, $attribute];
}
$where_clauses[] = $where_clause;
});

$mismatches_per_upload_user->where(function ($query) use ($where_clauses) {
foreach ($where_clauses as $where_clause) {
$query->orWhere(function ($query) use ($where_clause) {
$query->where($where_clause);
});
}
$mismatch->importMeta()->associate($this->meta);
$mismatch->save();
});

$existing_mismatches = $mismatches_per_upload_user->get();

foreach ($new_mismatches as $new_mismatch) {
if ($existing_mismatches->doesntContain(function ($value) use ($new_mismatch) {
$metaAttrs = $new_mismatch->getAttributes();
foreach ($metaAttrs as $attrKey => $attr) {
if ($attrKey != 'review_status' && $value->{$attrKey} != $attr) {
return false;
}
}
return true;
})) {
$this->saveMismatch($new_mismatch);
}
}

$this->meta->status = 'completed';
$this->meta->save();
});
Expand All @@ -64,7 +106,7 @@ public function handle(CSVImportReader $reader)
/**
* Handle a job failure.
*
* @param \Throwable $exception
* @param \Throwable $exception
* @return void
*/
public function failed(Throwable $exception)
Expand All @@ -78,4 +120,29 @@ public function failed(Throwable $exception)
$this->meta->status = 'failed';
$this->meta->save();
}


private function createMismatch($mismatch_data)
{
$new_mismatch = Mismatch::make($mismatch_data);

if ($new_mismatch->type == null) {
$new_mismatch->type = 'statement';
}

return $new_mismatch;
}

/**
* Save mismatch to database
*
* @param \Mismatch $new_mismatch
* @return void
*/
private function saveMismatch($new_mismatch)
{
// if review_status == pending -> save
$new_mismatch->importMeta()->associate($this->meta);
$new_mismatch->save();
}
}
261 changes: 259 additions & 2 deletions tests/Feature/ImportCSVTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
use App\Models\ImportMeta;
use App\Jobs\ImportCSV;
use App\Models\User;
use App\Models\Mismatch;
use Throwable;

class ImportCSVTest extends TestCase
{
use RefreshDatabase;
/**
* Ensure import persists mismatches to database
* Ensure import persists mismatches to empty database
*/
public function test_creates_mismatches(): void
public function test_creates_mismatches_in_empty_db(): void
{
$filename = 'creates-mismatches.csv';
$user = User::factory()->uploader()->create();
Expand Down Expand Up @@ -58,6 +59,193 @@ public function test_creates_mismatches(): void
}
}

/**
* Ensure reupload doesn't import duplicated row with reviewed state
*/
public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed(): void
{
$filename = 'creates-mismatches.csv';
$user = User::factory()->uploader()->create();
$reupload_import = ImportMeta::factory()->for($user)->create([
'filename' => $filename
]);

$already_in_db_import = ImportMeta::factory()->for($user);

Mismatch::factory()
->for($already_in_db_import)->create([
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
'item_id' => 'Q184746',
'property_id' => 'P569',
'meta_wikidata_value' => 'Q12138',
'wikidata_value' => '3 April 1934',
'external_value' => '1934-04-03',
'external_url' => 'https://d-nb.info/gnd/119004453',
'review_status' => 'both',
'type' => 'statement'
]);

$header = config('imports.upload.column_keys');
$lines = [
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
];

$content = join("\n", array_map(function (array $line) {
return join(',', $line);
}, array_merge([$header], $lines)));

Storage::fake('local');
Storage::put(
'mismatch-files/' . $filename,
$content
);

$expected = array_map(function ($row) use ($header) {
return array_combine($header, $row);
}, $lines);

ImportCSV::dispatch($reupload_import);

$this->assertDatabaseCount('mismatches', count($lines));
$this->assertDatabaseHas('mismatches', [
'import_id' => $reupload_import->id
]);

foreach ($expected as $mismatch) {
$this->assertDatabaseHas('mismatches', $mismatch);
}
}

/**
* Ensure reupload imports duplicated row if db row has review status = 'pending'
*/
public function test_imports_mismatches_when_row_exists_and_review_status_is_pending(): void
{
$filename = 'creates-mismatches.csv';
$user = User::factory()->uploader()->create();
$reupload_import = ImportMeta::factory()->for($user)->create([
'filename' => $filename
]);

$already_in_db_import = ImportMeta::factory()->for($user)->create();

Mismatch::factory()
->for($already_in_db_import)->create([
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
'item_id' => 'Q184746',
'property_id' => 'P569',
'meta_wikidata_value' => 'Q12138',
'wikidata_value' => '3 April 1934',
'external_value' => '1934-04-03',
'external_url' => 'https://d-nb.info/gnd/119004453',
'review_status' => 'pending',
'type' => 'statement'
]);

$header = config('imports.upload.column_keys');
$lines = [
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
];

$content = join("\n", array_map(function (array $line) {
return join(',', $line);
}, array_merge([$header], $lines)));

Storage::fake('local');
Storage::put(
'mismatch-files/' . $filename,
$content
);

$expected = array_map(function ($row) use ($header) {
return array_combine($header, $row);
}, $lines);

ImportCSV::dispatch($reupload_import);

$this->assertDatabaseCount('mismatches', count($lines) + 1);
$this->assertDatabaseHas('mismatches', [
'import_id' => $already_in_db_import->id
]);

foreach ($expected as $mismatch) {
$this->assertDatabaseHas('mismatches', $mismatch);
}
}

/**
* Ensure reupload imports duplicated rows if uploaded by different users
*/
public function test_imports_mismatch_row_from_different_users_only_when_not_pending_in_db(): void
{
$filename = 'creates-mismatches.csv';
$user1 = User::factory()->uploader()->create();
$user2 = User::factory()->uploader()->create();
$reupload_import1 = ImportMeta::factory()->for($user1)->create([
'filename' => $filename
]);
$reupload_import2 = ImportMeta::factory()->for($user2)->create([
'filename' => $filename
]);

$already_in_db_import = ImportMeta::factory()->for($user1)->create();

Mismatch::factory()
->for($already_in_db_import)->create([
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
'item_id' => 'Q184746',
'property_id' => 'P569',
'meta_wikidata_value' => 'Q12138',
'wikidata_value' => '3 April 1934',
'external_value' => '1934-04-03',
'external_url' => 'https://d-nb.info/gnd/119004453',
'review_status' => 'both',
'type' => 'statement'
]);

$header = config('imports.upload.column_keys');
$lines = [
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453","statement"],
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
,"","nonbinary","https://www.imdb.com/name/nm0328762/","statement"]
];

$content = join("\n", array_map(function (array $line) {
return join(',', $line);
}, array_merge([$header], $lines)));

Storage::fake('local');
Storage::put(
'mismatch-files/' . $filename,
$content
);

$expected = array_map(function ($row) use ($header) {
return array_combine($header, $row);
}, $lines);

ImportCSV::dispatch($reupload_import1);
ImportCSV::dispatch($reupload_import2);

// total would be 5 but one is not imported because it's already reviewed in db
$this->assertDatabaseCount('mismatches', count($lines) * 2);
$this->assertDatabaseHas('mismatches', [
'import_id' => $reupload_import1->id,
'import_id' => $reupload_import2->id,
]);

foreach ($expected as $mismatch) {
$this->assertDatabaseHas('mismatches', $mismatch);
}
}

/**
* Ensure no change is commited to Database in case of error
*/
Expand Down Expand Up @@ -103,4 +291,73 @@ public function test_rolls_back_on_failure(): void
]);
}
}

/**
* Ensure reupload doesn't import duplicated row with reviewed state and ignoring type column if it's empty
*/
public function test_doesnt_import_mismatches_when_row_exists_and_is_reviewed_and_type_is_empty_in_csv(): void
{
$filename = 'creates-mismatches.csv';
$user1 = User::factory()->uploader()->create();
$user2 = User::factory()->uploader()->create();
$reupload_import1 = ImportMeta::factory()->for($user1)->create([
'filename' => $filename
]);
$reupload_import2 = ImportMeta::factory()->for($user2)->create([
'filename' => $filename
]);

$already_in_db_import = ImportMeta::factory()->for($user1)->create();

Mismatch::factory()
->for($already_in_db_import)->create([
'statement_guid' => 'Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5',
'item_id' => 'Q184746',
'property_id' => 'P569',
'meta_wikidata_value' => 'Q12138',
'wikidata_value' => '3 April 1934',
'external_value' => '1934-04-03',
'external_url' => 'https://d-nb.info/gnd/119004453',
'review_status' => 'both',
'type' => 'statement'
]);

$header = config('imports.upload.column_keys');
$lines = [
["Q184746","Q184746$7814880A-A6EF-40EC-885E-F46DD58C8DC5","P569","3 April 1934"
,"Q12138","1934-04-03","https://d-nb.info/gnd/119004453",""],
["Q184746","Q184746$7200D1AD-E4E8-401B-8D57-8C823810F11F","P21","Q6581072"
,"","nonbinary","https://www.imdb.com/name/nm0328762/",""]
];

$content = join("\n", array_map(function (array $line) {
return join(',', $line);
}, array_merge([$header], $lines)));

Storage::fake('local');
Storage::put(
'mismatch-files/' . $filename,
$content
);

$expected = array_map(function ($row) use ($header) {
$expected_line = array_combine($header, $row);
$expected_line['type'] = 'statement';
return $expected_line;
}, $lines);

ImportCSV::dispatch($reupload_import1);
ImportCSV::dispatch($reupload_import2);

// total would be 5 but one is not imported because it's already reviewed in db
$this->assertDatabaseCount('mismatches', count($lines) * 2);
$this->assertDatabaseHas('mismatches', [
'import_id' => $reupload_import1->id,
'import_id' => $reupload_import2->id,
]);

foreach ($expected as $mismatch) {
$this->assertDatabaseHas('mismatches', $mismatch);
}
}
}

0 comments on commit cb98549

Please sign in to comment.