Skip to content

Commit

Permalink
adding basic test
Browse files Browse the repository at this point in the history
  • Loading branch information
jw2249a committed Jan 9, 2024
1 parent 8fbdd1b commit 718c282
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .#scratch.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
XSym
0040
ab1f61d72a7b8770c9d0b5379dff8a6c
jw@Js-MacBook-Air.local.22816:1704653445

10 changes: 8 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ authors = ["Jack R. Williams <contact@jackryanwilliams.com>"]
version = "0.1.1"

[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404"

[extras]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

[targets]
test = ["Test", "CSV", "Pkg"]
5 changes: 2 additions & 3 deletions src/fastlink/fastlink.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,7 @@ function fastLink(dfA::DataFrame, dfB::DataFrame,

res=ResultMatrix(comparison_levels, (obs_a,obs_b))

fastlink_settings=FastLinkVars(varnames,res,vartypes,stringdist_method,jw_weight,
cut_a,cut_p,upper_case,partials,fuzzy,comparison_levels)
fastlink_settings=FastLinkVars(varnames,res,vartypes,stringdist_method,jw_weight,cut_a,cut_p,upper_case,partials,fuzzy,comparison_levels)



Expand All @@ -223,7 +222,7 @@ function fastLink(dfA::DataFrame, dfB::DataFrame,

# iterate through variables and execute function over them
for i in eachindex(varnames)
@info "Now match $(varnames[i])"
@info "Now matching var $(varnames[i]) using $(match_method[i])"
fastlink_settings.comparison_funs[i](dfA[!,varnames[i]],dfB[!,varnames[i]])
end

Expand Down
2 changes: 1 addition & 1 deletion src/gammas/gammaCKfuzzy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ function gammaCKfuzzy!(results::SubArray,array_2Dindex::Function,
end

query_len = UInt8(min(ncodeunits(query_name),16))
query_masks_lookup = maskify(query_name,query_len,space_char=0x40,max_char=max_char)
query_masks_lookup = maskify(query_name,query_len,space_char=space_char,max_char=max_char)
query_partial = UInt16(1024 ÷ query_len)
candidate_scores = deepcopy(base_candidate_scores)

Expand Down
37 changes: 36 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,43 @@

using FastLink
using Test
import DataFrames: DataFrame
import CSV
import Pkg.Artifacts: @artifact_str

# Look up the two sample-data artifacts by name (the string macro is the
# literal-suffix spelling of `@artifact_str`, which is imported above).
a_fil = artifact"dfA"
b_fil = artifact"dfB"

# Read both fixture tables the same way: one parsing task, pooling enabled,
# and both "" and "NA" treated as missing values.
dfA = CSV.read(
    "$(a_fil)/dfA.csv", DataFrame; ntasks=1, pool=true, missingstring=["", "NA"]
)
dfB = CSV.read(
    "$(b_fil)/dfB.csv", DataFrame; ntasks=1, pool=true, missingstring=["", "NA"]
)

# Smoke test: run the full linkage on the bundled sample tables and record a
# passing assertion once it completes. Any exception thrown inside the block is
# captured by `@testset` and reported as an error.
@testset "FastLink.jl" begin
    # Per-variable settings: position i of every vector configures varnames[i].
    varnames = ["firstname", "middlename", "lastname", "housenum"]
    cut_a = [0.92, 0.92, 0.92, 1]
    cut_p = [0.88, 0.88, 0.88, 2]
    match_method = ["string", "string", "string", "float"]
    partials = [true, true, false, true]
    fuzzy = [true, false, true, false]
    stringdist_method = ["jw", "jw", "jw", ""]
    upper_case = [false, false, false, false]
    jw_weight = [0.1, 0.1, 0.1, 0.0]

    # `fastLink(...)` is called and its result is then invoked with no
    # arguments, exactly as before.
    fastLink(
        dfA,
        dfB,
        varnames;
        match_method=match_method,
        partials=partials,
        fuzzy=fuzzy,
        upper_case=upper_case,
        stringdist_method=stringdist_method,
        cut_a=cut_a,
        cut_p=cut_p,
        jw_weight=jw_weight,
    )()

    # Reaching this line means the run finished without throwing. Record that
    # as a real assertion so the test set reports one pass rather than none.
    @test true

    # No `return` here: returning from inside the block would exit before
    # `@testset` finalises and reports its results.
    @info "completed"
end

0 comments on commit 718c282

Please sign in to comment.