From 440f9f89a05de5f3a3176236d2870d93083a7fb5 Mon Sep 17 00:00:00 2001 From: jinseob kim Date: Sun, 5 Jan 2025 18:33:51 +0900 Subject: [PATCH] s --- docs/KSIC2025/index.html | 766 ++++++++++++++++++++++++++++++++++++--- docs/KSIC2025/index.qmd | 118 +++++- 2 files changed, 836 insertions(+), 48 deletions(-) diff --git a/docs/KSIC2025/index.html b/docs/KSIC2025/index.html index ffa6760..a081cad 100644 --- a/docs/KSIC2025/index.html +++ b/docs/KSIC2025/index.html @@ -1087,7 +1087,679 @@

jskm::svyjskm(s1, pval = T, table = T, design = dpbc) -

log-rank test(X), survey rank test(O)

+

log-rank test(X), survey rank test(O, survey::svyranktest)

+ + +
+

MatchIt

+

2그룹 PS/IPTW에서 가장 많이 쓰는 패키지

+
+
library(MatchIt)
+data("lalonde", package = "MatchIt")
+
+table(lalonde$treat)
+
+

+  0   1 
+429 185 
+
+
#1:1 NN matching w/ replacement on a logistic regression PS
+m.out <- matchit(treat ~ age + educ + race + married + 
+                   nodegree + re74 + re75, data = lalonde, distance = "glm", method = "nearest", ratio = 1, caliper = NULL)
+m.out
+
+
A matchit object
+ - method: 1:1 nearest neighbor matching without replacement
+ - distance: Propensity score
+             - estimated with logistic regression
+ - number of obs.: 614 (original), 370 (matched)
+ - target estimand: ATT
+ - covariates: age, educ, race, married, nodegree, re74, re75
+
+
+ +
+
+

Matching data

+
+
mdata <- match.data(m.out)
+rmarkdown::paged_table(mdata[order(mdata$subclass), c("treat", "distance", "subclass")])
+
+
+ +
+
+
+
+
+

Matching 결과 확인

+
+
summary(m.out)
+
+

+Call:
+matchit(formula = treat ~ age + educ + race + married + nodegree + 
+    re74 + re75, data = lalonde, method = "nearest", distance = "glm", 
+    caliper = NULL, ratio = 1)
+
+Summary of Balance for All Data:
+           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
+distance          0.5774        0.1822          1.7941     0.9211    0.3774
+age              25.8162       28.0303         -0.3094     0.4400    0.0813
+educ             10.3459       10.2354          0.0550     0.4959    0.0347
+raceblack         0.8432        0.2028          1.7615          .    0.6404
+racehispan        0.0595        0.1422         -0.3498          .    0.0827
+racewhite         0.0973        0.6550         -1.8819          .    0.5577
+married           0.1892        0.5128         -0.8263          .    0.3236
+nodegree          0.7081        0.5967          0.2450          .    0.1114
+re74           2095.5737     5619.2365         -0.7211     0.5181    0.2248
+re75           1532.0553     2466.4844         -0.2903     0.9563    0.1342
+           eCDF Max
+distance     0.6444
+age          0.1577
+educ         0.1114
+raceblack    0.6404
+racehispan   0.0827
+racewhite    0.5577
+married      0.3236
+nodegree     0.1114
+re74         0.4470
+re75         0.2876
+
+Summary of Balance for Matched Data:
+           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
+distance          0.5774        0.3629          0.9739     0.7566    0.1321
+age              25.8162       25.3027          0.0718     0.4568    0.0847
+educ             10.3459       10.6054         -0.1290     0.5721    0.0239
+raceblack         0.8432        0.4703          1.0259          .    0.3730
+racehispan        0.0595        0.2162         -0.6629          .    0.1568
+racewhite         0.0973        0.3135         -0.7296          .    0.2162
+married           0.1892        0.2108         -0.0552          .    0.0216
+nodegree          0.7081        0.6378          0.1546          .    0.0703
+re74           2095.5737     2342.1076         -0.0505     1.3289    0.0469
+re75           1532.0553     1614.7451         -0.0257     1.4956    0.0452
+           eCDF Max Std. Pair Dist.
+distance     0.4216          0.9740
+age          0.2541          1.3938
+educ         0.0757          1.2474
+raceblack    0.3730          1.0259
+racehispan   0.1568          1.0743
+racewhite    0.2162          0.8390
+married      0.0216          0.8281
+nodegree     0.0703          1.0106
+re74         0.2757          0.7965
+re75         0.2054          0.7381
+
+Sample Sizes:
+          Control Treated
+All           429     185
+Matched       185     185
+Unmatched     244       0
+Discarded       0       0
+
+
+
+
+

jstable::CreateTableOneJS

+
+
jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = lalonde, smd = T)$table %>% kable
+
+ ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
level01ptestSMDsig
n429185NA
age28.03 ± 10.7925.82 ± 7.160.0030.242**
educ10.24 ± 2.8610.35 ± 2.010.5850.045
race (%)black87 (20.3)156 (84.3)<0.0011.701**
hispan61 (14.2)11 ( 5.9)NA
white281 (65.5)18 ( 9.7)NA
married0.51 ± 0.500.19 ± 0.39<0.0010.719**
nodegree0.60 ± 0.490.71 ± 0.460.0070.235**
re745619.24 ± 6788.752095.57 ± 4886.62<0.0010.596**
re752466.48 ± 3292.001532.06 ± 3219.250.0010.287**
+
+
+
+
+

Mathing data

+
+
jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = mdata, smd = T)$table %>% kable
+
+ ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
level01ptestSMDsig
n185185NA
age25.30 ± 10.5925.82 ± 7.160.5850.057
educ10.61 ± 2.6610.35 ± 2.010.2900.110
race (%)black87 (47.0)156 (84.3)<0.0010.855**
hispan40 (21.6)11 ( 5.9)NA
white58 (31.4)18 ( 9.7)NA
married0.21 ± 0.410.19 ± 0.390.6040.054
nodegree0.64 ± 0.480.71 ± 0.460.1510.150
re742342.11 ± 4238.982095.57 ± 4886.620.6050.054
re751614.75 ± 2632.351532.06 ± 3219.250.7870.028
+
+
+
+
+

Caliper

+
+
m.out.caliper <- matchit(treat ~ age + educ + race + married + 
+                   nodegree + re74 + re75, data = lalonde, method = "nearest", ratio = 1, caliper = 0.1)
+jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = match.data(m.out.caliper), smd = T)$table %>% kable
+
+ ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
level01ptestSMDsig
n111111NA
age26.14 ± 10.8825.95 ± 6.580.8700.022
educ10.49 ± 2.6610.49 ± 2.041.000<0.001
race (%)black80 (72.1)82 (73.9)0.9540.041
hispan12 (10.8)11 ( 9.9)NA
white19 (17.1)18 (16.2)NA
married0.24 ± 0.430.21 ± 0.410.5230.086
nodegree0.61 ± 0.490.65 ± 0.480.5800.074
re742408.56 ± 4436.772667.11 ± 5732.760.7070.050
re751800.87 ± 2917.711811.30 ± 3699.160.9810.003
+
+
+
+
+

IPTW: svyCreateTableOneJS

+
+
library(survey)
+lalonde$wt <- ifelse(lalonde$treat == 1, 1/m.out$distance, 1/(1 - m.out$distance))
+design.lalonde <- svydesign(ids=~1, strata=NULL, weights=~wt, data = lalonde)
+jstable::svyCreateTableOneJS(vars = names(m.out$X), strata = "treat", data = design.lalonde, smd = T)$table %>% kable
+
+ ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
level01ptestSMDsig
n616.00553.63NA
age27.10 ± 10.8025.57 ± 6.530.0880.172
educ10.29 ± 2.7410.61 ± 2.050.3030.132
race (%)black245.1 (39.8)247.9 (44.8)0.6950.112
hispan72.1 (11.7)67.4 (12.2)NA
white298.8 (48.5)238.3 (43.0)NA
married0.41 ± 0.490.31 ± 0.470.2280.197
nodegree0.62 ± 0.480.57 ± 0.500.4580.112
re744552.74 ± 6337.092932.18 ± 5709.420.0400.269**
re752172.04 ± 3160.141658.07 ± 3072.890.1360.165
+
+
+
+
+

3그룹 A vs B vs C

+

Matching: N수 제일 작은 그룹(A) 에 맞춰 여러번 매칭

+ +

IPTW: twang::mnps

+

日本ブログレビュー

@@ -1119,57 +1791,57 @@

-
## Gaussian
-glm_gaussian <- glm(mpg~cyl + disp, data = mtcars)
-glmshow.display(glm_gaussian, decimal = 2)
-
$first.line
-[1] "Linear regression predicting mpg\n"
-
-$table
-     crude coeff.(95%CI)   crude P value adj. coeff.(95%CI)    adj. P value
-cyl  "-2.88 (-3.51,-2.24)" "< 0.001"     "-1.59 (-2.98,-0.19)" "0.034"
-disp "-0.04 (-0.05,-0.03)" "< 0.001"     "-0.02 (-0.04,0)"     "0.054"
-
-$last.lines
-[1] "No. of observations = 32\nR-squared = 0.7596\nAIC value = 167.1456\n\n"
+
## Gaussian
+glm_gaussian <- glm(mpg~cyl + disp, data = mtcars)
+glmshow.display(glm_gaussian, decimal = 2)
+
$first.line
+[1] "Linear regression predicting mpg\n"
+
+$table
+     crude coeff.(95%CI)   crude P value adj. coeff.(95%CI)    adj. P value
+cyl  "-2.88 (-3.51,-2.24)" "< 0.001"     "-1.59 (-2.98,-0.19)" "0.034"
+disp "-0.04 (-0.05,-0.03)" "< 0.001"     "-0.02 (-0.04,0)"     "0.054"
+
+$last.lines
+[1] "No. of observations = 32\nR-squared = 0.7596\nAIC value = 167.1456\n\n"
-
## Binomial
-glm_binomial <- glm(vs~cyl + disp, data = mtcars, family = binomial)
-glmshow.display(glm_binomial, decimal = 2)
-
$first.line
-[1] "Logistic regression predicting vs\n"
- 
-$table
-     crude OR.(95%CI)   crude P value adj. OR.(95%CI)    adj. P value
-cyl  "0.2 (0.08,0.56)"  "0.002"       "0.15 (0.02,1.02)" "0.053"     
-disp "0.98 (0.97,0.99)" "0.002"       "1 (0.98,1.03)"    "0.715"     
-
-$last.lines
-[1] "No. of observations = 32\nAIC value = 23.8304\n\n"
+
## Binomial
+glm_binomial <- glm(vs~cyl + disp, data = mtcars, family = binomial)
+glmshow.display(glm_binomial, decimal = 2)
+
$first.line
+[1] "Logistic regression predicting vs\n"
+ 
+$table
+     crude OR.(95%CI)   crude P value adj. OR.(95%CI)    adj. P value
+cyl  "0.2 (0.08,0.56)"  "0.002"       "0.15 (0.02,1.02)" "0.053"     
+disp "0.98 (0.97,0.99)" "0.002"       "1 (0.98,1.03)"    "0.715"     
+
+$last.lines
+[1] "No. of observations = 32\nAIC value = 23.8304\n\n"

Subgroup analysis

-
TableSubgroupMultiGLM(status ~ sex, var_subgroups = c("kk", "kk1"), data = lung, family = "binomial")
-
    Variable Count Percent           OR Lower Upper P value P for interaction
-sex2  Overall   228     100         3.01  1.65  5.47  <0.001              <NA>
-1          kk  <NA>    <NA>         <NA>  <NA>  <NA>    <NA>             0.476
-2           0    38    16.9            7   0.7 70.03   0.098              <NA>
-3           1   187    83.1         2.94  1.55  5.57   0.001              <NA>
-4         kk1  <NA>    <NA>         <NA>  <NA>  <NA>    <NA>             0.984
-5           0     8     3.6 314366015.19     0   Inf   0.997              <NA>
-6           1   217    96.4         2.85  1.55  5.25   0.001              <NA>
-
TableSubgroupMultiCox(Surv(time, status) ~ sex, var_subgroups = c("kk", "kk1"), data = lung)
-
    Variable Count Percent Point Estimate Lower Upper sex=1 sex=2 P value P for interaction
-sex  Overall   228     100           1.91  1.14   3.2   100   100   0.014              <NA>
-1       <NA>  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>              <NA>
-2         kk  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>             0.525
-3          0    38    16.9           2.88  0.31 26.49    10   100    0.35              <NA>
-4          1   187    83.1           1.84  1.08  3.14   100   100   0.026              <NA>
-5       <NA>  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>              <NA>
-6        kk1  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>             0.997
-7          0     8     3.6           <NA>  <NA>  <NA>     0   100    <NA>              <NA>
-8          1   217    96.4           1.88  1.12  3.17   100   100   0.018              <NA>
+
TableSubgroupMultiGLM(status ~ sex, var_subgroups = c("kk", "kk1"), data = lung, family = "binomial")
+
    Variable Count Percent           OR Lower Upper P value P for interaction
+sex2  Overall   228     100         3.01  1.65  5.47  <0.001              <NA>
+1          kk  <NA>    <NA>         <NA>  <NA>  <NA>    <NA>             0.476
+2           0    38    16.9            7   0.7 70.03   0.098              <NA>
+3           1   187    83.1         2.94  1.55  5.57   0.001              <NA>
+4         kk1  <NA>    <NA>         <NA>  <NA>  <NA>    <NA>             0.984
+5           0     8     3.6 314366015.19     0   Inf   0.997              <NA>
+6           1   217    96.4         2.85  1.55  5.25   0.001              <NA>
+
TableSubgroupMultiCox(Surv(time, status) ~ sex, var_subgroups = c("kk", "kk1"), data = lung)
+
    Variable Count Percent Point Estimate Lower Upper sex=1 sex=2 P value P for interaction
+sex  Overall   228     100           1.91  1.14   3.2   100   100   0.014              <NA>
+1       <NA>  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>              <NA>
+2         kk  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>             0.525
+3          0    38    16.9           2.88  0.31 26.49    10   100    0.35              <NA>
+4          1   187    83.1           1.84  1.08  3.14   100   100   0.026              <NA>
+5       <NA>  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>              <NA>
+6        kk1  <NA>    <NA>           <NA>  <NA>  <NA>  <NA>  <NA>    <NA>             0.997
+7          0     8     3.6           <NA>  <NA>  <NA>     0   100    <NA>              <NA>
+8          1   217    96.4           1.88  1.12  3.17   100   100   0.018              <NA>

中国での動画やブログレビュー

diff --git a/docs/KSIC2025/index.qmd b/docs/KSIC2025/index.qmd index 3e9696c..6ad3893 100644 --- a/docs/KSIC2025/index.qmd +++ b/docs/KSIC2025/index.qmd @@ -329,7 +329,123 @@ s1 <- svykm(Surv(time, status > 0) ~ sex, design = dpbc) jskm::svyjskm(s1, pval = T, table = T, design = dpbc) ``` -log-rank test(X), survey rank test(O) +log-rank test(X), survey rank test(O, `survey::svyranktest`) + +- 또는 `IPWsurvival::adjusted.LR` 이용(SAS 디폴트) + + +## `MatchIt` + +2그룹 PS/IPTW에서 가장 많이 쓰는 패키지 + +```{r, echo=T} +library(MatchIt) +data("lalonde", package = "MatchIt") + +table(lalonde$treat) + +#1:1 NN matching w/ replacement on a logistic regression PS +m.out <- matchit(treat ~ age + educ + race + married + + nodegree + re74 + re75, data = lalonde, distance = "glm", method = "nearest", ratio = 1, caliper = NULL) +m.out +``` +- `distance`: glm(로지스틱) 이 기본, 다른 ML모델도 가능 +- `method`: nearest 충분, ratio: 1:N 매칭 +- `caliper`: 매칭이 잘 안될때(ex: Treat/Control 숫자가 비슷할 때) + +## Matching data + +```{r, echo=T} +mdata <- match.data(m.out) +rmarkdown::paged_table(mdata[order(mdata$subclass), c("treat", "distance", "subclass")]) +``` + +## Matching 결과 확인 + +```{r, echo=T} +summary(m.out) +``` + +## `jstable::CreateTableOneJS` + + +```{r, echo=T} +jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = lalonde, smd = T)$table %>% kable +``` + + +## Mathing data + +```{r, echo=T} +jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = mdata, smd = T)$table %>% kable +``` + +## Caliper + +```{r, echo=T} +m.out.caliper <- matchit(treat ~ age + educ + race + married + + nodegree + re74 + re75, data = lalonde, method = "nearest", ratio = 1, caliper = 0.1) +jstable::CreateTableOneJS(vars = names(m.out$X), strata = "treat", data = match.data(m.out.caliper), smd = T)$table %>% kable +``` + +## IPTW: `svyCreateTableOneJS` + +```{r, echo=T} +library(survey) +lalonde$wt <- ifelse(lalonde$treat == 1, 1/m.out$distance, 1/(1 - m.out$distance)) +design.lalonde <- svydesign(ids=~1, strata=NULL, weights=~wt, data = lalonde) +jstable::svyCreateTableOneJS(vars = names(m.out$X), strata = "treat", data = design.lalonde, smd = T)$table %>% kable +``` + +## openstat.ai + + + + +## 3그룹 A vs B vs C + +Matching: N수 제일 작은 그룹(A) 에 맞춰 여러번 매칭 + +- `MatchIt` 그대로 쓸 수 있는건 장점 +- $ATT$도 아닌 $ATT_{A}$? + + +IPTW: `twang::mnps` + +- 시간이 오래걸림: 공단데이터에서 24시간 걸릴수도 +- $ATE$ 가능, 더 추천 + + +## Issue 1: Matching 후 pair정보 이용 + + +## Issue 2: Sex를 그룹변수로 Matching 가능? + + +## Executive summary + +**Baseline 맞춘다(X), RCT를 모방한다(O)** + +- ATE(average treatment effect) vs ATT(average treatment effect on treated) + +- Matching은 ATT, IPTW는 ATE + +**2그룹 `MatchIt`, 3그룹 `twang` 패키지** + +- Logistic regression, Nearest neighbor, caliper 이해 + +- 3그룹 matching은 가장 작은 N수에 맞춰 2번 수행 + +- openstat.ai 에서 2그룹 matching/IPTW 지원 + + +**분석 이슈** + +- Matching 후 pair정보 이용해야 하는가?(ex: stratified cox) + +- 성별에 따라 매칭/IPTW 해도 되는가? + +