forked from SteffenMoritz/Synthetic_Data_Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path6. GAN_SAT_Copula.Rmd
62 lines (50 loc) · 1.27 KB
/
6. GAN_SAT_Copula.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
---
title: "GAN_SAT"
author: "Steffen Moritz"
date: "1/27/2022"
output: html_document
---
```{r setup, include=FALSE}
# Default for all chunks: echo the chunk's source code into the knitted output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r dataset, include=FALSE}
# Read the source data. The .rda file (located via here::here()) restores an
# object named `satgpa`; it is aliased to `sat`, the name the Python chunk
# reads as `r.sat`.
load(file = here::here("satgpa.rda"))
sat <- satgpa
```
```{r reticulate, include=FALSE}
# Python setup through reticulate, so the Python chunk below can import sdv.
library(reticulate)

# One-time environment setup (kept for reference, run manually when needed):
#install_python()
#py_available(initialize = TRUE)
#virtualenv_create("synthchallenge")
#virtualenv_install("synthchallenge", "sdnist")
#virtualenv_install("synthchallenge", "CTGAN")

# Point reticulate at the interpreter and report the Python version in use
use_python("/usr/local/bin/python3")
py_version()

# Select the virtualenv that holds the synthesizer packages
use_virtualenv("synthchallenge")

# Optional sanity check:
#py_module_available("ctgan")
```
```{python pycode}
# CopulaGAN synthesizer from SDV's tabular module
from sdv.tabular import CopulaGAN

# Columns intended to be treated as discrete.
# NOTE(review): this list is never handed to the model below, so it has no
# effect on the fit - confirm whether it should be wired in.
discrete_columns = ['sex', 'hs_gpa']

# Instantiate the synthesizer with its default settings
copgan = CopulaGAN()

# Train on the R object `sat`; the `r.` prefix exposes R variables to Python
copgan.fit(r.sat)

# Draw 1000 synthetic rows; `res` is picked up again on the R side
res = copgan.sample(1000)
#print(res)

# Optionally persist the fitted model so more samples can be drawn later
#copgan.save('gan_model.pkl')
```
```{r}
# Pull the synthetic sample back into R: `py$res` is the reticulate syntax for
# reading the Python variable `res`.
result_copulagan <- py$res

# save() does not create missing directories, so make sure the output folder
# exists first; otherwise the document fails at this last step, after the
# model has already been fitted.
output_dir <- "results"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Same target as before: results/sm_sat_gan_copulagan.rda
save(
  result_copulagan,
  file = file.path(output_dir, "sm_sat_gan_copulagan.rda")
)
```