diff --git a/.gitignore b/.gitignore
index f86955f..e7d39db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,6 @@
-packrat/lib*/
.Rproj.user
.Rhistory
report.html
slides.html
slides.md
slides.Rpres
-
diff --git a/.travis.yml b/.travis.yml
index d3eb51b..b05b7bd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,34 +1,32 @@
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
language: r
-sudo: required
-cache:
- directories:
- - "$TRAVIS_BUILD_DIR/packrat/src"
- - "$TRAVIS_BUILD_DIR/packrat/lib"
- packages: true
+r:
+ - release
-git:
- depth: 3
+sudo: required
env:
global:
- secure: GITHUB_PAT
-r_packages:
- - matchingR
- - readr
- - dplyr
- - tibble
- - DT
+cache:
+ directories:
+ - $HOME/.local/share/renv
+ - $TRAVIS_BUILD_DIR/renv/library
+ packages: true
install:
-- R -e "0" --args --bootstrap-packrat
-
-before_script:
- - chmod +x ./_build.sh
- - chmod +x ./_deploy.sh
-
+ - Rscript -e "if (!requireNamespace('renv', quietly = TRUE)) install.packages('renv')"
+ - Rscript -e "renv::restore()"
+
script:
- - ./_build.sh
- - ./_deploy.sh
\ No newline at end of file
+ - nohup R --slave --no-restore -e 'shiny::runApp(port = 3000)' &
+
+addons:
+ apt:
+ packages:
+ - libudunits2-dev
+ - libgdal-dev
+ - libgeos-dev
+ - libproj-dev
\ No newline at end of file
diff --git a/README.md b/README.md
index 88c199a..64607ed 100644
--- a/README.md
+++ b/README.md
@@ -1,42 +1,40 @@
# Preference Allocation
[![Travis-CI Build Status](https://travis-ci.org/avisionh/Preference-Allocation.svg?branch=master)](https://travis-ci.org/avisionh/Preference-Allocation)
-### Collaborators
+# Overview
+preferenceallocation explores methods for solving *preference allocation*/*one-sided matching* problems.
-- [Avision Ho](https://github.com/avisionh)
-- [Le Duong](https://github.com/ledu1993)
+Consider that we have to assign *x* people to *y* sessions. For each of these *y* sessions,
+and individual person will have a preference ordering, meaning that they strictly prefer some sessions over others.
+The task is to allocate these *x* people to their *y* sessions, accounting for their preferences
+in such a way that the total utility of all *x* people is maximised.
-# Update
-- A Shiny app is being built for this problem.
-- The project management and code development of this stage will be captured on [Azure DevOps](https://azure.microsoft.com/en-gb/services/devops/).
- - Compared to GitHub, this has superior project management capabilities.
-- Link to this can be found on the [public project page, Preference Allocation](https://avisionh.visualstudio.com/Preference%20Allocation).
+## Methodology
+We will tackle this problem in two ways:
+1. **Gale-Shapley Algorithm |** Implementation of Alvin Roth and Lloyd Shapley's algorithm that assigns delegates to sessions in random order by accounting for both their preferences and ensuring that no two matching pairs will mutually want to switch their matches.
+1. **Iterative Preference |** Implementation of a method suggested by a work experience student, Fatma Hussain, this takes chooses delegates and assigns them their n-th most preferred session provided the session is available.
-***
+## Usage
+To see how to use the bespoke *iterative preference* method proposed in this repo, access and run the `src/iterative_preference.R` script.
-# Background
-The problem we will tackle in this repository is of *preference allocation*/*one-sided matching*.
+To see how to use [matchingR](https://github.com/jtilly/matchingR)'s implementation of Gale-Shapley algorithm, access and run the `src/galeshapley.R` script.
-## Task
-Consider that we have to assign *x* people to *y* sessions. For each of these *y* sessions,
-and individual person will have a preference ordering, meaning that they strictly prefer some sessions over others.
+> Note: Your data needs to be in tidy data format for *iterative preference* whereas for the Gale-Shapley algorithm, it does not.
-The task is to allocate these *x* people to their *y* sessions, accounting for their preferences
-in such a way that the total utility of all *x* people is maximised.
+WIP Shiny app is being developed so you can enter your data and apply the iterative preference method on it to get matchings.
+- https://avisionh.shinyapps.io/preference20allocation/
+
+Documentation of how each method works is available in these slides:
+- https://avisionh.github.io/preferenceallocation/
-## Aim/Motivation
-- [x] Write an algorithm that automates the matching/mapping of one set to another set given pre-defined constraints.
-- [ ] Write effective functions, include error-trapping and -handling.
-- [ ] Build a Shiny app that makes this algorithm accessible to non-programmers.
-- [ ] Host the Shiny app on a public domain, [shinyapp.io](https://www.shinyapps.io/).
-- [ ] Demonstrate a consistent R coding and Git workflow usage.
-- [ ] Implement a CI/CD pipeline to robustly and continuously test whether the code works on different operating systems (OS) using [travis-ci](https://travis-ci.org/) and [Azure DevOps](https://azure.microsoft.com/en-gb/services/devops/).
-- [ ] Adopt an Agile project management approach using [Azure DevOps](https://azure.microsoft.com/en-gb/services/devops/) to capture and efficiently manage feature requests.
-- [ ] Conduct user-research to continuously improve the algorithm and Shiny app.
+## Getting help
+If you encounter a clear bug, please fill a minimal reproducible example on [Issues](https://github.com/avisionh/preferenceallocation/issues). For questions and other discussion, please use the [Discussion](https://github.com/avisionh/preferenceallocation/discussions) channel.
+
+***
# Case Study
-This algorithm was used in the [GSS Conference 2018](https://gss.civilservice.gov.uk/events/gss-conference-2018/) to allocate a set of 400 conference delegates to a series of talks that were taking place at the same time.
+This algorithm was used in the 2018 and 2019 versions of the [GSS Conference](https://gss.civilservice.gov.uk/) to allocate a set ofvconference delegates to a series of talks that were taking place at the same time.
These talks were delivered by internal government and external private sector companies.
@@ -44,11 +42,8 @@ In total, there were four sessions of five simultaneous talks. As such, this alg
**Note:** The rooms in which the speakers delivered their presentations were not pre-allocated. Instead, the decision to place more popular talks (based on people's preferences) in larger rooms was based on plotting the distribution of preferences for the five simultaneous talks for all delegates.
-# Methodology
-We will tackle this problem in two ways:
-1. **Gale-Shapley Algorithm |** Implementation of Alvin Roth and Lloyd Shapley's algorithm that assigns delegates to sessions in random order by accounting for both their preferences and ensuring that no two matching pairs will mutually want to switch their matches.
-1. **Iterative Preference |** Implementation of a method suggested by a work experience student, Fatma Hussain, this takes chooses delegates and assigns them their n-th most preferred session provided the session is available.
-***
+## EARL 2019
+This project was presented at [Enterprise Application of the R Language (EARL) Conference](https://www.mango-solutions.com/earl-speaker-highlights-from-the-mango-team/) in 2019.
## References
- [The Stable Marriage Problem and School Choice](http://www.ams.org/publicoutreach/feature-column/fc-2015-03)
diff --git a/_build.sh b/_build.sh
deleted file mode 100644
index 10b8674..0000000
--- a/_build.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env Rscript
-
-rmarkdown::render("index.Rmd")
\ No newline at end of file
diff --git a/_deploy.sh b/_deploy.sh
deleted file mode 100644
index ecdbff8..0000000
--- a/_deploy.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-# Configure your name and email if you have not done so
-git config --global user.email ${EMAIL}
-git config --global user.name ${USERNAME}
-
-# Ensure that the book will only be updated when the build is
-# triggered from the master branch.
-[ "${TRAVIS_BRANCH}" != "master" ] && exit 0
-
-[ "${TRAVIS_PULL_REQUEST}" != "false" ] && exit 0
-
-# Clone the repository to the book-output directory
-git clone -b gh-pages \
- https://${GITHUB_PAT}@github.com/${TRAVIS_REPO_SLUG}.git \
- book-output
-
-# Copy locally built *.html files into
-cp -r index.html book-output/
-
-# Create .nojekyll file to prevent git from trying to build
-# html pages with jekyll.
-touch book-output/.nojekyll
-
-# Add the locally built files to a commit and push
-cd book-output
-git add . -f
-git commit -m "chore: automatic build update" || true
-git push origin gh-pages
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..da801f2
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,5989 @@
+
+
+
+
For instance, `Person 1` and `Person 4` are allocated to `r paste0("Session ", results_galeshapley$matched.students[1,])` and `r paste0("Session ", results_galeshapley$matched.students[4,])` respectively.
@@ -169,40 +189,53 @@ rownames(results_galeshapley$matched.students) <- names
colnames(results_galeshapley$matched.students) <- "Session Allocated"
# show allocation
-results_galeshapley$matched.students %>%
- t()
+t(results_galeshapley$matched.students)
```
In the second output is the table of delegates' initial preferences to compare our allocations against.
```{r Gale-Shapley - delegate preferences}
-# show deleagate preferences
-utility_delegates %>%
- datatable()
+# show delegate preferences
+datatable(data = utility_delegates)
```
## Analysis Approach 2 - Iterative Preferences {.smaller}
In the code below, we will run Fatma's suggested algorithm, **Iterative Preferences** against the initial preferences of delegates to check that our matchings are desirable.
-``` {r Iterative Preference - implementation, echo = TRUE, eval = FALSE}
-results_iterativepreference <- func_iterative_preferences(
- x = utility_delegates,
- limits = c(2, 1, 1, 2),
- with_replacement = FALSE
+Before we do so, we need to reshape our data into tidy data format.
+```{r Iterative Preference - wrangle, include=FALSE}
+utility_delegates <- utility_delegates %>%
+ rownames_to_column(var = "Session") %>%
+ pivot_longer(cols = -"Session",
+ names_to = "Person",
+ values_to = "Preference Score") %>%
+ pivot_wider(id_cols = "Person",
+ names_from = "Session",
+ values_from = "Preference Score")
+room_sizes <- data.frame(Room = c("Room_01","Room_02","Room_03","Room_04"),
+ Size = c(2, 1 , 1, 2))
+
+```
+
+
+``` {r Iterative Preference - implementation, echo = TRUE}
+results_iterativepreference <- func_iterative_preferences(x = utility_delegates,
+ limits = room_sizes,
+ with_replacement = FALSE
)
```
-```{r (HIDDEN) Dataframe transformation, echo = FALSE, eval = TRUE}
-# convert to viewable format for slides
-results_iterativepreference[[1]] <- results_iterativepreference[[1]] %>%
- magrittr::set_colnames(paste0("Person ", results_iterativepreference[[2]]))
-rownames(results_iterativepreference[[1]]) <- c("Delegate", "Session Allocated")
+```{r Iterative Preference - helper, include = FALSE}
+matchings <- results_iterativepreference[[1]]
+sampling <- results_iterativepreference[[2]]
```
-From running the algorithm we have the following allocation of delegates to sessions. `r results_iterativepreference[[1]][2, ]`.
+
+
+From running the algorithm we have the following allocation of delegates to sessions. `r matchings$SessionPreferredColumnId`.
-For instance, `r results_iterativepreference[[1]] %>% select(1) %>% colnames()` and `r results_iterativepreference[[1]] %>% select(4) %>% colnames()` are allocated to `r paste0("Session ", results_iterativepreference[[1]][2, 1])` and `r paste0("Session ", results_iterativepreference[[1]][2, 4])` respectively.
+For instance, `r pull(matchings[1,1])` and `r pull(matchings[4,1])` are allocated to `r paste0("Session ", pull(matchings[1,2]))` and `r paste0("Session ", pull(matchings[4,2]))` respectively.
## Algorithm's Output {.smaller}
@@ -210,23 +243,32 @@ In the first output is the matched allocations from using the **Iterative Prefer
```{r Iterative Preference - allocation}
# show allocation
-results_iterativepreference[[1]][2, ]
+matchings <- matchings %>%
+ rename("Session Allocated" = "SessionPreferredColumnId") %>%
+ mutate(PersonRowId = paste0('Person ', PersonRowId)) %>%
+ t() %>%
+ as_tibble()
+colnames(matchings) <- matchings[1, ]
+matchings <- matchings[-1, ]
+datatable(data = matchings,
+ options = list(dom = 't'),
+ rownames = FALSE)
```
In the second output is the random order of delegates that our algorithm used.
```{r Iterative Preference - delegate sampling}
# show delegate sampling
-paste0("Person ", results_iterativepreference[[2]])
-
+paste0("Person ", sampling)
```
In the third output is the table of delegates' initial preferences to compare our allocations against.
```{r Iterative Preference - delegate preferences}
# show delegate preferences
-utility_delegates %>%
- datatable()
+datatable(data = utility_delegates,
+ options = list(dom = 't'),
+ rownames = FALSE)
```
# Hope you enjoyed the talk! :)
diff --git a/docs/report.Rmd b/docs/report.Rmd
index 1714773..1d71785 100644
--- a/docs/report.Rmd
+++ b/docs/report.Rmd
@@ -32,10 +32,30 @@ set.seed(1)
options(scipen = 999)
# Load data
-utility_delegates <- read_csv(file = "data/dummy_student_preferences.csv")
+utility_delegates <- read_csv(file = "../data/dummy_student_preferences.csv")
-# Import source code
-source("scripts/main.R")
+# Load functions
+source("../src/functions.R")
+```
+
+```{r data-prep, include=FALSE}
+utility_delegates <- utility_delegates %>%
+ remove_rownames() %>%
+ column_to_rownames(var = "X1")
+
+# Set colleges to have no preferences
+n_delegates <- ncol(utility_delegates)
+m_sessions <- nrow(utility_delegates)
+
+utility_sessions <- matrix(data = rep(x = 0, times = n_delegates*m_sessions),
+ nrow = n_delegates,
+ ncol = m_sessions)
+utility_sessions <- utility_sessions %>%
+ as_tibble() %>%
+ rename(`College 1` = V1,
+ `College 2` = V2,
+ `College 3` = V3,
+ `College 4` = V4)
```
# Introduction
@@ -135,8 +155,7 @@ source("scripts/main.R")
nce, we can see that for *Person 1*, they strictly prefer *Session 2* over *Session 1*, $u_{d_1}(2) > u_{d_1}(1)$.
```{r Data: Delegate Peferences}
-utility_delegates %>%
- datatable()
+datatable(data = utility_delegates)
```
(@) We will also generate the preferences for sessions to delegates in the code below. Here, we have sessions as our columns and delegates as our rows. This table is the transpose of the delegate's one above.
@@ -166,13 +185,15 @@ utility_sessions %>%
# Section 4: Analysis
(@) In the code below, we will run the college admissions variant of the **Gale-Shapley algorithm**.
-``` {r Gale-Shapley Analysis, eval = FALSE}
-galeShapley.collegeAdmissions(studentUtils = utility_delegates,
- collegeUtils = utility_sessions,
- slots = c(2, 1, 1, 2))
+``` {r Gale-Shapley Analysis, echo = TRUE}
+results_galeshapley <- galeShapley.collegeAdmissions(
+ studentUtils = utility_delegates,
+ collegeUtils = utility_sessions,
+ slots = c(2, 1, 1, 2)
+)
```
-(@) From running the algorithm we have the following allocation of delegates to sessions. `r results$matched.students %>% t()`. For instance, `Person 1` and `Person 4` are allocated to `r paste0("Session ", results$matched.students[1,])` and `r paste0("Session ", results$matched.students[4,])` respectively.
+(@) From running the algorithm we have the following allocation of delegates to sessions. `r t(results_galeshapley$matched.students)`. For instance, `Person 1` and `Person 4` are allocated to `r paste0("Session ", results_galeshapley$matched.students[1,])` and `r paste0("Session ", results_galeshapley$matched.students[4,])` respectively.
***
@@ -180,14 +201,13 @@ galeShapley.collegeAdmissions(studentUtils = utility_delegates,
(@) In the code below, we will present the matched allocations generated by the college admissions variant of the **Gale-Shapley algorithm** we used against the initial preferences of delegates to check that our matchings are desirable,
```{r Compare Outputs with Preferences}
-rownames(results$matched.students) <- names
-colnames(results$matched.students) <- "Session Allocated"
-results$matched.students %>%
+rownames(results_galeshapley$matched.students) <- names
+colnames(results_galeshapley$matched.students) <- "Session Allocated"
+results_galeshapley$matched.students %>%
t() %>%
pander()
-utility_delegates %>%
- datatable()
+datatable(data = utility_delegates)
```
diff --git a/inst/global.R b/inst/global.R
index 062ba8e..bb4226f 100644
--- a/inst/global.R
+++ b/inst/global.R
@@ -26,7 +26,7 @@ library(stringr)
library(ggplot2)
# load external functions
-source("scripts/functions.R")
+source("../src/functions.R")
# ----------------------------------------------------------------------- #
diff --git a/www/custom.css b/inst/www/custom.css
similarity index 100%
rename from www/custom.css
rename to inst/www/custom.css
diff --git a/src/functions.R b/src/functions.R
index ab299a8..4ccc7d7 100644
--- a/src/functions.R
+++ b/src/functions.R
@@ -52,7 +52,8 @@ func_sample <- function(x, n, replacement) {
# ARGUMENTS:
# 1. 'x' | (tibble/dataframe) Data to feed in
# 2. 'limits' | (tibble/dataframe) Maximum capacity of each session
-func_iterative_preferences <- function(x, limits, with_replacement) {
+# TODO: Pass in columns names instead of relying on indexing which is brittle
+func_iterative_preferences <- function(x, limits, with_replacement = FALSE) {
# get number of people
n_people <- nrow(x)
@@ -61,10 +62,10 @@ func_iterative_preferences <- function(x, limits, with_replacement) {
# create dummy tibble for storing output
matchings <- tibble(PersonRowId = rep(x = -1, times = n_people),
- SessionPreferredColumnId = rep(x = "dummy", times = n_people))
+ SessionPreferredColumnId = rep(x = -1, times = n_people))
# convert limits from vector to tibble
- limits <- limits[,2] %>% as.tibble()
+ limits <- limits[,2] %>% as_tibble()
# generate vector of people and random sample from it
people <- seq(from = 1, to = n_people, by = 1)
@@ -90,7 +91,8 @@ func_iterative_preferences <- function(x, limits, with_replacement) {
if(limits[preferred_session,] > 0) {
# assign person number to session number
- matchings[i, ] <- c(rownames(x)[sample_people[i]], preferred_session)
+ person_id = as.double(rownames(x)[sample_people[i]])
+ matchings[i, ] <- list(person_id, preferred_session)
# remove a place from session that's been allocated
limits[preferred_session, 1] <- limits[preferred_session, 1] - 1
diff --git a/src/main.R b/src/galeshapley.R
similarity index 68%
rename from src/main.R
rename to src/galeshapley.R
index 3b755d1..faf38ce 100644
--- a/src/main.R
+++ b/src/galeshapley.R
@@ -11,7 +11,7 @@ library(dplyr)
library(tibble)
# Load custom functions
-source('scripts/functions.R')
+source('src/functions.R')
# Set seed so we can replicate our results
set.seed(1)
@@ -19,18 +19,18 @@ set.seed(1)
# Load in student preferences dummy data
utility_delegates <- read_csv(file = "data/dummy_student_preferences.csv")
utility_delegates <- utility_delegates %>%
- remove_rownames %>%
+ remove_rownames() %>%
column_to_rownames(var = "X1")
# Set colleges to have no preferences
n_delegates <- ncol(utility_delegates)
m_sessions <- nrow(utility_delegates)
-utility_sessions <- matrix(data = rep(x = 0, times = n_delegates*m_sessions),
- nrow = n_delegates,
- ncol = m_sessions)
+utility_sessions <- matrix(data = rep(x = 0, times = n_delegates*m_sessions),
+ nrow = n_delegates,
+ ncol = m_sessions)
utility_sessions <- utility_sessions %>%
- as.tibble() %>%
+ as_tibble() %>%
rename(`College 1` = V1,
`College 2` = V2,
`College 3` = V3,
@@ -45,9 +45,6 @@ results_galeshapley <- galeShapley.collegeAdmissions(
collegeUtils = utility_sessions,
slots = c(2, 1, 1, 2)
)
-# Approach 2 - Iterative Preferences
-results_iterativepreference <- func_iterative_preferences(x = utility_delegates, limits = c(2, 1, 1, 2), with_replacement = FALSE)
- # convert matchings to dataframe
-results_iterativepreference[[1]] <- results_iterativepreference[[1]] %>% as.data.frame()
+
diff --git a/src/test/benchmark_large_data.R b/src/iterative_preference.R
similarity index 97%
rename from src/test/benchmark_large_data.R
rename to src/iterative_preference.R
index b56346b..8e7efa6 100644
--- a/src/test/benchmark_large_data.R
+++ b/src/iterative_preference.R
@@ -13,7 +13,7 @@ library(dplyr)
library(tibble)
# Load custom functions
-source('scripts/functions.R')
+source('src/functions.R')
# Set seed so we can replicate our results
set.seed(1)
@@ -56,7 +56,7 @@ utility_delegates <- utility_delegates %>%
room_sizes <- data.frame(Room = c("Room_01","Room_02","Room_03","Room_04"),
Size = c(0.2 * n_delegates, 0.3 * n_delegates ,0.1 * n_delegates, 0.4 * n_delegates))
-# Run interative preferences timed
+# Run iterative preferences timed
start_time <- Sys.time()
results_iterativepreference <- func_iterative_preferences(x = utility_delegates,
limits = room_sizes,