From 6f18dc20df01f6c4b14d9e8cec26eea2e3ea42af Mon Sep 17 00:00:00 2001
From: Yennie Jun <yennie.jun@gmail.com>
Date: Tue, 15 Dec 2020 16:52:34 +0900
Subject: [PATCH] Adding example for cleaning and exploring survey data

---
 data-plots/survey-data.ipynb | 1287 ++++++++++++++++++++++++++++++++++
 1 file changed, 1287 insertions(+)
 create mode 100644 data-plots/survey-data.ipynb
diff --git a/data-plots/survey-data.ipynb b/data-plots/survey-data.ipynb
new file mode 100644
index 0000000..652fb5c
--- /dev/null
+++ b/data-plots/survey-data.ipynb
@@ -0,0 +1,1287 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import datetime\n",
+    "import matplotlib.pyplot as plt\n",
+    "import re\n",
+    "import psycopg2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fetching data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Connect to database\n",
+    "conn = psycopg2.connect(\n",
+    "    host='covid19db.org',\n",
+    "    port=5432,\n",
+    "    dbname='covid19',\n",
+    "    user='covid19',\n",
+    "    password='covid19')\n",
+    "cur = conn.cursor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fetch data using SQL Query\n",
+    "sql_command = \"\"\"SELECT * FROM world_bank\"\"\"\n",
+    "df_wb = pd.read_sql(sql_command, conn)\n",
+    "\n",
+    "sql_command = \"\"\"SELECT * FROM surveys\"\"\"\n",
+    "df_surveys = pd.read_sql(sql_command, conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## World Bank Table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>value</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>country</th>\n",
+       "      <th>indicator_name</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Afghanistan</th>\n",
+       "      <th>ARI treatment (% of children under 5 taken to a health provider)</th>\n",
+       "      <td>61.500000</td>\n",
+       "      <td>2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to clean fuels and technologies for cooking (% of population)</th>\n",
+       "      <td>32.440000</td>\n",
+       "      <td>2016</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to electricity (% of population)</th>\n",
+       "      <td>98.713203</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to electricity, rural (% of rural population)</th>\n",
+       "      <td>98.272872</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to electricity, urban (% of urban population)</th>\n",
+       "      <td>100.000000</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Zimbabwe</th>\n",
+       "      <th>Women who believe a husband is justified in beating his wife when she neglects the children (%)</th>\n",
+       "      <td>21.400000</td>\n",
+       "      <td>2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Women who believe a husband is justified in beating his wife when she refuses sex with him (%)</th>\n",
+       "      <td>14.500000</td>\n",
+       "      <td>2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Women who were first married by age 15 (% of women ages 20-24)</th>\n",
+       "      <td>3.700000</td>\n",
+       "      <td>2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Women who were first married by age 18 (% of women ages 20-24)</th>\n",
+       "      <td>32.400000</td>\n",
+       "      <td>2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Women's share of population ages 15+ living with HIV (%)</th>\n",
+       "      <td>59.800000</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>307665 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                                     value  \\\n",
+       "country     indicator_name                                                   \n",
+       "Afghanistan ARI treatment (% of children under 5 taken to a...   61.500000   \n",
+       "            Access to clean fuels and technologies for cook...   32.440000   \n",
+       "            Access to electricity (% of population)              98.713203   \n",
+       "            Access to electricity, rural (% of rural popula...   98.272872   \n",
+       "            Access to electricity, urban (% of urban popula...  100.000000   \n",
+       "...                                                                    ...   \n",
+       "Zimbabwe    Women who believe a husband is justified in bea...   21.400000   \n",
+       "            Women who believe a husband is justified in bea...   14.500000   \n",
+       "            Women who were first married by age 15 (% of wo...    3.700000   \n",
+       "            Women who were first married by age 18 (% of wo...   32.400000   \n",
+       "            Women's share of population ages 15+ living wit...   59.800000   \n",
+       "\n",
+       "                                                                year  \n",
+       "country     indicator_name                                            \n",
+       "Afghanistan ARI treatment (% of children under 5 taken to a...  2015  \n",
+       "            Access to clean fuels and technologies for cook...  2016  \n",
+       "            Access to electricity (% of population)             2018  \n",
+       "            Access to electricity, rural (% of rural popula...  2018  \n",
+       "            Access to electricity, urban (% of urban popula...  2018  \n",
+       "...                                                              ...  \n",
+       "Zimbabwe    Women who believe a husband is justified in bea...  2015  \n",
+       "            Women who believe a husband is justified in bea...  2015  \n",
+       "            Women who were first married by age 15 (% of wo...  2015  \n",
+       "            Women who were first married by age 18 (% of wo...  2015  \n",
+       "            Women's share of population ages 15+ living wit...  2018  \n",
+       "\n",
+       "[307665 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Group by country and indicator_name (the survey question) and show the mean value and year per group\n",
+    "df_wb.groupby([\"country\", \"indicator_name\"])\\\n",
+    "    [[\"country\", \"indicator_name\", \"value\", \"year\"]].mean(\"value\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only 2018 rows with a non-missing value (NOTE: the filter is 2018 although the name says 2019)\n",
+    "df_wb_2019 = df_wb[df_wb.year==2018].dropna(subset=[\"value\"])[\n",
+    "    [\"country\", \"indicator_name\", \"value\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country</th>\n",
+       "      <th>value</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>indicator_name</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Mammal species, threatened</th>\n",
+       "      <td>215</td>\n",
+       "      <td>215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Bird species, threatened</th>\n",
+       "      <td>215</td>\n",
+       "      <td>215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Plant species (higher), threatened</th>\n",
+       "      <td>215</td>\n",
+       "      <td>215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Fish species, threatened</th>\n",
+       "      <td>215</td>\n",
+       "      <td>215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Adjusted savings: mineral depletion (current US$)</th>\n",
+       "      <td>214</td>\n",
+       "      <td>214</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to electricity (% of population)</th>\n",
+       "      <td>214</td>\n",
+       "      <td>214</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Access to electricity, urban (% of urban population)</th>\n",
+       "      <td>212</td>\n",
+       "      <td>212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Terrestrial protected areas (% of total land area)</th>\n",
+       "      <td>211</td>\n",
+       "      <td>211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Terrestrial and marine protected areas (% of total territorial area)</th>\n",
+       "      <td>210</td>\n",
+       "      <td>210</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Surface area (sq. km)</th>\n",
+       "      <td>209</td>\n",
+       "      <td>209</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    country  value\n",
+       "indicator_name                                                    \n",
+       "Mammal species, threatened                              215    215\n",
+       "Bird species, threatened                                215    215\n",
+       "Plant species (higher), threatened                      215    215\n",
+       "Fish species, threatened                                215    215\n",
+       "Adjusted savings: mineral depletion (current US$)       214    214\n",
+       "Access to electricity (% of population)                 214    214\n",
+       "Access to electricity, urban (% of urban popula...      212    212\n",
+       "Terrestrial protected areas (% of total land area)      211    211\n",
+       "Terrestrial and marine protected areas (% of to...      210    210\n",
+       "Surface area (sq. km)                                   209    209"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show the 10 indicators reported for the largest number of countries\n",
+    "\n",
+    "df_wb_2019.groupby(\"indicator_name\").count()\\\n",
+    "           .sort_values(by=\"value\", ascending=False).head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country</th>\n",
+       "      <th>value</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>indicator_name</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Adjusted savings: education expenditure (% of GNI)</th>\n",
+       "      <td>198</td>\n",
+       "      <td>198</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Compulsory education, duration (years)</th>\n",
+       "      <td>178</td>\n",
+       "      <td>178</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Adjusted savings: education expenditure (current US$)</th>\n",
+       "      <td>178</td>\n",
+       "      <td>178</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Preprimary education, duration (years)</th>\n",
+       "      <td>177</td>\n",
+       "      <td>177</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Primary education, pupils (% female)</th>\n",
+       "      <td>89</td>\n",
+       "      <td>89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Primary education, teachers</th>\n",
+       "      <td>89</td>\n",
+       "      <td>89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Primary education, teachers (% female)</th>\n",
+       "      <td>89</td>\n",
+       "      <td>89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Primary education, pupils</th>\n",
+       "      <td>89</td>\n",
+       "      <td>89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Secondary education, general pupils</th>\n",
+       "      <td>86</td>\n",
+       "      <td>86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Secondary education, general pupils (% female)</th>\n",
+       "      <td>85</td>\n",
+       "      <td>85</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    country  value\n",
+       "indicator_name                                                    \n",
+       "Adjusted savings: education expenditure (% of GNI)      198    198\n",
+       "Compulsory education, duration (years)                  178    178\n",
+       "Adjusted savings: education expenditure (curren...      178    178\n",
+       "Preprimary education, duration (years)                  177    177\n",
+       "Primary education, pupils (% female)                     89     89\n",
+       "Primary education, teachers                              89     89\n",
+       "Primary education, teachers (% female)                   89     89\n",
+       "Primary education, pupils                                89     89\n",
+       "Secondary education, general pupils                      86     86\n",
+       "Secondary education, general pupils (% female)           85     85"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show the 10 indicators reported for the largest number of countries,\n",
+    "# restricted to indicator names containing the keyword 'education'\n",
+    "\n",
+    "df_wb_2019[df_wb_2019.indicator_name.str\\\n",
+    "           .contains(\"education\")]\\\n",
+    "           .groupby(\"indicator_name\").count()\\\n",
+    "           .sort_values(by=\"value\", ascending=False).head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Survey Table"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Currently, the properties of each survey are stored in one long JSON object. We can explode each property in the JSON so that every survey question-answer pair gets its own row."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>source</th>\n",
+       "      <th>wave</th>\n",
+       "      <th>gid</th>\n",
+       "      <th>country</th>\n",
+       "      <th>countrycode</th>\n",
+       "      <th>adm_area_1</th>\n",
+       "      <th>adm_area_2</th>\n",
+       "      <th>adm_area_3</th>\n",
+       "      <th>samplesize</th>\n",
+       "      <th>properties</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>EVS</td>\n",
+       "      <td>1981-1984</td>\n",
+       "      <td>[BEL]</td>\n",
+       "      <td>Belgium</td>\n",
+       "      <td>BEL</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>1145</td>\n",
+       "      <td>{'A001': {'Label': 'Important in life: Family'...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>EVS</td>\n",
+       "      <td>1981-1984</td>\n",
+       "      <td>[CAN]</td>\n",
+       "      <td>Canada</td>\n",
+       "      <td>CAN</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>1254</td>\n",
+       "      <td>{'A001': {'Label': 'Important in life: Family'...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  source       wave    gid  country countrycode adm_area_1 adm_area_2  \\\n",
+       "0    EVS  1981-1984  [BEL]  Belgium         BEL       None       None   \n",
+       "1    EVS  1981-1984  [CAN]   Canada         CAN       None       None   \n",
+       "\n",
+       "  adm_area_3  samplesize                                         properties  \n",
+       "0       None        1145  {'A001': {'Label': 'Important in life: Family'...  \n",
+       "1       None        1254  {'A001': {'Label': 'Important in life: Family'...  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_surveys.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "code_folding": []
+   },
+   "outputs": [],
+   "source": [
+    "def explode_survey_props(props):\n",
+    "    q_label, label, category, frequency = [], [], [], []\n",
+    "\n",
+    "    for line in props:\n",
+    "        if \"X023\" in line: continue # this one too complicated to clean\n",
+    "        if \"X051\" in line: continue # ethnic group; this one has no categories\n",
+    "        if \"original_region_code\" in line: break #this one is not related\n",
+    "            \n",
+    "        for freq in props[line]['Frequencies']:      \n",
+    "            if re.search(\"[A-Z]\\d+_\\d+_(-*\\d+)\", freq):\n",
+    "                q_val = re.search(\"[A-Z]\\d+_\\d+_(-*\\d+)\", freq).group(1)\n",
+    "            else:\n",
+    "                q_val = re.search(\"[A-Z]\\d\\d\\d[A-Z]*_(-*\\d+)\", freq).group(1)\n",
+    "            q_label.append(line)\n",
+    "            label.append(props[line]['Label'])\n",
+    "            frequency.append(props[line]['Frequencies'][freq])        \n",
+    "\n",
+    "            if \"X002\" in freq or \"X003\" in freq: #special case of birth year/age\n",
+    "                category.append(freq)\n",
+    "            else:\n",
+    "                category.append(props[line]['Categories'][q_val])\n",
+    "\n",
+    "    return q_label, label, category, frequency"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Accumulate the exploded values in flat lists (one entry per question-answer pair)\n",
+    "waves, gids, countries, samplesizes = [],[],[],[]\n",
+    "q_labels, labels, categories, frequencies = [],[],[],[]\n",
+    "\n",
+    "# Iterate through the survey to explode each json\n",
+    "for i in range(len(df_surveys)):\n",
+    "    props = df_surveys.properties[i]\n",
+    "    q_label, label, category, frequency = explode_survey_props(props)\n",
+    "    \n",
+    "    # These are copied over from the original dataframe\n",
+    "    source = [df_surveys.source[i]] * len(q_label)\n",
+    "    wave = [df_surveys.wave[i]] * len(q_label)\n",
+    "    gid = [df_surveys.gid[i]] * len(q_label)\n",
+    "    country = [df_surveys.country[i]] * len(q_label)\n",
+    "    samplesize = [df_surveys.samplesize[i]] * len(q_label)\n",
+    "\n",
+    "    waves.extend(wave)\n",
+    "    gids.extend(gid)\n",
+    "    countries.extend(country)\n",
+    "    samplesizes.extend(samplesize)\n",
+    "    q_labels.extend(q_label)\n",
+    "    labels.extend(label)\n",
+    "    categories.extend(category)\n",
+    "    frequencies.extend(frequency)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the dataframe... this takes some time because the arrays are long\n",
+    "\n",
+    "cols = ['wave', 'gid', 'country', 'samplesize', \n",
+    "        'q_label', 'label', 'category', 'frequency']\n",
+    "survey_explode_df = pd.DataFrame([waves, gids, countries, samplesizes, \n",
+    "                                  q_labels, labels, categories, frequencies]).T\n",
+    "survey_explode_df.columns=cols\n",
+    "survey_explode_df.frequency = survey_explode_df.frequency.apply(float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "833664\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>wave</th>\n",
+       "      <th>gid</th>\n",
+       "      <th>country</th>\n",
+       "      <th>samplesize</th>\n",
+       "      <th>q_label</th>\n",
+       "      <th>label</th>\n",
+       "      <th>category</th>\n",
+       "      <th>frequency</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>661109</th>\n",
+       "      <td>2010-2014</td>\n",
+       "      <td>[ESP.2_1]</td>\n",
+       "      <td>Spain</td>\n",
+       "      <td>33</td>\n",
+       "      <td>X028</td>\n",
+       "      <td>Employment status</td>\n",
+       "      <td>Self employed</td>\n",
+       "      <td>0.121212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>272821</th>\n",
+       "      <td>2005-2009</td>\n",
+       "      <td>[MAR]</td>\n",
+       "      <td>Morocco</td>\n",
+       "      <td>1200</td>\n",
+       "      <td>E037</td>\n",
+       "      <td>Government responsibility</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0.033333</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>103880</th>\n",
+       "      <td>2008-2010</td>\n",
+       "      <td>[NOR]</td>\n",
+       "      <td>Norway</td>\n",
+       "      <td>1090</td>\n",
+       "      <td>X003</td>\n",
+       "      <td>Age</td>\n",
+       "      <td>X003_23</td>\n",
+       "      <td>0.019688</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>790382</th>\n",
+       "      <td>2008-2010</td>\n",
+       "      <td>[PRT.3_1, PRT.8_1, PRT.12.4_1, PRT.14_1, PRT.1...</td>\n",
+       "      <td>Portugal</td>\n",
+       "      <td>183</td>\n",
+       "      <td>X036</td>\n",
+       "      <td>Profession/job</td>\n",
+       "      <td>\"Missing; Unkown\"</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>226109</th>\n",
+       "      <td>1999-2004</td>\n",
+       "      <td>[PRI]</td>\n",
+       "      <td>Puerto Rico</td>\n",
+       "      <td>720</td>\n",
+       "      <td>X003</td>\n",
+       "      <td>Age</td>\n",
+       "      <td>X003_-1</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             wave                                                gid  \\\n",
+       "661109  2010-2014                                          [ESP.2_1]   \n",
+       "272821  2005-2009                                              [MAR]   \n",
+       "103880  2008-2010                                              [NOR]   \n",
+       "790382  2008-2010  [PRT.3_1, PRT.8_1, PRT.12.4_1, PRT.14_1, PRT.1...   \n",
+       "226109  1999-2004                                              [PRI]   \n",
+       "\n",
+       "            country samplesize q_label                      label  \\\n",
+       "661109        Spain         33    X028          Employment status   \n",
+       "272821      Morocco       1200    E037  Government responsibility   \n",
+       "103880       Norway       1090    X003                        Age   \n",
+       "790382     Portugal        183    X036             Profession/job   \n",
+       "226109  Puerto Rico        720    X003                        Age   \n",
+       "\n",
+       "                 category  frequency  \n",
+       "661109      Self employed   0.121212  \n",
+       "272821                  4   0.033333  \n",
+       "103880            X003_23   0.019688  \n",
+       "790382  \"Missing; Unkown\"   0.000000  \n",
+       "226109            X003_-1   0.000000  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print(len(survey_explode_df))\n",
+    "display(survey_explode_df.sample(5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "74\n",
+      "['Important in life: Family' 'Important in life: Friends'\n",
+      " 'Important in life: Work' 'State of health (subjective)'\n",
+      " 'Important child qualities: independence'\n",
+      " 'Important child qualities: feeling of responsibility'\n",
+      " 'Important child qualities: obedience'\n",
+      " 'Member: Belong to religious organization'\n",
+      " 'Member: Belong to sports or recreation'\n",
+      " 'Active/Inactive membership of church or religious organization'\n",
+      " 'Active/Inactive membership of sport or recreation'\n",
+      " 'Most people can be trusted' 'Satisfaction with your life'\n",
+      " 'How much freedom of choice and control'\n",
+      " 'Schwartz: It is important to this person living in secure surroundings'\n",
+      " 'Schwartz: It is important to this person to have a good time'\n",
+      " 'Schwartz: It is important to this person to always behave properly'\n",
+      " 'Schwartz: It is important to this person to do something for the good of society'\n",
+      " 'Social position: People in their 20s'\n",
+      " 'Social position: People in their 40s']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the number of unique questions asked in the surveys and the first 20 of them\n",
+    "survey_questions = survey_explode_df.label.unique()\n",
+    "print(len(survey_questions))\n",
+    "print(survey_questions[:20])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['1981-1984', '1990-1993', '1999-2001', '2008-2010', '1989-1993',\n",
+       "       '1994-1998', '1999-2004', '2005-2009', '2010-2014'], dtype=object)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The unique survey waves (year ranges)\n",
+    "survey_explode_df.wave.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>frequency</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>country</th>\n",
+       "      <th>category</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"8\" valign=\"top\">Algeria</th>\n",
+       "      <th>Don´t know</th>\n",
+       "      <td>0.003333</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Missing; Unknown</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>No answer</th>\n",
+       "      <td>0.000833</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not asked in survey</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not at all important</th>\n",
+       "      <td>0.011667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not very important</th>\n",
+       "      <td>0.011667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Rather important</th>\n",
+       "      <td>0.041667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Very important</th>\n",
+       "      <td>0.930833</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"8\" valign=\"top\">Argentina</th>\n",
+       "      <th>Don´t know</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Missing; Unknown</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>No answer</th>\n",
+       "      <td>0.002467</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not asked in survey</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not at all important</th>\n",
+       "      <td>0.002577</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not very important</th>\n",
+       "      <td>0.004394</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Rather important</th>\n",
+       "      <td>0.101518</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Very important</th>\n",
+       "      <td>0.889044</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"4\" valign=\"top\">Armenia</th>\n",
+       "      <th>Don´t know</th>\n",
+       "      <td>0.001592</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Missing; Unknown</th>\n",
+       "      <td>0.002474</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>No answer</th>\n",
+       "      <td>0.000573</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Not asked in survey</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                frequency\n",
+       "country   category                       \n",
+       "Algeria   Don´t know             0.003333\n",
+       "          Missing; Unknown       0.000000\n",
+       "          No answer              0.000833\n",
+       "          Not asked in survey    0.000000\n",
+       "          Not at all important   0.011667\n",
+       "          Not very important     0.011667\n",
+       "          Rather important       0.041667\n",
+       "          Very important         0.930833\n",
+       "Argentina Don´t know             0.000000\n",
+       "          Missing; Unknown       0.000000\n",
+       "          No answer              0.002467\n",
+       "          Not asked in survey    0.000000\n",
+       "          Not at all important   0.002577\n",
+       "          Not very important     0.004394\n",
+       "          Rather important       0.101518\n",
+       "          Very important         0.889044\n",
+       "Armenia   Don´t know             0.001592\n",
+       "          Missing; Unknown       0.002474\n",
+       "          No answer              0.000573\n",
+       "          Not asked in survey    0.000000"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# How did each country answer \"Important in life: Family\" in the 2010-2014 wave?\n",
+    "mask = (survey_explode_df.wave=='2010-2014')\\\n",
+    "    &(survey_explode_df.label==\"Important in life: Family\")\n",
+    "survey_explode_df[mask].groupby([\"country\", \"category\"]).sum().head(20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country</th>\n",
+       "      <th>wave</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>label</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Important child qualities: feeling of responsibility</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Important child qualities: independence</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Important child qualities: obedience</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Important in life: Family</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Important in life: Friends</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Important in life: Work</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>It is not important for me to know about science in my daily life</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Most important: first choice</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Most important: second choice</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Schwartz: It is important to this person living in secure surroundings</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Schwartz: It is important to this person to always behave properly</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Schwartz: It is important to this person to do something for the good of society</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Schwartz: It is important to this person to have a good time</th>\n",
+       "      <td>107</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    country  wave\n",
+       "label                                                            \n",
+       "Important child qualities: feeling of responsib...      107     9\n",
+       "Important child qualities: independence                 107     9\n",
+       "Important child qualities: obedience                    107     9\n",
+       "Important in life: Family                               107     9\n",
+       "Important in life: Friends                              107     9\n",
+       "Important in life: Work                                 107     9\n",
+       "It is not important for me to know about scienc...      107     9\n",
+       "Most important: first choice                            107     9\n",
+       "Most important: second choice                           107     9\n",
+       "Schwartz: It is important to this person living...      107     9\n",
+       "Schwartz: It is important to this person to alw...      107     9\n",
+       "Schwartz: It is important to this person to do ...      107     9\n",
+       "Schwartz: It is important to this person to hav...      107     9"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# For each question whose label mentions \"important\", count how many distinct countries and waves cover it\n",
+    "survey_explode_df[survey_explode_df.label.str.contains(\"Important|important\")]\\\n",
+    "    .groupby(\"label\").agg({\"country\": \"nunique\", \"wave\":\"nunique\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>frequency</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>country</th>\n",
+       "      <th>wave</th>\n",
+       "      <th>label</th>\n",
+       "      <th>category</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Albania</th>\n",
+       "      <th rowspan=\"5\" valign=\"top\">1994-1998</th>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Active/Inactive membership of church or religious organization</th>\n",
+       "      <th>Active member</th>\n",
+       "      <td>0.048048</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Don´t know</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Inactive member</th>\n",
+       "      <td>0.162162</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Missing; Unknown</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>No answer</th>\n",
+       "      <td>0.003003</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <th>...</th>\n",
+       "      <th>...</th>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Zimbabwe</th>\n",
+       "      <th rowspan=\"5\" valign=\"top\">2010-2014</th>\n",
+       "      <th rowspan=\"5\" valign=\"top\">Year of birth</th>\n",
+       "      <th>X002_1994</th>\n",
+       "      <td>0.006580</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>X002_1995</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>X002_1996</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>X002_1997</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>X002_1999</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>354576 rows × 1 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                                                        frequency\n",
+       "country  wave      label                                              category                   \n",
+       "Albania  1994-1998 Active/Inactive membership of church or religio... Active member      0.048048\n",
+       "                                                                      Don´t know         0.000000\n",
+       "                                                                      Inactive member    0.162162\n",
+       "                                                                      Missing; Unknown   0.000000\n",
+       "                                                                      No answer          0.003003\n",
+       "...                                                                                           ...\n",
+       "Zimbabwe 2010-2014 Year of birth                                      X002_1994          0.006580\n",
+       "                                                                      X002_1995          0.000000\n",
+       "                                                                      X002_1996          0.000000\n",
+       "                                                                      X002_1997          0.000000\n",
+       "                                                                      X002_1999          0.000000\n",
+       "\n",
+       "[354576 rows x 1 columns]"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Group survey questions by country, wave, label, category, then sum\n",
+    "survey_explode_df.groupby([\"country\", \"wave\", \"label\", \"category\"])\\\n",
+    "    [[\"samplesize\", \"frequency\"]].sum()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

		value	year
country	indicator_name
Afghanistan	ARI treatment (% of children under 5 taken to a health provider)	61.500000	2015
Access to clean fuels and technologies for cooking (% of population)	32.440000	2016
Access to electricity (% of population)	98.713203	2018
Access to electricity, rural (% of rural population)	98.272872	2018
Access to electricity, urban (% of urban population)	100.000000	2018
...	...	...	...
Zimbabwe	Women who believe a husband is justified in beating his wife when she neglects the children (%)	21.400000	2015
Women who believe a husband is justified in beating his wife when she refuses sex with him (%)	14.500000	2015
Women who were first married by age 15 (% of women ages 20-24)	3.700000	2015
Women who were first married by age 18 (% of women ages 20-24)	32.400000	2015
Women's share of population ages 15+ living with HIV (%)	59.800000	2018
	country	value
indicator_name
Mammal species, threatened	215	215
Bird species, threatened	215	215
Plant species (higher), threatened	215	215
Fish species, threatened	215	215
Adjusted savings: mineral depletion (current US$)	214	214
Access to electricity (% of population)	214	214
Access to electricity, urban (% of urban population)	212	212
Terrestrial protected areas (% of total land area)	211	211
Terrestrial and marine protected areas (% of total territorial area)	210	210
Surface area (sq. km)	209	209
	country	value
indicator_name
Adjusted savings: education expenditure (% of GNI)	198	198
Compulsory education, duration (years)	178	178
Adjusted savings: education expenditure (current US$)	178	178
Preprimary education, duration (years)	177	177
Primary education, pupils (% female)	89	89
Primary education, teachers	89	89
Primary education, teachers (% female)	89	89
Primary education, pupils	89	89
Secondary education, general pupils	86	86
Secondary education, general pupils (% female)	85	85
	source	wave	gid	country	countrycode	adm_area_1	adm_area_2	adm_area_3	samplesize	properties
0	EVS	1981-1984	[BEL]	Belgium	BEL	None	None	None	1145	{'A001': {'Label': 'Important in life: Family'...
1	EVS	1981-1984	[CAN]	Canada	CAN	None	None	None	1254	{'A001': {'Label': 'Important in life: Family'...
	wave	gid	country	samplesize	q_label	label	category	frequency
661109	2010-2014	[ESP.2_1]	Spain	33	X028	Employment status	Self employed	0.121212
272821	2005-2009	[MAR]	Morocco	1200	E037	Government responsibility	4	0.033333
103880	2008-2010	[NOR]	Norway	1090	X003	Age	X003_23	0.019688
790382	2008-2010	[PRT.3_1, PRT.8_1, PRT.12.4_1, PRT.14_1, PRT.1...	Portugal	183	X036	Profession/job	\"Missing; Unkown\"	0.000000
226109	1999-2004	[PRI]	Puerto Rico	720	X003	Age	X003_-1	0.000000
		frequency
country	category
Algeria	Don´t know	0.003333
Missing; Unknown	0.000000
No answer	0.000833
Not asked in survey	0.000000
Not at all important	0.011667
Not very important	0.011667
Rather important	0.041667
Very important	0.930833
Argentina	Don´t know	0.000000
Missing; Unknown	0.000000
No answer	0.002467
Not asked in survey	0.000000
Not at all important	0.002577
Not very important	0.004394
Rather important	0.101518
Very important	0.889044
Armenia	Don´t know	0.001592
Missing; Unknown	0.002474
No answer	0.000573
Not asked in survey	0.000000
	country	wave
label
Important child qualities: feeling of responsibility	107	9
Important child qualities: independence	107	9
Important child qualities: obedience	107	9
Important in life: Family	107	9
Important in life: Friends	107	9
Important in life: Work	107	9
It is not important for me to know about science in my daily life	107	9
Most important: first choice	107	9
Most important: second choice	107	9
Schwartz: It is important to this person living in secure surroundings	107	9
Schwartz: It is important to this person to always behave properly	107	9
Schwartz: It is important to this person to do something for the good of society	107	9
Schwartz: It is important to this person to have a good time	107	9
				frequency
country	wave	label	category
Albania	1994-1998	Active/Inactive membership of church or religious organization	Active member	0.048048
Don´t know	0.000000
Inactive member	0.162162
Missing; Unknown	0.000000
No answer	0.003003
...	...	...	...	...
Zimbabwe	2010-2014	Year of birth	X002_1994	0.006580
X002_1995	0.000000
X002_1996	0.000000
X002_1997	0.000000
X002_1999	0.000000