From 29c73a3090d6e0d6daa25011c9ea48bee44206db Mon Sep 17 00:00:00 2001 From: Neema Date: Mon, 23 Dec 2024 08:49:50 +0530 Subject: [PATCH 1/4] Added file on chisquare_test --- .../chi-squared-tests/chi-squared-tests.md | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md diff --git a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md new file mode 100644 index 00000000000..efb9f4a9915 --- /dev/null +++ b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md @@ -0,0 +1,82 @@ +--- +Title: 'Chi-Squared-tests' +Description: 'Assess the relationship between the actual and expected variables against a hypothesis' +Subjects: + - 'Machine Learning' + - 'Data Science' +Tags: + - 'Statistics' + - 'Properties' + - 'Models' + - 'Data' +CatalogContent: + - 'learn-python-3' + - 'paths/computer-science' +--- + +In statsmodel **`chisquare`** is used to test for proportions. It can be used to check if the observed proportions differ from +the expected proportions. In simpler words, it tests whether proportions (sucess/failures) are equal across groups. It can +compare proportions across multiple groups or categories. It cannot be used to check for independence and goodness-of-fit. 
+ +## Syntax + +```psuedo +statsmodels.stats.proportion.proportions_chisquare( + count, + nobs, + value=None +) +``` + +- `count` {int,array_like}: a 1D array of success in nob trials +- `nobs` {int}: the total number of observations +- `value` {none} + +## Example + +```py +from statsmodels.stats.proportion import proportions_chisquare + +# Observed counts +counts = [150, 80, 100, 70] + +# Total number of observations +nobs = sum(counts) + +# Perform the chi-square test +chi2, p_value, (observed_table, expected_table) = proportions_chisquare(counts, nobs) + +# Print the results +print(f"Chi-square statistic: {chi2}") +print(f"P-value: {p_value}") +print("\nObserved and Expected Table:") +print(observed_table) +print("\nExpected Counts:") +print(expected_table) + +# Interpret the p-value +alpha = 0.05 +if p_value < alpha: + print("\nReject the null hypothesis: The proportions are significantly different.") +else: + print("\nFail to reject the null hypothesis: The proportions are not significantly different.") +``` + +```shell +Chi-square statistic: 50.66666666666667 +P-value: 5.761101160109705e-11 + +Observed and Expected Table: +[[150. 250.] + [ 80. 320.] + [100. 300.] + [ 70. 330.]] + +Expected Counts: +[[100. 300.] + [100. 300.] + [100. 300.] + [100. 300.]] + +Reject the null hypothesis: The proportions are significantly different. 
+``` From 183655f6bdf635b0faed50714cd95d1b2fa34899 Mon Sep 17 00:00:00 2001 From: Mamta Wardhani Date: Fri, 27 Dec 2024 22:42:37 +0530 Subject: [PATCH 2/4] Update chi-squared-tests.md minor fixes --- .../chi-squared-tests/chi-squared-tests.md | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md index efb9f4a9915..54a7b087015 100644 --- a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md +++ b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md @@ -1,6 +1,6 @@ --- -Title: 'Chi-Squared-tests' -Description: 'Assess the relationship between the actual and expected variables against a hypothesis' +Title: 'Chi-Squared tests' +Description: 'Assess the relationship between the actual and expected variables against a hypothesis.' Subjects: - 'Machine Learning' - 'Data Science' @@ -14,26 +14,23 @@ CatalogContent: - 'paths/computer-science' --- -In statsmodel **`chisquare`** is used to test for proportions. It can be used to check if the observed proportions differ from -the expected proportions. In simpler words, it tests whether proportions (sucess/failures) are equal across groups. It can -compare proportions across multiple groups or categories. It cannot be used to check for independence and goodness-of-fit. +The **Chi-Square test** is used to test whether observed proportions differ from expected proportions. In workflows built around statsmodels, it is run with SciPy's `scipy.stats.chisquare()` function, whose syntax is shown below. It is commonly used to compare proportions across multiple groups or categories. Chi-square tests are applied in two contexts: goodness-of-fit (to check whether observed proportions match an expected distribution), which is the test `chisquare()` performs, and the test of independence (to assess whether two categorical variables are independent).
## Syntax ```psuedo -statsmodels.stats.proportion.proportions_chisquare( - count, - nobs, - value=None -) +scipy.stats.chisquare(f_obs, f_exp=None, ddof=0, axis=0) ``` -- `count` {int,array_like}: a 1D array of success in nob trials -- `nobs` {int}: the total number of observations -- `value` {none} +- `f_obs`: The observed frequencies or values. This should be a 1D or 2D array where each value represents the observed count in a category or group. +- `f_exp`: The expected frequencies or values. This is also a 1D or 2D array, where each value represents the expected count in the corresponding category or group. +- `ddof`: The "Delta Degrees of Freedom" adjustment for the test. This is used to adjust for the number of parameters estimated from the data. For a goodness-of-fit test, `ddof=0` is standard, but you can adjust it for specific models or tests. +- `axis`: The axis along which to compute the test. For multi-dimensional data, you can specify the axis (0 for rows and 1 for columns). If `axis` is set to `None`, the test is applied to all dimensions of the array. 
## Example +In this example, a chi-square test is performed to compare observed proportions across four categories with the expected proportions to determine if they significantly differ: + ```py from statsmodels.stats.proportion import proportions_chisquare @@ -44,13 +41,11 @@ counts = [150, 80, 100, 70] nobs = sum(counts) # Perform the chi-square test -chi2, p_value, (observed_table, expected_table) = proportions_chisquare(counts, nobs) +chi2, p_value, expected_table = proportions_chisquare(counts, nobs) # Print the results print(f"Chi-square statistic: {chi2}") print(f"P-value: {p_value}") -print("\nObserved and Expected Table:") -print(observed_table) print("\nExpected Counts:") print(expected_table) @@ -62,21 +57,20 @@ else: print("\nFail to reject the null hypothesis: The proportions are not significantly different.") ``` +The code above generates the ouput as follows: + ```shell Chi-square statistic: 50.66666666666667 P-value: 5.761101160109705e-11 -Observed and Expected Table: -[[150. 250.] - [ 80. 320.] - [100. 300.] - [ 70. 330.]] - Expected Counts: -[[100. 300.] - [100. 300.] - [100. 300.] - [100. 300.]] +(array([[150., 250.], + [ 80., 320.], + [100., 300.], + [ 70., 330.]]), array([[100., 300.], + [100., 300.], + [100., 300.], + [100., 300.]])) Reject the null hypothesis: The proportions are significantly different. 
``` From 3b30264963f659dcfefbad5c9b9ebecd370e1dc2 Mon Sep 17 00:00:00 2001 From: Avdhoot <50920321+avdhoottt@users.noreply.github.com> Date: Tue, 14 Jan 2025 20:18:42 +0530 Subject: [PATCH 3/4] Update content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md --- .../statsmodels/terms/chi-squared-tests/chi-squared-tests.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md index 54a7b087015..f62fe00f39d 100644 --- a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md +++ b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md @@ -25,7 +25,7 @@ scipy.stats.chisquare(f_obs, f_exp=None, ddof=0, axis=0) - `f_obs`: The observed frequencies or values. This should be a 1D or 2D array where each value represents the observed count in a category or group. - `f_exp`: The expected frequencies or values. This is also a 1D or 2D array, where each value represents the expected count in the corresponding category or group. - `ddof`: The "Delta Degrees of Freedom" adjustment for the test. This is used to adjust for the number of parameters estimated from the data. For a goodness-of-fit test, `ddof=0` is standard, but you can adjust it for specific models or tests. -- `axis`: The axis along which to compute the test. For multi-dimensional data, you can specify the axis (0 for rows and 1 for columns). If `axis` is set to `None`, the test is applied to all dimensions of the array. +- `axis`: The axis along which the test is computed. For 2D data, `axis=0` (the default) runs a separate test on each column, and `axis=1` runs a separate test on each row. If `axis` is set to `None`, all values in `f_obs` are treated as a single data set.
## Example From e7ac025c91b204d34fed3b9721828ceaa6357a24 Mon Sep 17 00:00:00 2001 From: Avdhoot Fulsundar Date: Tue, 14 Jan 2025 20:29:55 +0530 Subject: [PATCH 4/4] Updates and errors --- .../chi-squared-tests/chi-squared-tests.md | 49 +++++++++---------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md index f62fe00f39d..96b9009dd56 100644 --- a/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md +++ b/content/python/concepts/statsmodels/terms/chi-squared-tests/chi-squared-tests.md @@ -29,48 +29,43 @@ scipy.stats.chisquare(f_obs, f_exp=None, ddof=0, axis=0) ## Example -In this example, a chi-square test is performed to compare observed proportions across four categories with the expected proportions to determine if they significantly differ: +In this example, a chi-square test is performed to compare observed proportions across four categories with the expected proportions to determine if they differ: ```py -from statsmodels.stats.proportion import proportions_chisquare +from scipy.stats import chisquare # Observed counts counts = [150, 80, 100, 70] -# Total number of observations -nobs = sum(counts) - -# Perform the chi-square test -chi2, p_value, expected_table = proportions_chisquare(counts, nobs) - -# Print the results -print(f"Chi-square statistic: {chi2}") +# For equal expected proportions (null hypothesis) +# Expected counts would be total/number of categories +n_categories = len(counts) +total = sum(counts) +expected = [total/n_categories] * n_categories + +# Perform chi-square test +chi2_stat, p_value = chisquare( + f_obs=counts, # Observed frequencies + f_exp=expected, # Expected frequencies + ddof=0 # Degrees of freedom adjustment +) + +# Print results +print(f"Chi-square statistic: {chi2_stat}") print(f"P-value: {p_value}") -print("\nExpected 
Counts:") -print(expected_table) -# Interpret the p-value +# Interpret results alpha = 0.05 if p_value < alpha: - print("\nReject the null hypothesis: The proportions are significantly different.") + print("Reject the null hypothesis: The proportions are significantly different.") else: - print("\nFail to reject the null hypothesis: The proportions are not significantly different.") + print("Fail to reject the null hypothesis: The proportions are not significantly different.") ``` The code above generates the ouput as follows: ```shell -Chi-square statistic: 50.66666666666667 -P-value: 5.761101160109705e-11 - -Expected Counts: -(array([[150., 250.], - [ 80., 320.], - [100., 300.], - [ 70., 330.]]), array([[100., 300.], - [100., 300.], - [100., 300.], - [100., 300.]])) - +Chi-square statistic: 38.0 +P-value: 2.8264748814532456e-08 Reject the null hypothesis: The proportions are significantly different. ```