Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add simons refined samples #24

Merged
merged 74 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
1dc9bc8
add simons refined samples
Veluchs Apr 24, 2024
13f566b
add simon
Veluchs Apr 25, 2024
77edb27
add requirements
Veluchs Apr 25, 2024
13697f7
remove library description
Veluchs Apr 25, 2024
77717f2
add requirements
Veluchs Apr 25, 2024
31c26f6
remove module description
Veluchs Apr 25, 2024
ac8e6a4
remove module description
Veluchs Apr 25, 2024
b1db0f6
remove module description
Veluchs Apr 25, 2024
69b4cb0
remove module description
Veluchs Apr 25, 2024
cde03d0
remove module description
Veluchs Apr 25, 2024
ae46f1b
remove module description
Veluchs Apr 25, 2024
97d676c
remove module description
Veluchs Apr 25, 2024
e1deb05
remove module description
Veluchs Apr 25, 2024
94291d5
remove module description
Veluchs Apr 25, 2024
9b5a724
remove module description
Veluchs Apr 25, 2024
1e386ed
remove module description
Veluchs Apr 25, 2024
426325e
remove module description
Veluchs Apr 25, 2024
0a76b0b
remove module description
Veluchs Apr 25, 2024
040102c
add requirements
Veluchs Apr 25, 2024
0a330ec
remove module description
Veluchs Apr 25, 2024
e3675ef
add requirements
Veluchs Apr 25, 2024
b5e940c
remove module description
Veluchs Apr 25, 2024
b03d8d5
remove module description
Veluchs Apr 25, 2024
889c494
remove module description
Veluchs Apr 25, 2024
aad9a64
remove module description
Veluchs Apr 25, 2024
b8156bd
remove module description
Veluchs Apr 25, 2024
bcfbb1b
fix annotation
Veluchs Apr 25, 2024
9df1184
add updated to raw
Veluchs Apr 25, 2024
46ee89f
remove external test data
Veluchs Apr 25, 2024
d2aaacf
Merge branch 'bigcode-project:main' into main
Veluchs Apr 25, 2024
fef081f
remove empty test
Veluchs Apr 25, 2024
207679b
Merge branch 'main' of https://github.com/Veluchs/open-eval
Veluchs Apr 25, 2024
5232f78
fix requirements
Veluchs Apr 26, 2024
c194244
fix return
Veluchs Apr 26, 2024
e4b9943
add examples
Veluchs Apr 26, 2024
4dd0c88
add temp directories for testing
Veluchs Apr 26, 2024
daaaa32
refine examples
Veluchs Apr 26, 2024
f7a55b7
fix examples
Veluchs Apr 26, 2024
5be00b9
fix examples
Veluchs Apr 26, 2024
ffd348f
fix example output
Veluchs Apr 26, 2024
40fa7a0
fix examples
Veluchs Apr 26, 2024
c602e33
fix example output
Veluchs Apr 26, 2024
a34c99f
fix test
Veluchs Apr 26, 2024
788799d
remove samples from clean
Veluchs Apr 26, 2024
39d5a54
fix temporary directory creation
Veluchs Apr 26, 2024
32b53c7
fix examples
Veluchs Apr 26, 2024
461b68f
fix examples
Veluchs Apr 26, 2024
3f3fff0
fix example output
Veluchs Apr 30, 2024
cfff1d5
fix example output
Veluchs Apr 30, 2024
03b300b
fix examples
Veluchs Apr 30, 2024
3084a4c
fix example
Veluchs Apr 30, 2024
e0cb51d
fix dtype
Veluchs Apr 30, 2024
56d11ec
add sympy
Veluchs Apr 30, 2024
d9617df
fix typo
Veluchs Apr 30, 2024
2add6a2
fix examples
Veluchs Apr 30, 2024
4777e19
Merge branch 'main' of https://github.com/Veluchs/open-eval
Veluchs Apr 30, 2024
4bda05b
fix
Veluchs Apr 30, 2024
5dc527d
fix output
Veluchs Apr 30, 2024
8ddfca9
fix
Veluchs Apr 30, 2024
50ebdd0
fix
Veluchs Apr 30, 2024
c42116a
fix
Veluchs Apr 30, 2024
f4f26ce
f
Veluchs Apr 30, 2024
1d830a1
add seed
Veluchs Apr 30, 2024
c0ccd03
f
Veluchs Apr 30, 2024
90f428f
f
Veluchs Apr 30, 2024
fc03357
add additional libary usage
Veluchs May 1, 2024
459b331
add explicit second library usage
Veluchs May 2, 2024
5e09c76
fix test case
Veluchs May 2, 2024
a9e6204
remove external data
Veluchs May 3, 2024
6b5390a
Merge branch 'main' of https://github.com/bigcode-project/open-eval
Veluchs May 3, 2024
ddcb868
remove unused libraries
Veluchs May 3, 2024
bb00db4
Merge branch 'main' into main
terryyz May 3, 2024
78f47b5
remove: remove directorio/custom1.txt
terryyz May 3, 2024
13d01e2
remove: remove directorio/custom2.txt
terryyz May 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions data/clean/f_644_simon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


def f_644(list_of_pairs):
    """
    Build a Pandas DataFrame from (category, value) pairs and rescale the values
    with MinMaxScaler.

    Parameters:
    list_of_pairs (list): A list of tuples, where the first element is the category and
                          the second element is the value.

    Returns:
    DataFrame: A pandas DataFrame containing the columns 'Category' and 'Value'.
               Category contains the first element of each tuple.
               Value contains the normalized value of each tuple.

    Raises:
    Exception: If the input list is empty.
    ValueError: If the values are not numeric.

    Requirements:
    - pandas
    - sklearn.preprocessing.MinMaxScaler

    Example:
    >>> list_of_pairs = [('Fruits', 5), ('Vegetables', 9), ('Dairy', -1), ('Bakery', -2), ('Meat', 4)]
    >>> df = f_644(list_of_pairs)
    >>> print(df)
         Category     Value
    0      Fruits  0.636364
    1  Vegetables  1.000000
    2       Dairy  0.090909
    3      Bakery  0.000000
    4        Meat  0.545455
    >>> list_of_pairs = [('car', 3.2), ('bike', 0), ('train', -1), ('plane', -6.2), ('ship', 1234)]
    >>> df = f_644(list_of_pairs)
    >>> print(df)
      Category     Value
    0      car  0.007579
    1     bike  0.004999
    2    train  0.004193
    3    plane  0.000000
    4     ship  1.000000
    """
    # Guard first: nothing can be scaled without at least one pair.
    if not len(list_of_pairs):
        raise Exception('The input array should not be empty.')

    frame = pd.DataFrame(list_of_pairs, columns=['Category', 'Value'])

    # A single non-numeric entry turns the whole column into a non-numeric dtype.
    values_are_numeric = pd.api.types.is_numeric_dtype(frame['Value'])
    if not values_are_numeric:
        raise ValueError('The values have to be numeric.')

    # MinMaxScaler expects 2-D input, hence the double-bracket column selection.
    normalized = MinMaxScaler().fit_transform(frame[['Value']])
    frame['Value'] = normalized
    return frame


import unittest

class TestCases(unittest.TestCase):
    """Unit tests for f_644: DataFrame construction and min-max scaling of 'Value'."""

    def _assert_values(self, result, expected):
        """Assert each category's scaled value matches the expected one to 6 places."""
        for category, value in expected.items():
            with self.subTest(category=category):
                actual = result[result['Category'] == category]['Value'].item()
                self.assertAlmostEqual(actual, value, places=6)

    def test_case_1(self):
        '''test with normal input data'''
        input_data = [('traditional', -4), ('we', 7), ('because', 3), ('ability', 10), ('exactly', -7)]
        result = f_644(input_data)
        self.assertIsInstance(result, pd.DataFrame)
        self.assertIn('Value', result.columns)
        self._assert_values(result, {
            'traditional': 0.176471,
            'we': 0.823529,
            'because': 0.588235,
            'ability': 1.000000,
            'exactly': 0.000000,
        })

    def test_case_2(self):
        '''test empty input'''
        input_data = []
        # The contract only promises a generic Exception for empty input.
        self.assertRaises(Exception, f_644, input_data)

    def test_case_3(self):
        '''non numeric values'''
        input_data = [('fast', 'test'), ('ago', -8), ('player', 7), ('standard', 2), ('specific', 0)]
        # ValueError is the documented error; a bare Exception check would also
        # accept unrelated failures such as NameError or TypeError.
        self.assertRaises(ValueError, f_644, input_data)

    def test_case_4(self):
        '''Floating point values'''
        input_data = [('real', 4.453), ('others', -1.12), ('professor', -2.2), ('other', -5), ('task', -7.933)]
        result = f_644(input_data)
        self.assertIsInstance(result, pd.DataFrame)
        self.assertIn('Value', result.columns)
        self._assert_values(result, {
            'real': 1.000000,
            'others': 0.550057,
            'professor': 0.462861,
            'other': 0.236800,
            'task': 0.000000,
        })

    def test_case_5(self):
        '''test for basic output structure'''
        input_data = [('visit', 4), ('brother', -2), ('experience', -10), ('whether', 8), ('hand', 3)]
        result = f_644(input_data)
        self.assertIsInstance(result, pd.DataFrame)
        self.assertIn('Value', result.columns)
        self.assertIn('Category', result.columns)
        # Both extremes of the scaled column must lie inside [0, 1].
        smallest = result['Value'].min()
        largest = result['Value'].max()
        self.assertGreaterEqual(smallest, 0)
        self.assertLessEqual(smallest, 1)
        self.assertGreaterEqual(largest, 0)
        self.assertLessEqual(largest, 1)

def run_tests():
    """Collect every test method of TestCases and run them with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader API is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)

if __name__ == "__main__":
    # Demo: normalize a small sample and print the resulting DataFrame.
    sample_pairs = [
        ('Fruits', 5),
        ('Vegetables', 9),
        ('Dairy', -1),
        ('Bakery', -2),
        ('Meat', 4),
    ]
    print(f_644(sample_pairs))

    # run_tests()
137 changes: 137 additions & 0 deletions data/clean/f_645_simon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import nltk
from string import punctuation
import pandas as pd


def f_645(text):
    """
    Find all whitespace-separated words in a text that begin with the "$"
    character and compute their number of occurrences.

    Parameters:
    text (str): The input text.

    Returns:
    DataFrame: A pandas DataFrame with two columns: "Word" and "Frequency".
               "Word" contains the '$' prefixed words, and "Frequency" contains their occurrences.

    Raises:
    ValueError: If text is not a string.

    Requirements:
    - nltk
    - string
    - pandas

    Note:
    - Words that are entirely made up of punctuation are ignored, even if they start with a '$'.

    Example:
    >>> text = "$abc def $efg $hij klm $ $abc $abc $hij $hij"
    >>> f_645(text)
       Word  Frequency
    0  $abc          3
    1  $efg          1
    2  $hij          3

    >>> text = "$hello this i$s a $test $test $test"
    >>> f_645(text)
         Word  Frequency
    0  $hello          1
    1   $test          3
    """
    if not isinstance(text, str):
        raise ValueError("The input should be a string.")

    # Built once so the filter below does not recreate the lookup set for
    # every character of every word.
    punctuation_chars = frozenset(punctuation)

    words = nltk.WhitespaceTokenizer().tokenize(text)
    # Keep '$'-prefixed words unless every character is punctuation
    # (e.g. "$", "$$", "$!").
    dollar_words = [
        word for word in words
        if word.startswith('$') and not punctuation_chars.issuperset(word)
    ]

    freq = nltk.FreqDist(dollar_words)
    return pd.DataFrame(list(freq.items()), columns=["Word", "Frequency"])

import unittest


class TestCases(unittest.TestCase):
    """Unit tests for f_645: counting '$'-prefixed, whitespace-separated words."""

    def _assert_frequencies(self, text, expected_words, expected_freqs):
        """Run f_645 on text and compare both result columns with the expected lists."""
        result = f_645(text)
        self.assertListEqual(result["Word"].tolist(), expected_words)
        self.assertListEqual(result["Frequency"].tolist(), expected_freqs)

    def test_case_1(self):
        self._assert_frequencies(
            "$abc def $efg $hij klm $ $abc $abc $hij $hij",
            ["$abc", "$efg", "$hij"],
            [3, 1, 3],
        )

    def test_case_2(self):
        text = "This is a test without dollar words."
        result = f_645(text)
        self.assertEqual(len(result), 0)

    def test_case_3(self):
        self._assert_frequencies(
            "$test1 $test2 $test1 $test3",
            ["$test1", "$test2", "$test3"],
            [2, 1, 1],
        )

    def test_case_4(self):
        # "$!" and "$$" consist only of punctuation and must be dropped.
        self._assert_frequencies("$! $$ $a $a $a", ["$a"], [3])

    def test_case_5(self):
        self._assert_frequencies(
            "$word1 word2 $word2 $word1 $word3 $word1",
            ["$word1", "$word2", "$word3"],
            [3, 1, 1],
        )

    def test_case_6(self):
        '''empty input string'''
        self._assert_frequencies("", [], [])

    def test_case_7(self):
        '''check for correct return type'''
        text = "$test 123 abcd.aef"
        result = f_645(text)
        self.assertIsInstance(result, pd.DataFrame)
        self.assertIn('Word', result.columns)
        self.assertIn('Frequency', result.columns)

    def test_case_8(self):
        '''word with $ in the middle'''
        self._assert_frequencies("asdfj;alskdfj;$kjhkjhdf", [], [])

    def test_case_9(self):
        '''non string input'''
        non_string_input = 24
        # ValueError is the documented error; a bare Exception check would also
        # accept unrelated failures such as NameError or AttributeError.
        self.assertRaises(ValueError, f_645, non_string_input)




def run_tests():
    """Collect every test method of TestCases and run them with a text runner."""
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader API is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


# Running this file as a script executes the unit tests.
if __name__ == "__main__":
    run_tests()
Loading
Loading