Skip to content

ratna: annotation results from first half of zhihan data #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 8, 2024
113 changes: 113 additions & 0 deletions data/clean/f_933_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import numpy as np
from collections import Counter


def f_933(list_of_tuples):
    """
    Sum the numeric values and tally the categories found in a list of tuples.

    Each tuple is read positionally: index 0 holds the numeric value and index 1
    holds the category label. The numeric values are added up with numpy and the
    labels are counted with a Counter that is converted to a plain dict.

    Parameters:
    - list_of_tuples (list of tuple): A list where each tuple contains a numeric value and a category.

    Returns:
    - tuple: A 2-element tuple. The first element is the sum of the numeric values (as returned
             by numpy.sum), the second is a dictionary mapping each category to the number of
             tuples that carry it.

    Requirements:
    - numpy
    - collections.Counter

    Example:
    >>> list_of_tuples = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
    >>> sum_of_values, category_counts = f_933(list_of_tuples)
    >>> print(sum_of_values)
    15
    >>> print(category_counts)
    {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
    """
    # First pass: collect the numeric component of every tuple.
    values = []
    for entry in list_of_tuples:
        values.append(entry[0])

    # Second pass: collect the category component of every tuple.
    labels = []
    for entry in list_of_tuples:
        labels.append(entry[1])

    total = np.sum(values)

    tally = Counter()
    tally.update(labels)

    return total, dict(tally)


import unittest


def run_tests():
    """
    Run every test defined on TestCases with a plain text runner.

    The suite is built through the default TestLoader instead of
    unittest.makeSuite: makeSuite was deprecated in Python 3.11 and removed in
    Python 3.13, where calling it raises AttributeError.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


class TestCases(unittest.TestCase):
    """Checks for f_933: the returned sum and the per-category counts."""

    def test_case_1(self):
        # Five entries, each belonging to its own category
        pairs = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1})

    def test_case_2(self):
        # Every entry shares one category
        pairs = [(5, 'Fruits'), (9, 'Fruits'), (-1, 'Fruits'), (-2, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 11)
        self.assertEqual(counts, {'Fruits': 4})

    def test_case_3(self):
        # Only negative numeric values
        pairs = [(-5, 'Fruits'), (-9, 'Vegetables'), (-1, 'Dairy')]
        total, counts = f_933(pairs)
        self.assertEqual(total, -15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1})

    def test_case_4(self):
        # No entries at all
        total, counts = f_933([])
        self.assertEqual(total, 0)
        self.assertEqual(counts, {})

    def test_case_5(self):
        # Positive and negative values that partly cancel, all in one category
        pairs = [(5, 'Fruits'), (-5, 'Fruits'), (3, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 3)
        self.assertEqual(counts, {'Fruits': 3})

    def test_empty_list(self):
        """Empty input yields a zero sum and an empty mapping."""
        outcome = f_933([])
        self.assertEqual(outcome, (0, {}))

    def test_all_negative_values(self):
        """Negative values are summed as-is."""
        outcome = f_933([(-5, 'Fruits'), (-2, 'Vegetables')])
        self.assertEqual(outcome, (-7, {'Fruits': 1, 'Vegetables': 1}))

    def test_duplicate_categories(self):
        """A repeated category is counted once per occurrence."""
        outcome = f_933([(1, 'Fruits'), (2, 'Fruits'), (3, 'Vegetables')])
        self.assertEqual(outcome, (6, {'Fruits': 2, 'Vegetables': 1}))

    def test_single_tuple_in_list(self):
        """A one-element list returns that element's value and category."""
        outcome = f_933([(10, 'Meat')])
        self.assertEqual(outcome, (10, {'Meat': 1}))

    def test_float_numeric_values(self):
        """Float values are summed without truncation."""
        outcome = f_933([(1.5, 'Fruits'), (2.5, 'Vegetables')])
        self.assertEqual(outcome, (4.0, {'Fruits': 1, 'Vegetables': 1}))


if __name__ == "__main__":
run_tests()
141 changes: 141 additions & 0 deletions data/clean/f_934_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import ast
import json
from collections import Counter


def f_934(file_pointer):
    """
    Count how often each top-level key occurs across the dictionaries stored in a JSON file.

    The JSON document is loaded from the given file object and iterated. Entries that are
    strings are passed through ast.literal_eval; strings for which that raises ValueError are
    skipped. Every entry that is (or evaluates to) a dictionary contributes its keys to the
    count; all other entries are ignored. Only top-level keys are counted.

    Parameters:
    file_pointer (file object): An open file object pointing to the JSON file containing the data. This file should
                                already be opened in the correct mode (e.g., 'r' for reading).

    Returns:
    collections.Counter: A Counter object representing the frequency of each key found in the dictionaries.

    Requirements:
    - ast
    - json
    - collections.Counter

    Raises:
    - json.JSONDecodeError: If the file content is not valid JSON.
    - Any exception other than ValueError raised by ast.literal_eval (for example SyntaxError)
      is not caught and propagates to the caller.

    Note:
    This function assumes the input JSON data is a list of dictionaries or strings that can be evaluated as dictionaries.
    If the top-level JSON value is an object instead, iterating it yields its keys, which are then treated as
    string entries.

    Example:
    >>> with open("data.json", "r") as file:
    ...     key_frequency = f_934(file)
    >>> print(key_frequency)
    Counter({'name': 5, 'age': 5, 'city': 3})
    """
    entries = json.load(file_pointer)
    counts = Counter()

    for entry in entries:
        candidate = entry
        if isinstance(candidate, str):
            try:
                candidate = ast.literal_eval(candidate)
            except ValueError:
                # Not a Python literal: leave this entry out of the count.
                continue

        if not isinstance(candidate, dict):
            continue

        # Pass the keys view explicitly: Counter.update on a mapping would add its values.
        counts.update(candidate.keys())

    return counts


import unittest
from io import BytesIO
from collections import Counter
import json


def run_tests():
    """
    Run every test defined on TestCases with a plain text runner.

    The suite is built through the default TestLoader instead of
    unittest.makeSuite: makeSuite was deprecated in Python 3.11 and removed in
    Python 3.13, where calling it raises AttributeError.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


class TestCases(unittest.TestCase):
    """Checks for f_934 using in-memory byte streams in place of real JSON files."""

    @staticmethod
    def _json_stream(payload):
        """Serialize payload to JSON and wrap its UTF-8 bytes in a BytesIO."""
        return BytesIO(json.dumps(payload).encode('utf-8'))

    def test_with_dicts(self):
        # A JSON array of plain objects: each key is counted once per object
        stream = self._json_stream([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Jake"}])
        self.assertEqual(f_934(stream), Counter({'name': 3, 'age': 2}))

    def test_with_string_repr_dicts(self):
        # A JSON array of strings, each holding the text of a dictionary
        stream = self._json_stream(['{"city": "New York"}', '{"city": "Los Angeles", "temp": 75}'])
        self.assertEqual(f_934(stream), Counter({'city': 2, 'temp': 1}))

    def test_with_invalid_json(self):
        # Content that is not JSON at all: json.load's decode error reaches the caller
        stream = BytesIO(b'invalid json')
        with self.assertRaises(json.JSONDecodeError):
            f_934(stream)

    def test_empty_json(self):
        # An empty JSON array produces an empty Counter
        stream = self._json_stream([])
        self.assertEqual(f_934(stream), Counter())

    def test_mixed_valid_invalid_dicts(self):
        # Strings that cannot be evaluated are skipped; the valid ones still count
        stream = self._json_stream(['{"name": "John"}', 'Invalid', '{"age": 30}'])
        self.assertEqual(f_934(stream), Counter({'name': 1, 'age': 1}))

    def test_nested_dicts(self):
        # Only the outermost keys are counted, not keys of nested dictionaries
        stream = self._json_stream([{"person": {"name": "John", "age": 30}}, {"person": {"city": "New York"}}])
        self.assertEqual(f_934(stream), Counter({'person': 2}))

    def test_with_actual_json_objects_instead_of_strings(self):
        # Real JSON objects (not their string form) with distinct keys
        stream = self._json_stream([{"key1": "value1"}, {"key2": "value2", "key3": "value3"}])
        self.assertEqual(f_934(stream), Counter({'key1': 1, 'key2': 1, 'key3': 1}))

    def test_invalid_json_structure(self):
        # The top-level value is an object, not a list. Iterating it yields the key
        # "not", which is handed to ast.literal_eval as a string and fails to parse,
        # so a SyntaxError is expected here.
        stream = self._json_stream({"not": "a list"})
        with self.assertRaises(SyntaxError):
            f_934(stream)


if __name__ == "__main__":
run_tests()
142 changes: 142 additions & 0 deletions data/clean/f_935_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import ast
import os
import glob

# Constants
DIRECTORY = 'data'

def f_935(directory):
    """
    Convert all Unicode string representations of dictionaries in all text files
    in the specified directory to Python dictionaries.

    Every '*.txt' file directly inside the directory is read line by line. Each
    non-blank line must be the literal representation of one dictionary; blank
    lines are skipped. Files are processed in sorted path order so the result is
    deterministic.

    Parameters:
    directory (str): The path to the directory containing the text files.

    Returns:
    list: A list of dictionaries extracted from the text files, in file order and
          then line order. Empty if the directory has no text files.

    Requirements:
    - ast
    - os
    - glob

    Example:
    >>> f_935("sample_directory/")
    [{'key1': 'value1'}, {'key2': 'value2'}]

    Note:
    Ensure that the text files in the directory contain valid Unicode string representations of dictionaries.

    Raises:
    - ValueError: If a non-blank line in any text file cannot be parsed as a Python
                  literal, or parses to something that is not a dictionary.
    """
    pattern = os.path.join(directory, '*.txt')

    results = []
    # glob returns matches in arbitrary order; sort so the output order is stable.
    for file_path in sorted(glob.glob(pattern)):
        with open(file_path, 'r') as handle:
            for line_number, line in enumerate(handle, start=1):
                text = line.strip()
                if not text:
                    # A blank line carries no dictionary; evaluating '' would raise SyntaxError.
                    continue

                try:
                    parsed = ast.literal_eval(text)
                except (ValueError, SyntaxError) as err:
                    # literal_eval raises SyntaxError for malformed text; report it as the
                    # documented ValueError, with the location of the offending line.
                    raise ValueError(
                        f"{file_path}, line {line_number}: invalid dictionary representation"
                    ) from err

                if not isinstance(parsed, dict):
                    raise ValueError(
                        f"{file_path}, line {line_number}: expected a dictionary, "
                        f"got {type(parsed).__name__}"
                    )

                results.append(parsed)

    return results

import unittest
import os
import ast
import shutil


def run_tests():
    """
    Run every test defined on TestCases with a plain text runner.

    The suite is built through the default TestLoader instead of
    unittest.makeSuite: makeSuite was deprecated in Python 3.11 and removed in
    Python 3.13, where calling it raises AttributeError.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)

class TestCases(unittest.TestCase):
    """
    Checks for f_935 against fixture directories created under 'testdir_f_935'.

    setUp builds the fixture tree before every test and tearDown removes it again.
    """

    @staticmethod
    def _make_directory(directory, contents=()):
        """Create directory and write contents[i] into '<i+1>.txt' inside it."""
        os.makedirs(directory, exist_ok=True)
        for index, text in enumerate(contents, start=1):
            # The context manager closes the file even if the write fails.
            with open(directory + "/" + str(index) + ".txt", "w") as handle:
                handle.write(text)

    def setUp(self):
        self.test_dir = 'testdir_f_935'
        os.makedirs(self.test_dir, exist_ok=True)

        # Five files, one valid dictionary each.
        self.sample_directory = 'testdir_f_935/sample_directory'
        self._make_directory(self.sample_directory, [
            "{'key1': 'value1'}",
            "{'key2': 'value2', 'key3': 'value3'}",
            "{'key4': 'value4'}",
            "{'key5': 'value5', 'key6': 'value6', 'key7': 'value7'}",
            "{'key8': 'value8'}",
        ])

        # No files at all.
        self.empty_directory = "testdir_f_935/empty_directory"
        self._make_directory(self.empty_directory)

        # One file holding two dictionaries on separate lines.
        self.multi_line_directory = "testdir_f_935/multi_line_directory"
        self._make_directory(self.multi_line_directory, [
            "{'key1': 'value1'}\n{'key2': 'value2'}",
        ])

        # One invalid file next to one valid file.
        self.mixed_directory = "testdir_f_935/mixed_directory"
        self._make_directory(self.mixed_directory, [
            "invalid",
            "{'key1': 'value1'}",
        ])

        # A single file whose content is not a dictionary representation.
        self.invalid_directory = "testdir_f_935/invalid_directory"
        self._make_directory(self.invalid_directory, [
            "invalid",
        ])

    def tearDown(self):
        # Clean up the test directory
        shutil.rmtree(self.test_dir)

    def test_case_1(self):
        # Test with the sample directory
        result = f_935(self.sample_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2', 'key3': 'value3'},
            {'key4': 'value4'},
            {'key5': 'value5', 'key6': 'value6', 'key7': 'value7'},
            {'key8': 'value8'}
        ]
        # Membership rather than equality: the order in which files are visited is not asserted.
        for expected in expected_result:
            self.assertIn(expected, result)
        # Guard against extra entries that the membership checks would not notice.
        self.assertEqual(len(result), len(expected_result))

    def test_case_2(self):
        # Test with an empty directory
        result = f_935(self.empty_directory)
        self.assertEqual(result, [])

    def test_case_3(self):
        # Test with a directory containing a text file without valid dictionary representation
        with self.assertRaises(ValueError):
            f_935(self.invalid_directory)

    def test_case_4(self):
        # Test with a directory containing multiple text files, some of which are invalid
        with self.assertRaises(ValueError):
            f_935(self.mixed_directory)

    def test_case_5(self):
        # Test with a directory containing a text file with multiple valid dictionary representations
        result = f_935(self.multi_line_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2'}
        ]
        self.assertEqual(result, expected_result)

if __name__ == "__main__":
run_tests()
Loading