diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6dde857..05ca223 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,3 +11,4 @@ add_subdirectory(homework_3)
 add_subdirectory(homework_4)
 add_subdirectory(homework_5)
 add_subdirectory(homework_8)
+add_subdirectory(homework_9)
diff --git a/homework_9/CMakeLists.txt b/homework_9/CMakeLists.txt
new file mode 100644
index 0000000..0fa5d1f
--- /dev/null
+++ b/homework_9/CMakeLists.txt
@@ -0,0 +1,5 @@
+project(homework_9)
+
+set(homeworkName "${PROJECT_NAME}")
+
+add_subdirectory(task_1)
diff --git a/homework_9/README.md b/homework_9/README.md
new file mode 100644
index 0000000..af92659
--- /dev/null
+++ b/homework_9/README.md
@@ -0,0 +1,3 @@
+# Homework 9
+
+[Task 1. Hashtable](/homework_9/task_1)
diff --git a/homework_9/task_1/.gitignore b/homework_9/task_1/.gitignore
new file mode 100644
index 0000000..06c798b
--- /dev/null
+++ b/homework_9/task_1/.gitignore
@@ -0,0 +1 @@
+input.txt
diff --git a/homework_9/task_1/CMakeLists.txt b/homework_9/task_1/CMakeLists.txt
new file mode 100644
index 0000000..071dedb
--- /dev/null
+++ b/homework_9/task_1/CMakeLists.txt
@@ -0,0 +1,9 @@
+project("${homeworkName}_task_1")
+
+add_library(frequencyLookup frequencyList.c frequencyLookup.c)
+
+add_executable(${PROJECT_NAME} main.c)
+target_link_libraries(${PROJECT_NAME} frequencyLookup)
+
+add_executable(${PROJECT_NAME}_test test.c)
+target_link_libraries(${PROJECT_NAME}_test frequencyLookup)
diff --git a/homework_9/task_1/frequencyList.c b/homework_9/task_1/frequencyList.c
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/frequencyList.c
@@ -0,0 +1,125 @@
+#include "frequencyList.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct FrequencyElement {
+    char *string;
+    int frequency;
+    struct FrequencyElement *next;
+} FrequencyElement;
+
+typedef struct FrequencyList {
+    FrequencyElement *first;
+    FrequencyElement *last;
+    int length;
+} FrequencyList;
+
+bool createFrequencyList(FrequencyList **list) {
+    *list = malloc(sizeof(FrequencyList));
+
+    if (*list == NULL) {
+        return false;
+    }
+
+    (*list)->first = NULL;
+    (*list)->last = NULL;
+    (*list)->length = 0;
+
+    return true;
+}
+
+FrequencyElement *getFirstElement(FrequencyList *list) {
+    return list->first;
+}
+
+// Links an already initialized element to the end of the list.
+static void appendElement(FrequencyList *list, FrequencyElement *element) {
+    element->next = NULL;
+    if (list->first == NULL) {
+        list->first = element;
+    } else {
+        list->last->next = element;
+    }
+    list->last = element;
+    ++list->length;
+}
+
+FrequencyElement *addNewElement(FrequencyList *list) {
+    FrequencyElement *element = malloc(sizeof(FrequencyElement));
+    if (element == NULL) {
+        return NULL;
+    }
+
+    element->string = NULL;
+    element->frequency = 0;
+    appendElement(list, element);
+
+    return element;
+}
+
+FrequencyElement *addNewElementWithValue(FrequencyList *list, const char *string, int frequency) {
+    FrequencyElement *element = malloc(sizeof(FrequencyElement));
+    if (element == NULL) {
+        return NULL;
+    }
+
+    // Copy the string before linking, so that a failed allocation
+    // never leaves an element without a string in the list.
+    element->string = strdup(string);
+    if (element->string == NULL) {
+        free(element);
+        return NULL;
+    }
+    element->frequency = frequency;
+    appendElement(list, element);
+
+    return element;
+}
+
+int getLength(FrequencyList *list) {
+    return list->length;
+}
+
+void disposeList(FrequencyList *list) {
+    if (list == NULL) {
+        return;
+    }
+
+    FrequencyElement *element = list->first;
+    while (element != NULL) {
+        FrequencyElement *next = element->next;
+        free(element->string);
+        free(element);
+        element = next;
+    }
+
+    free(list);
+}
+
+FrequencyElement *getNextElement(FrequencyElement *element) {
+    return element->next;
+}
+
+const char *getString(FrequencyElement *element) {
+    return element->string;
+}
+
+int getFrequency(FrequencyElement *element) {
+    return element->frequency;
+}
+
+bool setString(FrequencyElement *element, const char *string) {
+    char *newString = strdup(string);
+    if (newString == NULL) {
+        return false;
+    }
+    free(element->string);
+    element->string = newString;
+    return true;
+}
+
+void setFrequency(FrequencyElement *element, int frequency) {
+    element->frequency = frequency;
+}
diff --git a/homework_9/task_1/frequencyList.h b/homework_9/task_1/frequencyList.h
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/frequencyList.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <stdbool.h>
+
+/// @brief Element of a `FrequencyList` that contains string and frequency
+typedef struct FrequencyElement FrequencyElement;
+
+/// @brief List where every element contains string and frequency
+typedef struct FrequencyList FrequencyList;
+
+/// @brief Creates `FrequencyList`
+/// @param list Pointer to store `FrequencyList` to
+/// @return `true` if created successfully, `false` otherwise (allocation failed)
+bool createFrequencyList(FrequencyList **list);
+
+/// @brief Gets first element of a list
+/// @param list List to get first element from
+/// @return First element, may be `NULL` if list is empty
+FrequencyElement *getFirstElement(FrequencyList *list);
+
+/// @brief Creates new element, adds it to list, and returns it
+/// @param list List to add new element to
+/// @return Created element, or `NULL` if allocation failed
+FrequencyElement *addNewElement(FrequencyList *list);
+
+/// @brief Creates new element with given string and frequency and adds it to list
+/// @note List is left unchanged if allocation fails
+/// @param list List to add new element to
+/// @param string String to store in new element (a copy is made)
+/// @param frequency Frequency to store in new element
+/// @return Created element, or `NULL` if allocation failed
+FrequencyElement *addNewElementWithValue(FrequencyList *list, const char *string, int frequency);
+
+/// @brief Gets length of a list
+/// @param list List to get length of
+/// @return Length of the given list
+int getLength(FrequencyList *list);
+
+/// @brief Disposes list and all of its elements
+/// @param list List to dispose, `NULL` is ignored
+void disposeList(FrequencyList *list);
+
+/// @brief Gets next element of given element
+/// @param element Element to get next element of
+/// @return Element that follows given element, may be `NULL`
+FrequencyElement *getNextElement(FrequencyElement *element);
+
+/// @brief Gets string stored in an element
+/// @param element Element to get string from
+/// @return String that is stored in an element, `NULL` if no string was set
+const char *getString(FrequencyElement *element);
+
+/// @brief Gets frequency stored in an element
+/// @param element Element to get frequency from
+/// @return Frequency that is stored in an element
+int getFrequency(FrequencyElement *element);
+
+/// @brief Sets string into an element
+/// @param element Element to set string into
+/// @param string String to set (a copy is made)
+/// @return `true` if set successfully, `false` otherwise (allocation failed, old string is kept)
+bool setString(FrequencyElement *element, const char *string);
+
+/// @brief Sets frequency into an element
+/// @param element Element to set frequency into
+/// @param frequency Frequency to set
+void setFrequency(FrequencyElement *element, int frequency);
diff --git a/homework_9/task_1/frequencyLookup.c b/homework_9/task_1/frequencyLookup.c
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/frequencyLookup.c
@@ -0,0 +1,253 @@
+#include "frequencyLookup.h"
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "frequencyList.h"
+
+// Number of buckets in a freshly created lookup
+static const int initialCapacity = 64;
+
+// Lookup is expanded when there are more than this many entries per bucket on average
+static const float maxLoadFactor = 4.0f;
+
+// Capacity is multiplied by this value on every expansion
+static const int growthFactor = 4;
+
+typedef struct FrequencyLookup {
+    FrequencyList **buckets;
+    int capacity;
+    int count;
+} FrequencyLookup;
+
+typedef struct LookupIterator {
+    FrequencyLookup *lookup;
+    int bucketIndex;
+    FrequencyElement *currentElement;
+} LookupIterator;
+
+// Disposes every bucket and the bucket array itself.
+// Tolerates partially created lookup (no array or some buckets missing).
+static void disposeBuckets(FrequencyLookup *lookup) {
+    if (lookup->buckets == NULL) {
+        return;
+    }
+
+    for (int i = 0; i < lookup->capacity; ++i) {
+        disposeList(lookup->buckets[i]);
+    }
+    free(lookup->buckets);
+    lookup->buckets = NULL;
+}
+
+static bool createLookupInternal(FrequencyLookup **lookup, int capacity) {
+    FrequencyLookup *newLookup = malloc(sizeof(FrequencyLookup));
+    if (newLookup == NULL) {
+        return false;
+    }
+
+    newLookup->count = 0;
+    newLookup->capacity = capacity;
+    // calloc() zeroes the array, so buckets that are not created yet
+    // are NULL and disposal of a partially created lookup is safe
+    newLookup->buckets = calloc((size_t)capacity, sizeof(FrequencyList *));
+    if (newLookup->buckets == NULL) {
+        free(newLookup);
+        return false;
+    }
+
+    for (int i = 0; i < capacity; ++i) {
+        if (!createFrequencyList(&(newLookup->buckets[i]))) {
+            disposeLookup(newLookup);
+            return false;
+        }
+    }
+
+    *lookup = newLookup;
+    return true;
+}
+
+bool createLookup(FrequencyLookup **lookup) {
+    return createLookupInternal(lookup, initialCapacity);
+}
+
+static unsigned int getStringHash(const char *string) {
+    unsigned int hash = 0;
+    for (int i = 0; string[i] != '\0'; ++i) {
+        // cast keeps the hash independent of `char` signedness
+        hash = hash * 57 + (unsigned char)string[i];
+    }
+    return hash;
+}
+
+// Gets bucket that the given string belongs to
+static FrequencyList *getBucket(FrequencyLookup *lookup, const char *string) {
+    return lookup->buckets[getStringHash(string) % (unsigned int)lookup->capacity];
+}
+
+// Finds element with the given string in a bucket, returns NULL if there is none
+static FrequencyElement *findElement(FrequencyList *bucket, const char *string) {
+    FrequencyElement *element = getFirstElement(bucket);
+    while (element != NULL && strcmp(string, getString(element)) != 0) {
+        element = getNextElement(element);
+    }
+    return element;
+}
+
+static bool expandLookup(FrequencyLookup *lookup) {
+    if (lookup->capacity > INT_MAX / growthFactor) {
+        return false;
+    }
+
+    FrequencyLookup *newLookup = NULL;
+    if (!createLookupInternal(&newLookup, lookup->capacity * growthFactor)) {
+        return false;
+    }
+
+    LookupIterator *iterator = getIterator(lookup);
+    if (iterator == NULL) {
+        disposeLookup(newLookup);
+        return false;
+    }
+
+    bool copied = true;
+    while (copied && moveNext(iterator)) {
+        const char *string = NULL;
+        int frequency = 0;
+        getCurrent(iterator, &string, &frequency);
+        copied = addFrequency(newLookup, string, frequency);
+    }
+    disposeIterator(iterator);
+
+    if (!copied) {
+        // old lookup is still intact, only the incomplete copy is dropped
+        disposeLookup(newLookup);
+        return false;
+    }
+
+    disposeBuckets(lookup);
+
+    // take over buckets of the new lookup
+    *lookup = *newLookup;
+
+    // free(), because disposeLookup() would dispose the buckets that were just taken over
+    free(newLookup);
+
+    return true;
+}
+
+bool addFrequency(FrequencyLookup *lookup, const char *string, int frequency) {
+    if ((float)lookup->count / lookup->capacity > maxLoadFactor) {
+        // best effort: when expansion fails the lookup is left untouched
+        // and keeps working, only with longer buckets
+        expandLookup(lookup);
+    }
+
+    FrequencyList *bucket = getBucket(lookup, string);
+    FrequencyElement *element = findElement(bucket, string);
+    if (element != NULL) {
+        setFrequency(element, frequency);
+        return true;
+    }
+
+    if (addNewElementWithValue(bucket, string, frequency) == NULL) {
+        return false;
+    }
+
+    ++lookup->count;
+    return true;
+}
+
+bool getFrequencyByString(FrequencyLookup *lookup, const char *string, int *frequency) {
+    FrequencyElement *element = findElement(getBucket(lookup, string), string);
+    if (element == NULL) {
+        return false;
+    }
+
+    *frequency = getFrequency(element);
+    return true;
+}
+
+LookupIterator *getIterator(FrequencyLookup *lookup) {
+    LookupIterator *iterator = malloc(sizeof(LookupIterator));
+    if (iterator == NULL) {
+        return NULL;
+    }
+
+    iterator->lookup = lookup;
+    iterator->bucketIndex = -1;
+    iterator->currentElement = NULL;
+
+    return iterator;
+}
+
+int getCount(FrequencyLookup *lookup) {
+    return lookup->count;
+}
+
+int getCapacity(FrequencyLookup *lookup) {
+    return lookup->capacity;
+}
+
+int getMaxBucketLength(FrequencyLookup *lookup) {
+    int length = -1;
+    for (int i = 0; i < lookup->capacity; ++i) {
+        int bucketLength = getLength(lookup->buckets[i]);
+        if (length == -1 || bucketLength > length) {
+            length = bucketLength;
+        }
+    }
+    return length;
+}
+
+float getAverageBucketLength(FrequencyLookup *lookup) {
+    int totalLength = 0;
+    int nonEmptyBuckets = 0;
+    for (int i = 0; i < lookup->capacity; ++i) {
+        int bucketLength = getLength(lookup->buckets[i]);
+        if (bucketLength != 0) {
+            totalLength += bucketLength;
+            ++nonEmptyBuckets;
+        }
+    }
+
+    if (nonEmptyBuckets == 0) {
+        return 0;
+    }
+    return (float)totalLength / nonEmptyBuckets;
+}
+
+void disposeLookup(FrequencyLookup *lookup) {
+    if (lookup == NULL) {
+        return;
+    }
+
+    disposeBuckets(lookup);
+    free(lookup);
+}
+
+bool moveNext(LookupIterator *iterator) {
+    while (iterator->currentElement == NULL) {
+        ++iterator->bucketIndex;
+        if (iterator->bucketIndex >= iterator->lookup->capacity) {
+            return false;
+        }
+        iterator->currentElement = getFirstElement(iterator->lookup->buckets[iterator->bucketIndex]);
+    }
+
+    return true;
+}
+
+void getCurrent(LookupIterator *iterator, const char **string, int *frequency) {
+    *string = getString(iterator->currentElement);
+    *frequency = getFrequency(iterator->currentElement);
+
+    // advance here, so that the next moveNext() call sees the following element
+    iterator->currentElement = getNextElement(iterator->currentElement);
+}
+
+void disposeIterator(LookupIterator *iterator) {
+    free(iterator);
+}
diff --git a/homework_9/task_1/frequencyLookup.h b/homework_9/task_1/frequencyLookup.h
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/frequencyLookup.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdbool.h>
+
+/// @brief Lookup of frequencies, where key is string and value is int
+typedef struct FrequencyLookup FrequencyLookup;
+
+/// @brief Iterator that iterates through lookup
+typedef struct LookupIterator LookupIterator;
+
+/// @brief Creates `FrequencyLookup`
+/// @param lookup Pointer to store `FrequencyLookup` to
+/// @return `true` if created successfully, `false` otherwise (allocation failed)
+bool createLookup(FrequencyLookup **lookup);
+
+/// @brief Adds new frequency to lookup or updates existing one
+/// @param lookup Lookup to add frequency to
+/// @param string String to add
+/// @param frequency Frequency to add
+/// @return `true` if added successfully, `false` otherwise (allocation failed)
+bool addFrequency(FrequencyLookup *lookup, const char *string, int frequency);
+
+/// @brief Tries to get frequency by string
+/// @param lookup Lookup to get frequency from
+/// @param string String to get frequency by
+/// @param frequency Pointer to store frequency to
+/// @return `true` if found corresponding frequency, `false` otherwise
+bool getFrequencyByString(FrequencyLookup *lookup, const char *string, int *frequency);
+
+/// @brief Creates new iterator
+/// @note Iterator must be disposed with `disposeIterator()`
+/// @param lookup Lookup to get iterator of
+/// @return New iterator, or `NULL` if allocation failed
+LookupIterator *getIterator(FrequencyLookup *lookup);
+
+/// @brief Gets count of entries in lookup
+/// @param lookup Lookup to get count of
+/// @return Count of entries
+int getCount(FrequencyLookup *lookup);
+
+/// @brief Gets capacity of lookup
+/// @param lookup Lookup to get capacity of
+/// @return Capacity of lookup
+int getCapacity(FrequencyLookup *lookup);
+
+/// @brief Gets max length of all buckets in lookup
+/// @param lookup A lookup
+/// @return Max length of all buckets in lookup
+int getMaxBucketLength(FrequencyLookup *lookup);
+
+/// @brief Gets average length of non-empty buckets in lookup
+/// @param lookup A lookup
+/// @return Average length of non-empty buckets in lookup, `0` if all buckets are empty
+float getAverageBucketLength(FrequencyLookup *lookup);
+
+/// @brief Disposes lookup and all of its entries
+/// @param lookup Lookup to dispose, `NULL` is ignored
+void disposeLookup(FrequencyLookup *lookup);
+
+/// @brief Moves to next element in iterator
+/// @param iterator An iterator
+/// @return `true` if there is an available element, `false` otherwise (end of lookup)
+bool moveNext(LookupIterator *iterator);
+
+/// @brief Gets current string and frequency in iterator
+/// @note Must be called exactly once after each `moveNext()` that returned `true`
+/// @param iterator An iterator
+/// @param string Pointer to store string to (string is owned by lookup)
+/// @param frequency Pointer to store frequency to
+void getCurrent(LookupIterator *iterator, const char **string, int *frequency);
+
+/// @brief Disposes iterator
+/// @param iterator Iterator to dispose
+void disposeIterator(LookupIterator *iterator);
diff --git a/homework_9/task_1/main.c b/homework_9/task_1/main.c
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/main.c
@@ -0,0 +1,130 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "frequencyLookup.h"
+
+// Takes `int`, so that EOF can be told apart from a valid character
+bool isNonWord(int c) {
+    return c == ' ' || c == EOF || c == '\n' || c == '\r';
+}
+
+// Increases frequency of a word by one, adding the word if it is not in lookup yet
+static bool countWord(FrequencyLookup *lookup, const char *word) {
+    int frequency = 0;
+    if (!getFrequencyByString(lookup, word, &frequency)) {
+        frequency = 0;
+    }
+    return addFrequency(lookup, word, frequency + 1);
+}
+
+// Reads words from file and counts them in lookup.
+// Words that do not fit into the buffer are split into several words.
+static bool countWords(FILE *file, FrequencyLookup *lookup) {
+    char buffer[1024] = { '\0' };
+    // one char is reserved for the terminating '\0'
+    const int maxWordLength = (int)sizeof(buffer) - 1;
+    int bufferIndex = 0;
+
+    int c = EOF;
+    do {
+        // fgetc() returns EOF on read errors too, while feof() would stay false forever
+        c = fgetc(file);
+        bool isSeparator = isNonWord(c);
+
+        if (!isSeparator && bufferIndex < maxWordLength) {
+            buffer[bufferIndex] = (char)c;
+            ++bufferIndex;
+            continue;
+        }
+
+        if (bufferIndex > 0) {
+            buffer[bufferIndex] = '\0';
+            if (!countWord(lookup, buffer)) {
+                return false;
+            }
+            bufferIndex = 0;
+        }
+
+        if (!isSeparator) {
+            // buffer was full, the char that did not fit starts the next word
+            buffer[bufferIndex] = (char)c;
+            ++bufferIndex;
+        }
+    } while (c != EOF);
+
+    return true;
+}
+
+// Prints statistics of lookup and all of its words with their frequencies
+static bool printReport(FrequencyLookup *lookup) {
+    int count = getCount(lookup);
+    printf("word count: %d\n", count);
+
+    int capacity = getCapacity(lookup);
+    int maxBucketLength = getMaxBucketLength(lookup);
+    float averageBucketLength = getAverageBucketLength(lookup);
+    printf("capacity: %d, max bucket length: %d, average length of non-empty buckets: %lf\n", capacity, maxBucketLength, averageBucketLength);
+    printf("occupancy ratio: %lf\n", (float)count / capacity);
+
+    // at least one slot is requested, because malloc(0) is allowed to return NULL
+    size_t slots = count > 0 ? (size_t)count : 1;
+    const char **strings = malloc(sizeof(char *) * slots);
+    int *frequencies = malloc(sizeof(int) * slots);
+    LookupIterator *iterator = getIterator(lookup);
+    if (strings == NULL || frequencies == NULL || iterator == NULL) {
+        disposeIterator(iterator);
+        free(frequencies);
+        free(strings);
+        return false;
+    }
+
+    int index = 0;
+    while (index < count && moveNext(iterator)) {
+        const char *string = NULL;
+        int frequency = 0;
+        getCurrent(iterator, &string, &frequency);
+
+        strings[index] = string;
+        frequencies[index] = frequency;
+        ++index;
+    }
+    disposeIterator(iterator);
+
+    printf("words:\n");
+
+    // words are printed in reverse order of iteration
+    for (int i = index - 1; i >= 0; --i) {
+        printf("\"%s\" : %d\n", strings[i], frequencies[i]);
+    }
+
+    free(frequencies);
+    free(strings);
+    return true;
+}
+
+int main(void) {
+    FILE *file = fopen("input.txt", "r");
+    if (file == NULL) {
+        printf("couldn't open file\n");
+        return 1;
+    }
+
+    FrequencyLookup *lookup = NULL;
+    if (!createLookup(&lookup)) {
+        printf("allocation error\n");
+        fclose(file);
+        return 1;
+    }
+
+    bool succeeded = countWords(file, lookup) && printReport(lookup);
+    if (!succeeded) {
+        printf("allocation error\n");
+    }
+
+    disposeLookup(lookup);
+    fclose(file);
+
+    return succeeded ? 0 : 1;
+}
diff --git a/homework_9/task_1/test.c b/homework_9/task_1/test.c
new file mode 100644
--- /dev/null
+++ b/homework_9/task_1/test.c
@@ -0,0 +1,89 @@
+#define CTEST_MAIN
+#define CTEST_SEGFAULT
+#include "../../ctest/ctest.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "frequencyLookup.h"
+
+int main(int argc, const char *argv[]) {
+    return ctest_main(argc, argv);
+}
+
+FrequencyLookup *createNewLookup(void) {
+    FrequencyLookup *lookup = NULL;
+    ASSERT_TRUE(createLookup(&lookup));
+    return lookup;
+}
+
+CTEST(lookupTests, createTest) {
+    FrequencyLookup *lookup = createNewLookup();
+    disposeLookup(lookup);
+}
+
+CTEST(lookupTests, addSomeValues) {
+#define size 10
+    const char *strings[size] = { "abc", "def", "ghi", "testString", "testString2", "qwertyuiop", "asdfghjkl", "zxcvbnm", "0123456789", "abcdefghijklmnopqrstuvwxyz" };
+    int frequencies[size] = { 12, 542, 1, 4, 61, 234, 101, 456, 789, 42 };
+
+    FrequencyLookup *lookup = createNewLookup();
+
+    for (int i = 0; i < size; ++i) {
+        ASSERT_TRUE(addFrequency(lookup, strings[i], frequencies[i]));
+
+        int frequency = 0;
+        ASSERT_TRUE(getFrequencyByString(lookup, strings[i], &frequency));
+        ASSERT_EQUAL(frequency, frequencies[i]);
+    }
+
+    for (int i = 0; i < size; ++i) {
+        int frequency = 0;
+        ASSERT_TRUE(getFrequencyByString(lookup, strings[i], &frequency));
+
+        int newFrequency = frequency * 11;
+        ASSERT_TRUE(addFrequency(lookup, strings[i], newFrequency));
+
+        ASSERT_TRUE(getFrequencyByString(lookup, strings[i], &frequency));
+        ASSERT_EQUAL(frequency, newFrequency);
+    }
+
+    disposeLookup(lookup);
+#undef size
+}
+
+CTEST(lookupTests, expandTest) {
+    const int keyCount = 1000;
+    char key[32] = { '\0' };
+
+    FrequencyLookup *lookup = createNewLookup();
+    int initialCapacity = getCapacity(lookup);
+
+    for (int i = 0; i < keyCount; ++i) {
+        snprintf(key, sizeof(key), "key%d", i);
+        ASSERT_TRUE(addFrequency(lookup, key, i));
+    }
+
+    ASSERT_EQUAL(keyCount, getCount(lookup));
+    ASSERT_TRUE(getCapacity(lookup) > initialCapacity);
+
+    for (int i = 0; i < keyCount; ++i) {
+        snprintf(key, sizeof(key), "key%d", i);
+
+        int frequency = -1;
+        ASSERT_TRUE(getFrequencyByString(lookup, key, &frequency));
+        ASSERT_EQUAL(i, frequency);
+    }
+
+    disposeLookup(lookup);
+}
+
+CTEST(lookupTests, averageBucketLengthTest) {
+    FrequencyLookup *lookup = createNewLookup();
+    ASSERT_TRUE(getAverageBucketLength(lookup) == 0);
+
+    ASSERT_TRUE(addFrequency(lookup, "abc", 1));
+    ASSERT_TRUE(getAverageBucketLength(lookup) == 1);
+
+    disposeLookup(lookup);
+}