diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3e353ea --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,31 @@ +name: CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main ] + +jobs: + Tests: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + fail-fast: false + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + + - name: Build and Test + shell: bash + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + sudo apt-get update && sudo apt-get install -y cmake build-essential libgtest-dev + fi + + cmake -B build + cmake --build build + cd build/tests + ./test-runner \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index ec0480c..44c5fac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,11 +3,22 @@ project(LogAnalyzer VERSION 0.1.0) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +cmake_policy(SET CMP0079 NEW) + + +find_package(GTest QUIET) +if(NOT GTest_FOUND) + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.14.0 + ) + + FetchContent_MakeAvailable(googletest) +endif() + add_subdirectory(core) add_subdirectory(cli) - -find_package(GTest) -if(GTest_FOUND) - add_subdirectory(tests) -endif() \ No newline at end of file +add_subdirectory(tests) diff --git a/README.md b/README.md index 4782719..542bce5 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,54 @@ -# Log Analyzer v0.1.0 +# Log Analyzer v0.2.0 ## 💻 Платформы -- ✅ macOS -- ❌ Windows (в разработке) -- ❌ Linux (в разработке) +- ✅ macOS (clang/gcc, CMake ≥ 3.10) +- ✅ Linux (gcc, CMake ≥ 3.10) ## 🚀 Возможности -- Чтение Nginx access логов -- Базовый анализ: IP, ошибки, временные метрики -- Консольный интерфейс +### 📋 Поддерживаемые форматы логов: +- Nginx +- Apache + +### 📊 Анализируемая статистика: +- Общая статистика запросов (успешные/ошибочные) +- Распределение по форматам логов +- 
Топ IP-адресов с ошибками (топ-10) +- Типы HTTP-ошибок (400, 401 и т.д.) +- Временное распределение ошибок (по часам) +- Количество неудачных парсингов + +## 🛠️ Технологический стек +- Язык: C++17 +- Сборка: CMake +- Тестирование: Google Test (gtest/gmock) +- CI: Сборка и тестирование ## 🛠️ В разработке: -- CI/CD - GUI -- Gcov -- Автоопределение формата -- Apache Access Logs -## 📦 Установка -```bash +## 📦 Установка и сборка +- Клонирование репозитория +``` git clone https://github.com/Arkilleru/log-analyzer.git cd log-analyzer -mkdir build && cd build -cmake .. && make \ No newline at end of file +``` + +- Сборка +``` +./run.sh build +``` + +- Тестирование +``` +./run.sh test +``` + +- Очистка +``` +./run.sh clean +``` + +- Очистка + Сборка + Тестирование +``` +./run.sh all +``` \ No newline at end of file diff --git a/cli/main.cpp b/cli/main.cpp index bb5eb16..20cf373 100644 --- a/cli/main.cpp +++ b/cli/main.cpp @@ -3,6 +3,7 @@ int main() { std::string path; + std::cout << "Enter log file path:\n"; std::getline(std::cin, path); Analyzer analysis; diff --git a/core/analyzer.cpp b/core/analyzer.cpp index 60b220a..3126709 100644 --- a/core/analyzer.cpp +++ b/core/analyzer.cpp @@ -1,18 +1,17 @@ #include "analyzer.h" -#include -std::string Analyzer::analyze(std::string& path) { - reader.OpenFile(path); +std::string Analyzer::analyze(std::string path) { + reader_.OpenFile(path); AnalysisResult res; - while(reader.MoreLines()) { - std::string str = reader.ReadLine(); - LogInformation data = parser.Parse(str); - statistics.Process(data, res); + while(reader_.MoreLines()) { + std::string str = reader_.ReadLine(); + LogInformation data = parser_.Parse(str); + statistics_.Process(data, res); } - reader.CloseFile(); - std::string report = reporter.GenerateTextReport(res); + reader_.CloseFile(); + std::string report = reporter_.GenerateTextReport(res); return report; } \ No newline at end of file diff --git a/core/analyzer.h b/core/analyzer.h index f75765e..470f924 100644 --- 
a/core/analyzer.h +++ b/core/analyzer.h @@ -1,89 +1,19 @@ #pragma once +#include "reader.h" +#include "parser.h" +#include "statistics.h" +#include "reporter.h" #include -#include -#include -#include -#include -#include - -struct LogInformation { - std::string ip; - std::string time; - std::string operation; - std::string url; - int status; - size_t answer_size; - bool parse_success; -}; - -struct AnalysisResult { - std::unordered_map error_counts; - std::unordered_map ip; - std::unordered_map time_distribution; - int total_processed = 0; - int failed_parses = 0; - int successful_requests = 0; - int error_requests = 0; -}; - - - -class Parser { -private: - std::regex nginx_pattern; -public: - Parser() : nginx_pattern(R"(^(\d+\.\d+\.\d+\.\d+) - - \[(.*?)\] \"(\w+) (.*?) HTTP/.*?\" (\d+) (\d+))") {} - - LogInformation Parse(const std::string& line); -}; - - -class Reporter { -private: - std::string GenerateGeneralStats(const AnalysisResult& data); - std::string GenerateIpErrorsTop(const AnalysisResult& data); - std::string GenerateErrorTypes(const AnalysisResult& data); - std::string GenerateTimeDistribution(const AnalysisResult& data); - std::vector> ToSortedVector(const std::unordered_map& mp); -public: - std::string GenerateTextReport(const AnalysisResult& data); -}; - - -class Statistics { -public: - void Process(LogInformation& data, AnalysisResult& res); - std::string ExtractHour(std::string& time); - -}; - - -class Reader { -private: - std::unique_ptr file; - size_t line_number; - -public: - Reader() = default; - ~Reader() = default; - - bool OpenFile(const std::string& path); - std::string ReadLine(); - bool MoreLines(); - void CloseFile(); -}; - - class Analyzer { private: - Reader reader; - Parser parser; - Reporter reporter; - Statistics statistics; + Reader reader_; + Parser parser_; + Reporter reporter_; + Statistics statistics_; public: - std::string analyze(std::string& path); + std::string analyze(std::string path); }; diff --git a/core/common.h 
b/core/common.h new file mode 100644 index 0000000..ffb7eab --- /dev/null +++ b/core/common.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +enum class LogFormat { + Nginx, + Apache, + Unknown +}; + +inline std::string LogFormatToString(LogFormat format) { + switch(format) { + case LogFormat::Apache: return "Apache"; + case LogFormat::Nginx: return "Nginx"; + default: return "Unknown"; + } +} + +struct LogInformation { + std::string ip; + std::string time; + std::string operation; + std::string url; + int status = 0; + size_t answer_size = 0; + bool parse_success = false; + LogFormat format = LogFormat::Unknown; +}; + +struct AnalysisResult { + std::unordered_map error_counts; + std::unordered_map ip; + std::unordered_map time_distribution; + std::unordered_map format_counts; + int total_processed = 0; + int failed_parses = 0; + int successful_requests = 0; + int error_requests = 0; +}; \ No newline at end of file diff --git a/core/parser.cpp b/core/parser.cpp index 66a9d86..02d28f5 100644 --- a/core/parser.cpp +++ b/core/parser.cpp @@ -1,12 +1,20 @@ -#include "analyzer.h" +#include "parser.h" #include +LogFormat Parser::DetectFormat(const std::string& line) { + if (line.find("HTTP/1.0") != std::string::npos) { + return LogFormat::Nginx; -LogInformation Parser::Parse(const std::string& line) { - LogInformation data; - std::smatch matches; + } else if (line.find("HTTP/1.1") != std::string::npos) { + return LogFormat::Apache; + } - if (std::regex_match(line, matches, nginx_pattern)) { + return LogFormat::Unknown; +} + +void Parser::RegexParse(const std::string& line, const std::regex& pattern, LogInformation& data) { + std::smatch matches; + if (std::regex_match(line, matches, pattern)) { data.ip = matches[1]; data.time = matches[2]; data.operation = matches[3]; @@ -15,10 +23,26 @@ LogInformation Parser::Parse(const std::string& line) { data.answer_size = std::stoul(matches[6]); data.parse_success = true; - return data; + return; } data.parse_success = false; +} + +LogInformation
Parser::Parse(const std::string& line) { + LogInformation data; + data.format = DetectFormat(line); + + if (data.format == LogFormat::Apache) { + RegexParse(line, apache_pattern_, data); + } + else if (data.format == LogFormat::Nginx) { + RegexParse(line, nginx_pattern_, data); + } + else { + data.parse_success = false; + } + return data; } \ No newline at end of file diff --git a/core/parser.h b/core/parser.h new file mode 100644 index 0000000..8691989 --- /dev/null +++ b/core/parser.h @@ -0,0 +1,20 @@ +#pragma once +#include "common.h" + +#include + +class Parser { +private: + std::regex nginx_pattern_; + std::regex apache_pattern_; +public: + Parser() { + nginx_pattern_ = R"(^(\d+\.\d+\.\d+\.\d+) - - \[([^\]]+)\] \"(\w+) (.*?) HTTP/1.0" (\d+) (\d+))"; + apache_pattern_ = R"(^(\d+\.\d+\.\d+\.\d+) - - \[([^\]]+)\] \"(\w+) (.*?) HTTP/1.1" (\d+) (\d+))"; + } + + LogFormat DetectFormat(const std::string& line); + LogInformation Parse(const std::string& line); + void RegexParse(const std::string& line, const std::regex& pattern, LogInformation& data); + +}; \ No newline at end of file diff --git a/core/reader.cpp b/core/reader.cpp index f3a078f..dd014b0 100644 --- a/core/reader.cpp +++ b/core/reader.cpp @@ -1,21 +1,21 @@ -#include "analyzer.h" +#include "reader.h" bool Reader::OpenFile(const std::string& path) { - file = std::make_unique(path); - return file->is_open(); + file_ = std::make_unique(path); + return file_->is_open(); } std::string Reader::ReadLine() { std::string line = ""; - std::getline(*file, line); + std::getline(*file_, line); return line; } bool Reader::MoreLines() { - return file && file->peek() != EOF; + return file_ && file_->peek() != EOF; } void Reader::CloseFile() { - file.reset(); + file_.reset(); } diff --git a/core/reader.h b/core/reader.h new file mode 100644 index 0000000..2fe3cb8 --- /dev/null +++ b/core/reader.h @@ -0,0 +1,19 @@ +#pragma once +#include "common.h" + +#include +#include + +class Reader { +private: + std::unique_ptr file_; 
+ +public: + Reader() = default; + ~Reader() = default; + + bool OpenFile(const std::string& path); + std::string ReadLine(); + bool MoreLines(); + void CloseFile(); +}; \ No newline at end of file diff --git a/core/reporter.cpp b/core/reporter.cpp index 93c0698..7361ee4 100644 --- a/core/reporter.cpp +++ b/core/reporter.cpp @@ -1,4 +1,4 @@ -#include "analyzer.h" +#include "reporter.h" std::string Reporter::GenerateGeneralStats(const AnalysisResult& data) { std::string stats = "\033[34m"; @@ -13,6 +13,17 @@ std::string Reporter::GenerateGeneralStats(const AnalysisResult& data) { return stats; } +std::string Reporter::GenerateFormat(const AnalysisResult& data) { + std::string stats = "\033[31m"; + stats += "Formats lines\n\n"; + stats += "\033[0m"; + + for(const auto& format : data.format_counts) { + stats += LogFormatToString(format.first) + " - " + std::to_string(format.second) + "\n"; + } + return stats + "\n"; +} + std::vector> Reporter::ToSortedVector(const std::unordered_map& mp) { std::vector> sorted(mp.begin(), mp.end()); @@ -42,7 +53,7 @@ std::string Reporter::GenerateErrorTypes(const AnalysisResult& data) { std::vector> sorted = ToSortedVector(data.error_counts); std::string types = "\033[32m"; - types += "Types errors IP\n\n"; + types += "Types errors\n\n"; types += "\033[0m"; for(int i = 0; i < sorted.size() && sorted[i].second != 0; ++i) { @@ -73,7 +84,8 @@ std::string Reporter::GenerateTimeDistribution(const AnalysisResult& data) { std::string Reporter::GenerateTextReport(const AnalysisResult& data) { if (data.total_processed > 0) { - std::string report = GenerateGeneralStats(data) ; + std::string report = GenerateGeneralStats(data); + report += GenerateFormat(data); report += GenerateIpErrorsTop(data); report += GenerateErrorTypes(data); report += GenerateTimeDistribution(data); diff --git a/core/reporter.h b/core/reporter.h new file mode 100644 index 0000000..256a6f1 --- /dev/null +++ b/core/reporter.h @@ -0,0 +1,16 @@ +#pragma once +#include 
"common.h" + +#include + +class Reporter { +private: + std::string GenerateFormat(const AnalysisResult& data); + std::string GenerateGeneralStats(const AnalysisResult& data); + std::string GenerateIpErrorsTop(const AnalysisResult& data); + std::string GenerateErrorTypes(const AnalysisResult& data); + std::string GenerateTimeDistribution(const AnalysisResult& data); + std::vector> ToSortedVector(const std::unordered_map& mp); +public: + std::string GenerateTextReport(const AnalysisResult& data); +}; \ No newline at end of file diff --git a/core/statistics.cpp b/core/statistics.cpp index 8b1d971..6f76ca1 100644 --- a/core/statistics.cpp +++ b/core/statistics.cpp @@ -1,4 +1,4 @@ -#include "analyzer.h" +#include "statistics.h" std::string Statistics::ExtractHour(std::string& time) { int pos = time.find(":"); @@ -14,6 +14,7 @@ void Statistics::Process(LogInformation& data, AnalysisResult& res) { } res.total_processed++; + res.format_counts[data.format]++; if (data.status < 400) { res.successful_requests++; diff --git a/core/statistics.h b/core/statistics.h new file mode 100644 index 0000000..6319ddd --- /dev/null +++ b/core/statistics.h @@ -0,0 +1,9 @@ +#pragma once +#include "common.h" + +class Statistics { +public: + void Process(LogInformation& data, AnalysisResult& res); + std::string ExtractHour(std::string& time); + +}; \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..6b5590b --- /dev/null +++ b/run.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +BUILD_DIR="build" +TARGET="log-analyzer-cli" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +print_info() { + echo -e "${BLUE} $1${NC}" +} + +print_success() { + echo -e "${GREEN} $1${NC}" +} + +print_warning() { + echo -e "${YELLOW} $1${NC}" +} + +print_error() { + echo -e "${RED} $1${NC}" +} + +build_project() { + print_info "Building Log Analyzer..." + mkdir -p $BUILD_DIR + cd $BUILD_DIR + cmake -DCMAKE_BUILD_TYPE=Release .. 
+ make -j"$(getconf _NPROCESSORS_ONLN)" || { print_error "Build failed!"; exit 1; } + cd .. + print_success "Build complete!" +} + +run_tests() { + print_info "Running tests..." + if [ ! -d "$BUILD_DIR" ]; then + build_project + fi + cd "$BUILD_DIR/tests" || exit 1 + ./test-runner || { print_error "Tests failed!"; exit 1; } + print_success "All tests passed!" +} + +clean_project() { + print_warning "Cleaning build directory..." + rm -rf $BUILD_DIR + print_success "Clean complete!" +} + +run_all() { + print_info "Starting full pipeline..." + clean_project + build_project + run_tests +} + +case "$1" in + "build") + build_project + ;; + "test") + run_tests + ;; + "clean") + clean_project + ;; + "all") + run_all + ;; + *) + print_error "Unknown command: $1" + exit 1 + ;; +esac \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d605f0b..c046de1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,6 @@ +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/test_data + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + add_executable(test-runner test_reader.cpp test_parser.cpp @@ -6,7 +9,12 @@ add_executable(test-runner test_analyzer.cpp ) -file(COPY ${CMAKE_SOURCE_DIR}/tests/test_data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries(test-runner core_lib GTest::GTest GTest::Main) +target_link_libraries( + test-runner + core_lib + gtest + gtest_main +) + +add_custom_target(test DEPENDS test-runner COMMAND ./test-runner) -add_custom_target(test COMMAND ./test-runner DEPENDS test-runner) \ No newline at end of file diff --git a/tests/test_analyzer.cpp b/tests/test_analyzer.cpp index 51c993c..528d7b0 100644 --- a/tests/test_analyzer.cpp +++ b/tests/test_analyzer.cpp @@ -2,8 +2,8 @@ TEST(AnalyzerTest, StandardTest) { Analyzer analyzer; - std::string path = "tests/test_data/normal_test.log"; - std::string report = analyzer.analyze(path); + fs::path path = fs::path("test_data") / "normal_test.log"; + std::string report = analyzer.analyze(path.string()); EXPECT_TRUE(report.find("Sucсessful - 5") != std::string::npos); EXPECT_TRUE(report.find("Failed
Parsing - 8") != std::string::npos); @@ -15,8 +15,8 @@ TEST(AnalyzerTest, StandardTest) { TEST(AnalyzerTest, EmptyTest) { Analyzer analyzer; - std::string path = "tests/test_data/empty_test.log"; - std::string report = analyzer.analyze(path); + fs::path path = fs::path("test_data") / "empty_test.log"; + std::string report = analyzer.analyze(path.string()); EXPECT_TRUE(report == "invalid file format or file empty"); } \ No newline at end of file diff --git a/tests/test_reader.cpp b/tests/test_reader.cpp index cafac5b..2db685b 100644 --- a/tests/test_reader.cpp +++ b/tests/test_reader.cpp @@ -1,9 +1,12 @@ #include "tests.h" +#include +#include + TEST(ReaderTest, SimpleTest) { Reader reader; - std::string testFile = "tests/test_data/test_simple.txt"; - EXPECT_TRUE(reader.OpenFile(testFile)); + fs::path test_path = fs::path("test_data") / "test_simple.txt"; + EXPECT_TRUE(reader.OpenFile(test_path.string())); EXPECT_TRUE(reader.MoreLines()); reader.CloseFile(); } @@ -11,22 +14,22 @@ TEST(ReaderTest, SimpleTest) { TEST(ReaderTest, EmptyTest) { Reader reader; - std::string testFile = "tests/test_data/test_empty.txt"; - EXPECT_TRUE(reader.OpenFile(testFile)); + fs::path test_path = fs::path("test_data") / "test_empty.txt"; + EXPECT_TRUE(reader.OpenFile(test_path.string())); EXPECT_FALSE(reader.MoreLines()); reader.CloseFile(); } TEST(ReaderTest, SingleTest) { Reader reader; - std::string testFile = "tests/test_data/test_single_line.txt"; - EXPECT_TRUE(reader.OpenFile(testFile)); + fs::path test_path = fs::path("test_data") / "test_single_line.txt"; + EXPECT_TRUE(reader.OpenFile(test_path.string())); EXPECT_TRUE(reader.MoreLines()); reader.CloseFile(); } TEST(ReaderTest, ImaginaryTest) { Reader reader; - std::string testFile = "Imaginary.txt"; - EXPECT_FALSE(reader.OpenFile(testFile)); + fs::path test_path = fs::path("test_data") / "Imaginary.txt"; + EXPECT_FALSE(reader.OpenFile(test_path.string())); } \ No newline at end of file diff --git a/tests/tests.h b/tests/tests.h 
index 7cd6896..a591ed3 100644 --- a/tests/tests.h +++ b/tests/tests.h @@ -1,4 +1,13 @@ #pragma once +#include "../core/reader.h" +#include "../core/parser.h" +#include "../core/statistics.h" +#include "../core/reporter.h" #include "../core/analyzer.h" + +#include + +namespace fs = std::filesystem; + #include