diff --git a/.gitattributes b/.gitattributes
index 87e654b..a98b071 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1 @@
-*.csv filter=lfs diff=lfs merge=lfs -text
+input-data/*.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9d0e170
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,46 @@
+.gradle
+build/
+!gradle/wrapper/gradle-wrapper.jar
+!**/src/main/**/build/
+!**/src/test/**/build/
+.kotlin
+
+# Data csv files
+input-data/
+
+# Hadoop internals
+hadoop/data
+
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
+out/
+!**/src/main/**/out/
+!**/src/test/**/out/
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+bin/
+!**/src/main/**/bin/
+!**/src/test/**/bin/
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store
\ No newline at end of file
diff --git a/.lfsconfig b/.lfsconfig
new file mode 100644
index 0000000..e45cd60
--- /dev/null
+++ b/.lfsconfig
@@ -0,0 +1,2 @@
+[lfs]
+    url = https://github.com/AdvancedJavaLabs/lab4-parallel.git/info/lfs
\ No newline at end of file
diff --git a/README.md b/README.md
index 948dc85..84b20f4 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+[![Review Assignment Due Date](https://classroom.github.com/assets/deadline-readme-button-22041afd0340ce965d47ae6ef1cefeee28c7c493a6346c4f15d667ab976d596c.svg)](https://classroom.github.com/a/uyodabcP)
 ## Lab: implementing MapReduce for sales data analysis with Hadoop
 
 # Goal
diff --git a/build.gradle.kts b/build.gradle.kts
new file mode 100644
index 0000000..79eea39
--- /dev/null
+++ b/build.gradle.kts
@@ -0,0 +1,34 @@
+plugins {
+    id("java")
+}
+
+group = "com.yu_ufimtsev.itmo.sales_data_analysis"
+version = "1.0-SNAPSHOT"
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(8))
+    }
+}
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    implementation("org.apache.hadoop:hadoop-common:3.2.1")
+    implementation("org.apache.hadoop:hadoop-mapreduce-client-core:3.2.1")
+    implementation("org.apache.hadoop:hadoop-mapreduce-client-jobclient:3.2.1")
+}
+
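+// Thin jar: only the job classes are packaged. Hadoop's own classes are
+// already on the classpath when the job is launched via "yarn jar" inside
+// the cluster, so bundling the dependencies is unnecessary.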
+tasks.jar {
+    archiveBaseName.set("sales-analyzer-job")
+    archiveVersion.set("")
+    archiveClassifier.set("")
+
+    manifest {
+        attributes(
+            "Main-Class" to "com.yu_ufimtsev.itmo.sales_data_analysis.SalesPipelineJob"
+        )
+    }
+}
diff --git a/figures/seconds_by_reducers_figure.png b/figures/seconds_by_reducers_figure.png
new file mode 100644
index 0000000..b8916aa
Binary files /dev/null and b/figures/seconds_by_reducers_figure.png differ
diff --git a/figures/seconds_by_splitting_figure.png b/figures/seconds_by_splitting_figure.png
new file mode 100644
index 0000000..d5f8757
Binary files /dev/null and b/figures/seconds_by_splitting_figure.png differ
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000..249e583
Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..5c82cb0
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/gradlew b/gradlew
new file mode 100644
index 0000000..1b6c787
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,234 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+#       noncompliant, but you have some other compliant shell such as ksh or
+#       bash, then to run this script, type that shell name before the whole
+#       command line, like:
+#
+#           ksh Gradle
+#
+#       Busybox and similar reduced shells will NOT work, because this script
+#       requires all of these POSIX shell features:
+#         * functions;
+#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+#         * compound commands having a testable exit status, especially «case»;
+#         * various built-in commands including «command», «set», and «ulimit».
+#
+#   Important for patching:
+#
+#   (2) This script targets any POSIX shell, so it avoids extensions provided
+#       by Bash, Ksh, etc; in particular arrays are avoided.
+#
+#       The "traditional" practice of packing multiple parameters into a
+#       space-separated string is a well documented source of bugs and security
+#       problems, so this is (mostly) avoided, by progressively accumulating
+#       options in "$@", and eventually passing that to Java.
+#
+#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+#       see the in-line comments for details.
+#
+#       There are tweaks for specific operating systems such as AIX, CygWin,
+#       Darwin, MinGW, and NonStop.
+#
+#   (3) This script is generated from the Groovy template
+#       https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       within the Gradle project.
+#
+#       You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
+    [ -h "$app_path" ]
+do
+    ls=$( ls -ld "$app_path" )
+    link=${ls#*' -> '}
+    case $link in             #(
+      /*)   app_path=$link ;; #(
+      *)    app_path=$APP_HOME$link ;;
+    esac
+done
+
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
+
+APP_NAME="Gradle"
+APP_BASE_NAME=${0##*/}
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+    echo "$*"
+} >&2
+
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in                #(
+  CYGWIN* )         cygwin=true  ;; #(
+  Darwin* )         darwin=true  ;; #(
+  MSYS* | MINGW* )  msys=true    ;; #(
+  NONSTOP* )        nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD=$JAVA_HOME/jre/sh/java
+    else
+        JAVACMD=$JAVA_HOME/bin/java
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD=java
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+    case $MAX_FD in #(
+      max*)
+        MAX_FD=$( ulimit -H -n ) ||
+            warn "Could not query maximum file descriptor limit"
+    esac
+    case $MAX_FD in  #(
+      '' | soft) :;; #(
+      *)
+        ulimit -n "$MAX_FD" ||
+            warn "Could not set maximum file descriptor limit to $MAX_FD"
+    esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+#   * args from the command line
+#   * the main class name
+#   * -classpath
+#   * -D...appname settings
+#   * --module-path (only if needed)
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+    JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    for arg do
+        if
+            case $arg in                                #(
+              -*)   false ;;                            # don't mess with options #(
+              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
+                    [ -e "$t" ] ;;                      #(
+              *)    false ;;
+            esac
+        then
+            arg=$( cygpath --path --ignore --mixed "$arg" )
+        fi
+        # Roll the args list around exactly as many times as the number of
+        # args, so each arg winds up back in the position where it started, but
+        # possibly modified.
+        #
+        # NB: a `for` loop captures its iteration list before it begins, so
+        # changing the positional parameters here affects neither the number of
+        # iterations, nor the values presented in `arg`.
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
+    done
+fi
+
+# Collect all arguments for the java command;
+#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+#     shell script including quotes and variable substitutions, so put them in
+#     double quotes to make sure that they get re-expanded; and
+#   * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem      https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/hadoop/docker-compose.yml b/hadoop/docker-compose.yml
new file mode 100644
index 0000000..a85b382
--- /dev/null
+++ b/hadoop/docker-compose.yml
@@ -0,0 +1,75 @@
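+# Single-node Hadoop 3.2.1 cluster built from the bde2020 images: HDFS
+# namenode + datanode, YARN resourcemanager + nodemanager, and a history
+# server. SERVICE_PRECONDITION lists host:port pairs the image entrypoint
+# waits on before starting, which enforces the startup order below.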
+services:
+  namenode:
+    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
+    container_name: namenode
+    restart: always
+    ports:
+      - "9000:9000"
+      - "9870:9870"
+    volumes:
+      - ./../input-data:/opt/hadoop/jobs
+      - ./data/namenode:/hadoop/dfs/name
+    environment:
+      - CLUSTER_NAME=my-hadoop-cluster
+    env_file:
+      - ./hadoop.env
+
+  datanode:
+    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
+    container_name: datanode
+    restart: always
+    volumes:
+      - ./data/datanode:/hadoop/dfs/data
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870"
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - namenode
+
+  resourcemanager:
+    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8
+    container_name: resourcemanager
+    restart: always
+    volumes:
+      - ./../jobs:/opt/hadoop/jobs
+      - ./../reports:/opt/hadoop/reports
+    ports:
+      - "8088:8088"
+    environment:
+      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode:9864"
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - namenode
+      - datanode
+
+  nodemanager:
+    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
+    container_name: nodemanager
+    restart: always
+    ports:
+      - "8042:8042"
+    environment:
+      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode:9864 resourcemanager:8088"
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - resourcemanager
+
+  historyserver:
+    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.2.1-java8
+    container_name: historyserver
+    restart: always
+    ports:
+      - "8188:8188"
+    environment:
+      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode:9864 resourcemanager:8088"
+    volumes:
+      - ./data/history:/hadoop/yarn/timeline
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - namenode
+      - datanode
+      - resourcemanager
\ No newline at end of file
diff --git a/hadoop/hadoop.env b/hadoop/hadoop.env
new file mode 100644
index 0000000..f8034e4
--- /dev/null
+++ b/hadoop/hadoop.env
@@ -0,0 +1,10 @@
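+# The bde2020 entrypoint rewrites these into the matching *-site.xml:
+# PREFIX_CONF_a_b_c becomes property a.b.c, with "___" standing for "-"
+# (so aux___services ends up as yarn.nodemanager.aux-services).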
+CORE_CONF_fs_defaultFS=hdfs://namenode:9000
+HDFS_CONF_dfs_replication=1
+
+YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
+YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle
+
+MAPRED_CONF_mapreduce_framework_name=yarn
+MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.2.1/
+MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.2.1/
+MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.2.1/
\ No newline at end of file
diff --git a/0.csv b/input-data/0.csv
similarity index 100%
rename from 0.csv
rename to input-data/0.csv
diff --git a/1.csv b/input-data/1.csv
similarity index 100%
rename from 1.csv
rename to input-data/1.csv
diff --git a/2.csv b/input-data/2.csv
similarity index 100%
rename from 2.csv
rename to input-data/2.csv
diff --git a/3.csv b/input-data/3.csv
similarity index 100%
rename from 3.csv
rename to input-data/3.csv
diff --git a/4.csv b/input-data/4.csv
similarity index 100%
rename from 4.csv
rename to input-data/4.csv
diff --git a/5.csv b/input-data/5.csv
similarity index 100%
rename from 5.csv
rename to input-data/5.csv
diff --git a/6.csv b/input-data/6.csv
similarity index 100%
rename from 6.csv
rename to input-data/6.csv
diff --git a/7.csv b/input-data/7.csv
similarity index 100%
rename from 7.csv
rename to input-data/7.csv
diff --git a/reports/results.csv b/reports/results.csv
new file mode 100644
index 0000000..314cf21
--- /dev/null
+++ b/reports/results.csv
@@ -0,0 +1,48 @@
+splitMB,reducersCount,aggregationMs,sortingMs,totalMs,timestamp
+1,1,160695,18419,179114,2025-12-08T14:27:50.125
+1,2,141069,19424,160493,2025-12-08T14:30:51.350
+1,4,158142,18442,176584,2025-12-08T14:34:09.211
+1,8,159976,20819,180795,2025-12-08T14:37:29.671
+1,16,170142,26454,196596,2025-12-08T14:41:07.208
+
+2,1,84894,18507,103401,2025-12-08T14:43:11.058
+2,2,94107,19498,113605,2025-12-08T14:45:26.429
+2,4,94743,18494,113237,2025-12-08T14:47:40.043
+2,8,94850,22470,117320,2025-12-08T14:49:58.593
+2,16,100814,29557,130371,2025-12-08T14:52:29.284
+
+4,1,52675,20513,73188,2025-12-08T14:54:04.802
+4,2,56703,19529,76232,2025-12-08T14:55:43.205
+4,4,55538,19418,74956,2025-12-08T14:57:19.289
+4,8,57566,21692,79258,2025-12-08T14:58:59.552
+4,16,61524,27629,89153,2025-12-08T15:00:50.737
+
+8,1,33411,18700,52111,2025-12-08T15:02:02.939
+8,2,33741,19423,53164,2025-12-08T15:03:15.939
+8,4,35330,19415,54745,2025-12-08T15:04:30.353
+8,8,39364,21493,60857,2025-12-08T15:05:50.757
+8,16,43846,28497,72343,2025-12-08T15:07:23.098
+
+16,1,26251,18418,44669,2025-12-08T15:08:27.459
+16,2,25265,18646,43911,2025-12-08T15:09:30.894
+16,4,27269,19431,46700,2025-12-08T15:10:37.068
+16,8,29336,23506,52842,2025-12-08T15:11:49.378
+16,16,36340,26477,62817,2025-12-08T15:13:12.353
+
+32,1,23254,19026,42280,2025-12-08T15:14:14.512
+32,2,22223,18424,40647,2025-12-08T15:15:15.050
+32,4,23237,20431,43668,2025-12-08T15:16:18.604
+32,8,28372,21497,49869,2025-12-08T15:17:29.782
+32,16,31777,26562,58339,2025-12-08T15:18:47.912
+
+64,1,23269,19449,42718,2025-12-08T15:19:50.265
+64,2,21181,19421,40602,2025-12-08T15:20:50.422
+64,4,22258,19442,41700,2025-12-08T15:21:51.863
+64,8,25326,21460,46786,2025-12-08T15:22:58.374
+64,16,29311,26512,55823,2025-12-08T15:24:13.962
+
+140,1,22192,19623,41815,2025-12-08T15:25:15.858
+140,2,22231,19443,41674,2025-12-08T15:26:17.224
+140,4,25216,19439,44655,2025-12-08T15:27:21.432
+140,8,27327,21675,49002,2025-12-08T15:28:30.158
+140,16,30395,28575,58970,2025-12-08T15:29:49.271
diff --git a/result.txt b/result.txt
new file mode 100644
index 0000000..a3e2f09
--- /dev/null
+++ b/result.txt
@@ -0,0 +1,21 @@
+Category             Revenue         Quantity
+clothing             4560302171.99   911487
+video games          4560108307.50   913326
+baby products        4541435362.25   907186
+beauty products      4533874327.85   906417
+gardening tools      4531880837.74   905841
+automotive           4529861310.74   904962
+music instruments    4512294466.14   902389
+furniture            4503986763.16   900244
+electronics          4497526631.04   903266
+pet supplies         4488741730.38   896724
+stationery           4481794912.39   898265
+home appliances      4473888361.73   895815
+sports equipment     4469387812.34   894287
+groceries            4466915230.97   895470
+footwear             4465574983.36   894424
+jewelry              4463823670.79   893980
+office equipment     4463564947.38   892370
+toys                 4462453654.12   892741
+books                4457620825.95   890948
+health & wellness    4454082892.49   890475
diff --git a/run_experiments.sh b/run_experiments.sh
new file mode 100644
index 0000000..e71954e
--- /dev/null
+++ b/run_experiments.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Total data ~ 132 MB
+
+# actual splits numbers = 66,
+SPLITS_MB=(1 2 4 8 16 32 64 140)
+REDUCERS_COUNT=(1 2 4 8 16)
+
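+# 8 split sizes x 5 reducer counts = 40 pipeline runs; each run appends one
+# row to reports/results.csv (written by SalesPipelineJob inside the cluster).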
+for s in "${SPLITS_MB[@]}"; do
+    for r in "${REDUCERS_COUNT[@]}"; do
+        echo "=== Running split=$s reducers=$r ==="
+        ./run_pipeline.sh $s $r
+        echo ""
+    done
+done
+
+echo "Experiments completed. View report in ./reports/results.csv"
diff --git a/run_pipeline.sh b/run_pipeline.sh
new file mode 100644
index 0000000..ad1d3ab
--- /dev/null
+++ b/run_pipeline.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Prevent Git Bash path rewriting of the container paths below
+export MSYS_NO_PATHCONV=1
+
+# === Main job parameters ===
+SPLIT_MB=${1:-10}
+REDUCERS_COUNT=${2:-10}
+
+HDFS_INPUT_DIR="/input"
+HDFS_INTERMEDIATE_DIR="/tmp/pipeline-temp"
+HDFS_OUTPUT_DIR="/output"
+
+echo "=== Starting Hadoop containers ==="
+docker compose -f ./hadoop/docker-compose.yml up -d
+
+echo "=== Waiting for HDFS to leave safemode ==="
+docker exec namenode hdfs dfsadmin -safemode wait
+
+echo "=== Copying input CSV files to HDFS ==="
+if docker exec namenode hdfs dfs -test -d /input; then
+    echo "HDFS directory /input already exists"
+else
+    echo "Creating HDFS directory /input and putting csv files"
+    docker exec namenode hdfs dfs -mkdir /input
+    docker exec namenode hdfs dfs -put -f /opt/hadoop/jobs/*.csv /input
+fi
+
+echo "=== HDFS input csv files ==="
+docker exec namenode hdfs dfs -ls /input
+
+echo "=== Building JAR ==="
+./gradlew clean jar
+
+echo "=== Copy JAR to jobs folder ==="
+cp build/libs/sales-analyzer-job.jar ./jobs/sales-analyzer-job.jar
+
+echo "=== Cleaning previous HDFS directories ==="
+docker exec namenode hdfs dfs -rm -r -f "$HDFS_INTERMEDIATE_DIR"
+docker exec namenode hdfs dfs -rm -r -f "$HDFS_OUTPUT_DIR"
+
+echo "=== Running pipeline ==="
+docker exec resourcemanager yarn jar \
+    /opt/hadoop/jobs/sales-analyzer-job.jar \
+    "$HDFS_INPUT_DIR" \
+    "$HDFS_INTERMEDIATE_DIR" \
+    "$HDFS_OUTPUT_DIR" \
+    "$SPLIT_MB" \
+    "$REDUCERS_COUNT"
+
+echo "=== Final output ==="
+docker exec namenode hdfs dfs -cat /output/part-r-00000 > result.txt
+
diff --git a/settings.gradle.kts b/settings.gradle.kts
new file mode 100644
index 0000000..c4c26d5
--- /dev/null
+++ b/settings.gradle.kts
@@ -0,0 +1 @@
+rootProject.name = "lab3-YuriUfimtsev"
\ No newline at end of file
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/SalesPipelineJob.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/SalesPipelineJob.java
new file mode 100644
index 0000000..512a748
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/SalesPipelineJob.java
@@ -0,0 +1,159 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis;
+
+import com.yu_ufimtsev.itmo.sales_data_analysis.aggregate_stage.CategoryStatsReducer;
+import com.yu_ufimtsev.itmo.sales_data_analysis.aggregate_stage.RevenueCalculationMapper;
+import com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage.DescendingDoubleComparator;
+import com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage.SortByRevenueMapper;
+import com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage.SortedDataToFileReducer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.fs.Path;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+public class SalesPipelineJob {
+
+    private static final String REPORT_CSV_PATH = "/opt/hadoop/reports/results.csv";
+
+    public static void main(String[] args) throws Exception {
+
+        if (args.length != 5) {
+            System.err.println("Usage:"
+                    + " SalesPipelineJob <inputDir> <intermediateDir> <outputDir> <splitSizeMB> <reducersCount>");
+            System.exit(1);
+        }
+
+        long splitMb = Long.parseLong(args[3]);
+        long splitSizeInBytes = splitMb * 1024 * 1024;
+        int reducersCount = Integer.parseInt(args[4]);
+
+        String inputPath = args[0];
+        String intermediatePath = args[1];
+        String outputPath = args[2];
+
+        Configuration configuration = new Configuration();
+
+        long aggregationJobDuration = prepareAndExecuteAggregationSalesJob(
+                configuration, splitSizeInBytes, reducersCount, inputPath, intermediatePath
+        );
+        System.out.println("--------------------------------------------------");
+        System.out.printf("Sales aggregation job time: %d ms\n", aggregationJobDuration);
+        System.out.println("--------------------------------------------------");
+
+        long sortingJobDuration = prepareAndExecuteSortingCategoriesJob(
+                configuration, intermediatePath, outputPath
+        );
+        System.out.println("--------------------------------------------------");
+        System.out.printf("Categories sorting job time: %d ms\n", sortingJobDuration);
+        System.out.println("--------------------------------------------------");
+
+        long totalTime = aggregationJobDuration + sortingJobDuration;
+        System.out.println("--------------------------------------------------");
+        System.out.printf("Total jobs time: %d ms\n", totalTime);
+        System.out.println("--------------------------------------------------");
+
+        appendResultsToCsv(splitMb, reducersCount, aggregationJobDuration, sortingJobDuration, totalTime);
+    }
+
+    private static long prepareAndExecuteAggregationSalesJob(
+            Configuration configuration,
+            long splitSize, int reducersCount, String inputPath, String intermediatePath) throws Exception {
+
+        Job aggregationJob = Job.getInstance(configuration);
+        aggregationJob.setJobName("sales-aggregation");
+
+        aggregationJob.setJarByClass(SalesPipelineJob.class);
+        aggregationJob.setMapperClass(RevenueCalculationMapper.class);
+        aggregationJob.setReducerClass(CategoryStatsReducer.class);
+
+        aggregationJob.setMapOutputKeyClass(Text.class);
+        aggregationJob.setMapOutputValueClass(Text.class);
+        aggregationJob.setOutputKeyClass(Text.class);
+        aggregationJob.setOutputValueClass(Text.class);
+
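+        // Capping the maximum split size forces FileInputFormat to cut the
+        // input into more, smaller splits, i.e. to launch more map tasks;
+        // this is the knob run_experiments.sh sweeps from 1 MB to 140 MB.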
+        FileInputFormat.setMaxInputSplitSize(aggregationJob, splitSize);
+        aggregationJob.setNumReduceTasks(reducersCount);
+
+        FileInputFormat.addInputPath(aggregationJob, new Path(inputPath));
+        FileOutputFormat.setOutputPath(aggregationJob, new Path(intermediatePath));
+
+        long startAggregationJob = System.currentTimeMillis();
+        boolean isAggregationJobSuccessful = aggregationJob.waitForCompletion(true);
+        long endAggregationJob = System.currentTimeMillis();
+        long aggregationJobDuration = endAggregationJob - startAggregationJob;
+
+        if (!isAggregationJobSuccessful) {
+            System.err.println("Aggregation job failed");
+            System.exit(1);
+        }
+
+        return aggregationJobDuration;
+    }
+
+    private static long prepareAndExecuteSortingCategoriesJob(
+            Configuration configuration, String intermediatePath, String outputPath) throws Exception {
+
+        Job sortingCategoriesJob = Job.getInstance(configuration, "sorting-categories");
+        sortingCategoriesJob.setJobName("sorting-categories");
+
+        sortingCategoriesJob.setJarByClass(SalesPipelineJob.class);
+        sortingCategoriesJob.setMapperClass(SortByRevenueMapper.class);
+        sortingCategoriesJob.setReducerClass(SortedDataToFileReducer.class);
+
+        sortingCategoriesJob.setMapOutputKeyClass(DoubleWritable.class);
+        sortingCategoriesJob.setMapOutputValueClass(Text.class);
+        sortingCategoriesJob.setOutputKeyClass(Text.class);
+        sortingCategoriesJob.setOutputValueClass(Text.class);
+
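+        // MapReduce sorts map output keys during the shuffle; a descending
+        // DoubleWritable comparator makes the highest revenue arrive first,
+        // and a single reduce task yields one globally ordered output file.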
+        sortingCategoriesJob.setSortComparatorClass(DescendingDoubleComparator.class);
+        sortingCategoriesJob.setNumReduceTasks(1);
+
+        FileInputFormat.addInputPath(sortingCategoriesJob, new Path(intermediatePath));
+        FileOutputFormat.setOutputPath(sortingCategoriesJob, new Path(outputPath));
+
+        long startSortingJob = System.currentTimeMillis();
+        boolean isSortingJobSuccessful = sortingCategoriesJob.waitForCompletion(true);
+        long endSortingJob = System.currentTimeMillis();
+        long sortingJobDuration = endSortingJob - startSortingJob;
+
+        if (!isSortingJobSuccessful) {
+            System.err.println("Sorting job failed");
+            System.exit(1);
+        }
+
+        return sortingJobDuration;
+    }
+
+    private static void appendResultsToCsv(long splitMB, long reducersCount, long aggregationMs,
+                                           long sortingMs, long totalPipelineMs) {
+
+        try {
+            File reportFile = new File(REPORT_CSV_PATH);
+            boolean isReportFileNew = !reportFile.exists();
+
+            try (FileWriter fileWriter = new FileWriter(reportFile, true)) {
+                if (isReportFileNew) {
+                    fileWriter.write("splitMB,reducersCount,aggregationMs,sortingMs,totalMs,timestamp\n");
+                }
+
+                fileWriter.write(String.format(
+                        "%d,%d,%d,%d,%d,%s\n",
+                        splitMB, reducersCount,
+                        aggregationMs, sortingMs, totalPipelineMs,
+                        java.time.LocalDateTime.now()
+                ));
+            }
+            System.out.println("CSV report successfully updated");
+
+        } catch (IOException e) {
+            System.err.println("Failed writing to CSV report: " + e.getMessage());
+        }
+    }
+}
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/CategoryStatsReducer.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/CategoryStatsReducer.java
new file mode 100644
index 0000000..6102797
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/CategoryStatsReducer.java
@@ -0,0 +1,25 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis.aggregate_stage;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+import java.io.IOException;
+
+public class CategoryStatsReducer extends Reducer<Text, Text, Text, Text> {
+
+    @Override
+    protected void reduce(Text key, Iterable<Text> values, Context context)
+            throws IOException, InterruptedException {
+
+        double totalRevenue = 0;
+        int totalQuantity = 0;
+
+        for (Text value : values) {
+            String[] parts = value.toString().split(";");
+            totalRevenue += Double.parseDouble(parts[0]);
+            totalQuantity += Integer.parseInt(parts[1]);
+        }
+
+        context.write(key, new Text(String.format("%.2f\t%d", totalRevenue, totalQuantity)));
+    }
+}
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/RevenueCalculationMapper.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/RevenueCalculationMapper.java
new file mode 100644
index 0000000..adb4039
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/aggregate_stage/RevenueCalculationMapper.java
@@ -0,0 +1,37 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis.aggregate_stage;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+import java.io.IOException;
+
+public class RevenueCalculationMapper extends Mapper<LongWritable, Text, Text, Text> {
+
+    @Override
+    protected void map(LongWritable key, Text value, Context context)
+            throws IOException, InterruptedException {
+
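+        // Rows are 5-column CSV: category in column 2, price in column 3,
+        // quantity in column 4 (0-based); the header row and rows with an
+        // unexpected column count are skipped below.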
+        String line = value.toString();
+
+        if (line.startsWith("transaction_id")) {
+            return;
+        }
+
+        String[] parts = line.split(",");
+        if (parts.length != 5) {
+            return;
+        }
+
+        String categoryName = parts[2];
+
+        double price = Double.parseDouble(parts[3]);
+        int quantity = Integer.parseInt(parts[4]);
+        double revenue = price * quantity;
+
+        context.write(
+                new Text(categoryName),
+                new Text(String.format("%s;%s", revenue, quantity))
+        );
+    }
+}
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/DescendingDoubleComparator.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/DescendingDoubleComparator.java
new file mode 100644
index 0000000..f15d636
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/DescendingDoubleComparator.java
@@ -0,0 +1,17 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+public class DescendingDoubleComparator extends WritableComparator {
+
+    public DescendingDoubleComparator() {
+        super(DoubleWritable.class, true);
+    }
+
+    @Override
+    public int compare(WritableComparable a, WritableComparable b) {
+        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
+    }
+}
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortByRevenueMapper.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortByRevenueMapper.java
new file mode 100644
index 0000000..e5bcf38
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortByRevenueMapper.java
@@ -0,0 +1,36 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+import java.io.IOException;
+
+public class SortByRevenueMapper extends Mapper<LongWritable, Text, DoubleWritable, Text> {
+
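+    // Reads the aggregation output ("category<TAB>revenue<TAB>quantity") and
+    // promotes revenue to the map output key so the shuffle sorts on it;
+    // category and quantity travel along in the value.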
+    @Override
+    protected void map(LongWritable key, Text value, Context context)
+            throws IOException, InterruptedException {
+
+        String line = value.toString().trim();
+        if (line.isEmpty()) {
+            return;
+        }
+
+        String[] parts = line.split("\\t");
+        if (parts.length != 3) {
+            return;
+        }
+
+        String categoryName = parts[0].trim();
+
+        double revenue = Double.parseDouble(parts[1].trim());
+        int quantity = Integer.parseInt(parts[2].trim());
+
+        context.write(
+                new DoubleWritable(revenue),
+                new Text(String.format("%s-%d", categoryName, quantity))
+        );
+    }
+}
diff --git a/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortedDataToFileReducer.java b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortedDataToFileReducer.java
new file mode 100644
index 0000000..b5df99d
--- /dev/null
+++ b/src/main/java/com/yu_ufimtsev/itmo/sales_data_analysis/sort_stage/SortedDataToFileReducer.java
@@ -0,0 +1,31 @@
+package com.yu_ufimtsev.itmo.sales_data_analysis.sort_stage;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+import java.io.IOException;
+
+public class SortedDataToFileReducer extends Reducer<DoubleWritable, Text, Text, Text> {
+
+    private boolean isHeaderWritten;
+
+    @Override
+    protected void reduce(DoubleWritable key, Iterable<Text> values, Context context)
+            throws IOException, InterruptedException {
+
+        if (!isHeaderWritten) {
+            String header = String.format("%-20s %-15s %-10s", "Category", "Revenue", "Quantity");
+            context.write(null, new Text(header));
+            isHeaderWritten = true;
+        }
+
+        double revenue = key.get();
+
+        for (Text value : values) {
+            String[] parts = value.toString().split("-");
+            String formattedResult = String.format("%-20s %-15.2f %-10s", parts[0], revenue, parts[1]);
+            context.write(null, new Text(formattedResult));
+        }
+    }
+}