diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b63da45
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,42 @@
+.gradle
+build/
+!gradle/wrapper/gradle-wrapper.jar
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### IntelliJ IDEA ###
+.idea/modules.xml
+.idea/jarRepositories.xml
+.idea/compiler.xml
+.idea/libraries/
+*.iws
+*.iml
+*.ipr
+out/
+!**/src/main/**/out/
+!**/src/test/**/out/
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+bin/
+!**/src/main/**/bin/
+!**/src/test/**/bin/
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/gradle.xml b/.idea/gradle.xml
new file mode 100644
index 0000000..14746e7
--- /dev/null
+++ b/.idea/gradle.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..f515627
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..32cf4db
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/palantir-java-format.xml b/.idea/palantir-java-format.xml
new file mode 100644
index 0000000..cbcc1cb
--- /dev/null
+++ b/.idea/palantir-java-format.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml
new file mode 100644
index 0000000..2b63946
--- /dev/null
+++ b/.idea/uiDesigner.xml
@@ -0,0 +1,124 @@
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/0.csv b/0.csv
deleted file mode 100644
index 2f7dfb7..0000000
--- a/0.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eae61feac01cb36de9d6dd148a5b3fb76ad506fd8b283b20306df16f246e511e
-size 3406784
diff --git a/1.csv b/1.csv
deleted file mode 100644
index 3a258b0..0000000
--- a/1.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7b2681409003d07d3ca063eb34339a0cdea9b688d16723b0e284091afd6bf806
-size 7078520
diff --git a/2.csv b/2.csv
deleted file mode 100644
index 4cdd100..0000000
--- a/2.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ea9a50aa300dfe7e766eb419b2a963d31cfe68644acb23f654df7abe852d3a76
-size 10737171
diff --git a/3.csv b/3.csv
deleted file mode 100644
index afbe78d..0000000
--- a/3.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6268fae7b50879a151a6f6de4852e4f39d2e22a8315290e87dcda71f4e10b866
-size 14530705
diff --git a/4.csv b/4.csv
deleted file mode 100644
index 9ff08df..0000000
--- a/4.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b4adf0d42097f1b2c9e68731460c5a7c52cb7ac7238addab7a796817cee9d00b
-size 18299520
diff --git a/5.csv b/5.csv
deleted file mode 100644
index 3980291..0000000
--- a/5.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:212b6d6b07197921eaedaa271d781431b8bb034c5328622d63231a2967df1702
-size 22053240
diff --git a/6.csv b/6.csv
deleted file mode 100644
index 5906dc6..0000000
--- a/6.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81f65085c6ce29a8f766244dc4b21d41d565ea3d6231b3b1c0b6739d67cd1d53
-size 25790880
diff --git a/7.csv b/7.csv
deleted file mode 100644
index df43af3..0000000
--- a/7.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e73ff71a37438ec216fc522d77e3902c5670e24a917d0be047ed101dbeea914
-size 29524261
diff --git a/build.gradle b/build.gradle
new file mode 100644
index 0000000..ab83f79
--- /dev/null
+++ b/build.gradle
@@ -0,0 +1,31 @@
+plugins { // Gradle build for the sales-analysis MapReduce application (plain Java project)
+ id 'java'
+}
+
+group = 'net.rednite'
+version = '1.0-SNAPSHOT'
+
+repositories {
+ mavenCentral()
+}
+
+dependencies {
+ implementation ('org.apache.hadoop:hadoop-common:3.3.6') { // NOTE(review): docker-compose.yml runs hadoop3.2.1 images while the client libraries are 3.3.6 - confirm the versions are compatible
+ exclude group: 'org.slf4j', module: 'slf4j-reload4j' // drop the reload4j SLF4J binding from the transitives; slf4j-simple below is the binding declared for this project
+ }
+ implementation ('org.apache.hadoop:hadoop-client:3.3.6') {
+ exclude group: 'org.slf4j', module: 'slf4j-reload4j' // same exclusion as for hadoop-common above
+ }
+
+ implementation 'org.slf4j:slf4j-simple:2.0.16' // SLF4J binding used in place of the excluded slf4j-reload4j
+
+ compileOnly 'org.projectlombok:lombok:1.18.36' // Lombok annotations (the sources use @Slf4j) are only needed at compile time
+ annotationProcessor 'org.projectlombok:lombok:1.18.36' // same Lombok version registered as the annotation processor
+
+ testImplementation platform('org.junit:junit-bom:5.10.0') // BOM supplies the version for the unversioned junit-jupiter dependency below
+ testImplementation 'org.junit.jupiter:junit-jupiter'
+}
+
+test {
+ useJUnitPlatform() // run tests on the JUnit Platform (JUnit 5)
+}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..e208fd0
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,44 @@
+version: '3.8'
+
+services: # Test HDFS cluster: one namenode and one datanode on a shared "hadoop" network, replication factor 1
+ namenode:
+ image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
+ container_name: namenode
+ hostname: namenode
+ environment:
+ - CLUSTER_NAME=test-cluster
+ - CORE_CONF_fs_defaultFS=hdfs://0.0.0.0:9000 # Listen on all interfaces
+ - HDFS_CONF_dfs_replication=1
+ ports:
+ - "9870:9870" # Web UI
+ - "9000:9000" # RPC (the Java application connects to hdfs://localhost:9000)
+ volumes:
+ - namenode-data:/hadoop/dfs/name
+ networks:
+ - hadoop
+
+ datanode:
+ image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
+ container_name: datanode
+ hostname: datanode
+ environment:
+ - CORE_CONF_fs_defaultFS=hdfs://0.0.0.0:9000 # Listen on all interfaces. NOTE(review): on the datanode this URI is presumably used to locate the namenode, so hdfs://namenode:9000 may be what is intended - confirm
+ - HDFS_CONF_dfs_replication=1
+ ports:
+ - "9864:9864" # Web UI
+ - "50010:50010" # Data Transfer Port. NOTE(review): 50010/50020/50075 look like Hadoop 2.x defaults; a hadoop3.2.1 datanode presumably listens on 9866/9867/9864 instead - confirm
+ - "50020:50020" # IPC Port
+ - "50075:50075" # HTTP Server Port
+ volumes:
+ - datanode-data:/hadoop/dfs/data
+ depends_on:
+ - namenode
+ networks:
+ - hadoop
+
+volumes: # Named volumes mounted above at /hadoop/dfs/name and /hadoop/dfs/data
+ namenode-data:
+ datanode-data:
+
+networks:
+ hadoop:
\ No newline at end of file
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000..249e583
Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..a75c42d
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Wed Dec 11 19:44:28 MSK 2024
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/gradlew b/gradlew
new file mode 100755
index 0000000..1b6c787
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,234 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+# Gradle start up script for POSIX generated by Gradle.
+#
+# Important for running:
+#
+# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+# noncompliant, but you have some other compliant shell such as ksh or
+# bash, then to run this script, type that shell name before the whole
+# command line, like:
+#
+# ksh Gradle
+#
+# Busybox and similar reduced shells will NOT work, because this script
+# requires all of these POSIX shell features:
+# * functions;
+# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+# * compound commands having a testable exit status, especially «case»;
+# * various built-in commands including «command», «set», and «ulimit».
+#
+# Important for patching:
+#
+# (2) This script targets any POSIX shell, so it avoids extensions provided
+# by Bash, Ksh, etc; in particular arrays are avoided.
+#
+# The "traditional" practice of packing multiple parameters into a
+# space-separated string is a well documented source of bugs and security
+# problems, so this is (mostly) avoided, by progressively accumulating
+# options in "$@", and eventually passing that to Java.
+#
+# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+# see the in-line comments for details.
+#
+# There are tweaks for specific operating systems such as AIX, CygWin,
+# Darwin, MinGW, and NonStop.
+#
+# (3) This script is generated from the Groovy template
+# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+# within the Gradle project.
+#
+# You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+ APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
+ [ -h "$app_path" ]
+do
+ ls=$( ls -ld "$app_path" )
+ link=${ls#*' -> '}
+ case $link in #(
+ /*) app_path=$link ;; #(
+ *) app_path=$APP_HOME$link ;;
+ esac
+done
+
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
+
+APP_NAME="Gradle"
+APP_BASE_NAME=${0##*/}
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+ echo "$*"
+} >&2
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in #(
+ CYGWIN* ) cygwin=true ;; #(
+ Darwin* ) darwin=true ;; #(
+ MSYS* | MINGW* ) msys=true ;; #(
+ NONSTOP* ) nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD=$JAVA_HOME/jre/sh/java
+ else
+ JAVACMD=$JAVA_HOME/bin/java
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD=java
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+ case $MAX_FD in #(
+ max*)
+ MAX_FD=$( ulimit -H -n ) ||
+ warn "Could not query maximum file descriptor limit"
+ esac
+ case $MAX_FD in #(
+ '' | soft) :;; #(
+ *)
+ ulimit -n "$MAX_FD" ||
+ warn "Could not set maximum file descriptor limit to $MAX_FD"
+ esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+# * args from the command line
+# * the main class name
+# * -classpath
+# * -D...appname settings
+# * --module-path (only if needed)
+# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+ APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+ CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+ JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ for arg do
+ if
+ case $arg in #(
+ -*) false ;; # don't mess with options #(
+ /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
+ [ -e "$t" ] ;; #(
+ *) false ;;
+ esac
+ then
+ arg=$( cygpath --path --ignore --mixed "$arg" )
+ fi
+ # Roll the args list around exactly as many times as the number of
+ # args, so each arg winds up back in the position where it started, but
+ # possibly modified.
+ #
+ # NB: a `for` loop captures its iteration list before it begins, so
+ # changing the positional parameters here affects neither the number of
+ # iterations, nor the values presented in `arg`.
+ shift # remove old arg
+ set -- "$@" "$arg" # push replacement arg
+ done
+fi
+
+# Collect all arguments for the java command;
+# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+# shell script including quotes and variable substitutions, so put them in
+# double quotes to make sure that they get re-expanded; and
+# * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+ "-Dorg.gradle.appname=$APP_BASE_NAME" \
+ -classpath "$CLASSPATH" \
+ org.gradle.wrapper.GradleWrapperMain \
+ "$@"
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+# set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+ printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+ xargs -n1 |
+ sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+ tr '\n' ' '
+ )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/metrics.txt b/metrics.txt
new file mode 100644
index 0000000..e6e449b
--- /dev/null
+++ b/metrics.txt
@@ -0,0 +1,10 @@
+ms: 9397, salted: false, reducers: 1
+ms: 9543, salted: true, reducers: 1
+ms: 9498, salted: false, reducers: 4
+ms: 11546, salted: false, reducers: 8
+ms: 14414, salted: false, reducers: 16
+ms: 8868, salted: true, reducers: 1
+ms: 12586, salted: true, reducers: 1
+ms: 9678, salted: true, reducers: 1
+ms: 9434, salted: true, reducers: 1
+ms: 8510, salted: true, reducers: 1
diff --git a/results/part-r-00000 b/results/part-r-00000
new file mode 100644
index 0000000..dc840dc
--- /dev/null
+++ b/results/part-r-00000
@@ -0,0 +1,20 @@
+clothing 4560302171.99 911487
+video games 4560108307.50 913326
+baby products 4541435362.25 907186
+beauty products 4533874327.85 906417
+gardening tools 4531880837.74 905841
+automotive 4529861310.74 904962
+music instruments 4512294466.14 902389
+furniture 4503986763.16 900244
+electronics 4497526631.04 903266
+pet supplies 4488741730.38 896724
+stationery 4481794912.40 898265
+home appliances 4473888361.73 895815
+sports equipment 4469387812.34 894287
+groceries 4466915230.97 895470
+footwear 4465574983.36 894424
+jewelry 4463823670.79 893980
+office equipment 4463564947.38 892370
+toys 4462453654.12 892741
+books 4457620825.95 890948
+health & wellness 4454082892.49 890475
diff --git a/settings.gradle b/settings.gradle
new file mode 100644
index 0000000..5e8f702
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1,2 @@
+rootProject.name = 'parallel-lab4'
+
diff --git a/src/main/java/ru/brikster/MetricType.java b/src/main/java/ru/brikster/MetricType.java
new file mode 100644
index 0000000..49c9805
--- /dev/null
+++ b/src/main/java/ru/brikster/MetricType.java
@@ -0,0 +1,9 @@
+package ru.brikster;
+
+public enum MetricType { // Kinds of sales metric; NOTE(review): presumably matched against the upper-cased "metric.type" job setting - confirm in the mapper/reducer
+ REVENUE, // Total revenue
+ AVERAGE_PRICE, // Average item price
+ ITEMS_SOLD, // Number of items sold
+ AVERAGE_QUANTITY, // Average number of items per order
+ TRANSACTIONS // Number of transactions
+}
diff --git a/src/main/java/ru/brikster/SalesAnalysisApplication.java b/src/main/java/ru/brikster/SalesAnalysisApplication.java
new file mode 100644
index 0000000..282086d
--- /dev/null
+++ b/src/main/java/ru/brikster/SalesAnalysisApplication.java
@@ -0,0 +1,126 @@
+package ru.brikster;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Comparator;
+import java.util.Locale;
+import java.util.stream.Stream;
+
+/**
+ * Command-line driver for the sales-analysis MapReduce job.
+ *
+ * <p>Uploads the local input files to HDFS, runs the job for the requested metric, copies the
+ * job output back to the local file system and appends the elapsed job time to
+ * {@code metrics.txt} in the working directory.
+ */
+@Slf4j
+public class SalesAnalysisApplication {
+
+    private static final String HDFS_URI = "hdfs://localhost:9000";
+    private static final String INPUT_DIR = "/input";
+    private static final String OUTPUT_DIR = "/result";
+
+    // Benchmark knobs; both values are written to metrics.txt next to the measured time.
+    private static final boolean USE_SALTED_PARTITIONER = true;
+    private static final int REDUCERS_COUNT = 1;
+
+    /**
+     * Runs the whole pipeline: prepare directories, upload input, run the job, fetch the result.
+     *
+     * @param args {@code args[0]} local input directory, {@code args[1]} local output directory,
+     *             {@code args[2]} metric type (upper-cased before it is stored in the job
+     *             configuration)
+     * @throws Exception if HDFS access, local file access or the job itself fails
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 3) {
+            System.err.println("Usage: SalesAnalysis <input dir> <output dir> <metric type>");
+            System.exit(1);
+        }
+
+        Configuration conf = createConfiguration(args[2]);
+        // Optional tuning knob kept from earlier experiments (caps the input split size):
+        // conf.set("mapreduce.input.fileinputformat.split.maxsize", "30000000");
+
+        Path localInputDir = new Path(args[0]);
+        Path hdfsInputDir = new Path(INPUT_DIR);
+        Path hdfsOutputDir = new Path(OUTPUT_DIR);
+        Path localOutputDir = new Path(args[1]);
+
+        boolean success;
+        try (FileSystem fs = FileSystem.get(conf)) {
+            setupDirectories(fs, hdfsInputDir, hdfsOutputDir, localOutputDir);
+            uploadInputFiles(fs, localInputDir, hdfsInputDir);
+
+            Job job = configureJob(conf, args[2], hdfsInputDir, hdfsOutputDir);
+
+            // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments.
+            long startNanos = System.nanoTime();
+            success = job.waitForCompletion(true);
+            long totalMs = (System.nanoTime() - startNanos) / 1_000_000L;
+
+            if (success) {
+                fs.copyToLocalFile(false, hdfsOutputDir, localOutputDir, true);
+                Files.writeString(
+                        Paths.get("metrics.txt"),
+                        "ms: " + totalMs + ", salted: " + USE_SALTED_PARTITIONER + ", reducers: " + REDUCERS_COUNT + "\n",
+                        StandardOpenOption.APPEND,
+                        StandardOpenOption.CREATE);
+            }
+        }
+
+        if (!success) {
+            System.err.println("Job failed!");
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Builds the Hadoop configuration shared by the file-system client and the job.
+     *
+     * @param metricType metric name as given on the command line
+     * @return configuration with {@code metric.type} and {@code fs.defaultFS} set
+     */
+    private static Configuration createConfiguration(String metricType) {
+        Configuration conf = new Configuration();
+        // Locale.ROOT keeps the upper-casing stable (e.g. no dotted capital I under a Turkish locale).
+        conf.set("metric.type", metricType.toUpperCase(Locale.ROOT));
+        conf.set("fs.defaultFS", HDFS_URI);
+        return conf;
+    }
+
+    /**
+     * Copies every entry of the local input directory into the HDFS input directory,
+     * overwriting entries with the same name.
+     *
+     * <p>Best effort: a failed copy is logged and the remaining entries are still uploaded, so
+     * the job then runs on whatever reached HDFS.
+     *
+     * @param fs            target file system
+     * @param localInputDir local directory whose direct children are uploaded
+     * @param hdfsInputDir  destination directory in HDFS
+     * @throws IOException if the local directory cannot be listed
+     */
+    private static void uploadInputFiles(FileSystem fs, Path localInputDir, Path hdfsInputDir)
+            throws IOException {
+        // java.nio.file.Path is spelled out because the simple name Path is Hadoop's here.
+        try (Stream<java.nio.file.Path> localFiles = Files.list(Paths.get(localInputDir.toString()))) {
+            localFiles.forEach(file -> {
+                try {
+                    Path hdfsFilePath = new Path(hdfsInputDir, file.getFileName().toString());
+                    fs.copyFromLocalFile(false, true, new Path(file.toString()), hdfsFilePath);
+                } catch (IOException e) {
+                    log.error("Error copying file to HDFS: {}", file, e);
+                }
+            });
+        }
+    }
+
+    /**
+     * Prepares the directories for a fresh run.
+     *
+     * <p>The HDFS input directory is created when missing but never emptied, so files uploaded
+     * by earlier runs stay part of the job input. The HDFS output directory and the local
+     * output directory are removed recursively when they exist.
+     *
+     * @param fs             file system holding the HDFS directories
+     * @param inputDir       HDFS input directory
+     * @param outputDir      HDFS output directory
+     * @param localOutputDir local directory that will receive the job output
+     * @throws IOException if a directory cannot be created or deleted
+     */
+    private static void setupDirectories(FileSystem fs, Path inputDir, Path outputDir, Path localOutputDir) throws IOException {
+        if (!fs.exists(inputDir)) {
+            fs.mkdirs(inputDir);
+        }
+        if (fs.exists(outputDir)) {
+            fs.delete(outputDir, true);
+        }
+
+        java.nio.file.Path localOutputDirPath = Paths.get(localOutputDir.toString());
+        if (Files.exists(localOutputDirPath)) {
+            try (Stream<java.nio.file.Path> entries = Files.walk(localOutputDirPath)) {
+                // Reverse order yields children before their parents, so nested directories are
+                // already empty when they are deleted; the root itself comes last.
+                var deepestFirst = entries.sorted(Comparator.reverseOrder()).iterator();
+                while (deepestFirst.hasNext()) {
+                    Files.delete(deepestFirst.next());
+                }
+            }
+        }
+    }
+
+    /**
+     * Creates the MapReduce job: mapper, reducer, optional salted partitioner, reducer count and
+     * the input/output paths.
+     *
+     * @param conf       configuration created by {@link #createConfiguration(String)}
+     * @param metricType metric name, used only in the job's display name
+     * @param inputDir   HDFS directory the job reads
+     * @param outputDir  HDFS directory the job writes
+     * @return the configured, not yet submitted job
+     * @throws IOException if the job cannot be instantiated or a path cannot be registered
+     */
+    private static Job configureJob(Configuration conf, String metricType, Path inputDir, Path outputDir)
+            throws IOException {
+        Job job = Job.getInstance(conf, "sales analysis - " + metricType);
+        job.setJarByClass(SalesAnalysisApplication.class);
+
+        job.setMapperClass(SalesMapper.class);
+        job.setMapOutputKeyClass(Text.class);
+        job.setMapOutputValueClass(SalesMetric.class);
+
+        job.setReducerClass(SalesReducer.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(Text.class);
+
+        if (USE_SALTED_PARTITIONER) {
+            job.setPartitionerClass(SaltedPartitioner.class);
+        }
+        job.setNumReduceTasks(REDUCERS_COUNT);
+
+        FileInputFormat.addInputPath(job, inputDir);
+        FileOutputFormat.setOutputPath(job, outputDir);
+
+        return job;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/ru/brikster/SalesMapper.java b/src/main/java/ru/brikster/SalesMapper.java
new file mode 100644
index 0000000..99a02bb
--- /dev/null
+++ b/src/main/java/ru/brikster/SalesMapper.java
@@ -0,0 +1,61 @@
+package ru.brikster;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+import java.io.IOException;
+
+@Slf4j
+public class SalesMapper extends Mapper