diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bba7b53
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target/
+/.idea/
diff --git a/README.md b/README.md
index 948dc85..84b20f4 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+[Открыть задание в GitHub Classroom](https://classroom.github.com/a/uyodabcP)
## Лабораторная работа: Реализация MapReduce для анализа данных о продажах с ипользованием HADOOP!!!
# Цель работы
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..992090e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,84 @@
+version: '3.8'
+
+services:
+ namenode:
+ image: bde2020/hadoop-namenode:latest
+ container_name: namenode
+ environment:
+ - CLUSTER_NAME=hadoop_namenode
+ - CORE_CONF_fs_defaultFS=hdfs://namenode:9000
+ - HDFS_CONF_dfs_replication=1
+ - HDFS_CONF_dfs_permissions_enabled=false
+ ports:
+ - "9870:9870" # Web UI HDFS
+ - "9000:9000"
+ - "8020:9000"
+ volumes:
+ - namenode_data:/hadoop/dfs/name
+ healthcheck:
+ test: [ "CMD", "curl", "-f", "http://localhost:9870" ]
+ interval: 30s
+ timeout: 10s
+ retries: 300
+
+ datanode:
+ image: bde2020/hadoop-datanode:latest
+ container_name: datanode
+ environment:
+ - CORE_CONF_fs_defaultFS=hdfs://namenode:9000
+ - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
+ volumes:
+ - datanode_data:/hadoop/dfs/data
+ depends_on:
+ namenode:
+ condition: service_healthy
+ ports:
+ - "9864:9864"
+
+ resourcemanager:
+ image: bde2020/hadoop-resourcemanager:latest
+ container_name: resourcemanager
+ environment:
+ - CORE_CONF_fs_defaultFS=hdfs://namenode:9000
+ - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
+ - YARN_CONF_yarn_nodemanager_aux-services=mapreduce_shuffle
+ - YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
+ - YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
+ - YARN_CONF_yarn_resourcemanager_resource_tracker_address=resourcemanager:8031
+ - YARN_CONF_yarn_resourcemanager_admin_address=resourcemanager:8033
+ - YARN_CONF_yarn_resourcemanager_webapp_address=0.0.0.0:8088
+ - HADOOP_CONF_DIR=/etc/hadoop
+ ports:
+ - "8088:8088"
+ - "8032:8032"
+ - "8030:8030"
+ - "8031:8031"
+ depends_on:
+ - namenode
+ - datanode
+
+ nodemanager:
+ image: bde2020/hadoop-nodemanager:latest
+ container_name: nodemanager
+ environment:
+ - CORE_CONF_fs_defaultFS=hdfs://namenode:9000
+ - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
+ - YARN_CONF_yarn_nodemanager_resource_memory_mb=4096
+ - YARN_CONF_yarn_nodemanager_resource_cpu_vcores=4
+ - YARN_CONF_yarn_nodemanager_aux-services=mapreduce_shuffle
+ - YARN_CONF_yarn_nodemanager_address=0.0.0.0:8041
+ - YARN_CONF_yarn_nodemanager_webapp_address=0.0.0.0:8042
+ - YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
+ - YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
+ - YARN_CONF_yarn_resourcemanager_resource_tracker_address=resourcemanager:8031
+ - YARN_CONF_yarn_resourcemanager_admin_address=resourcemanager:8033
+ - HADOOP_CONF_DIR=/etc/hadoop
+ ports:
+ - "8042:8042"
+ depends_on:
+ - resourcemanager
+ - datanode
+
+volumes:
+ namenode_data:
+ datanode_data:
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..f15c708
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,100 @@
+
+ 4.0.0
+
+ ru.ifmo
+ lab3
+ 1.0
+ jar
+
+ lab3
+
+
+ org.springframework.boot
+ spring-boot-starter-parent
+ 3.4.12
+
+
+
+ UTF-8
+ ru.ifmo.sales.App
+ 8
+ 8
+ 8
+ 8
+ 1.18.30
+ 3.3.6
+
+
+
+
+ org.projectlombok
+ lombok
+ ${lombok.version}
+ provided
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+ ${hadoop.version}
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+ --add-opens java.base/java.lang=ALL-UNNAMED
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/rerun.sh b/rerun.sh
new file mode 100755
index 0000000..87da0b9
--- /dev/null
+++ b/rerun.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+set -e
+
+echo "Запуск MapReduce для анализа продаж"
+
+REDUCERS=4
+BLOCK_SIZE_KB=1024
+
+read -p "Количество reducer-ов [$REDUCERS]: " user_reducers
+read -p "Размер блока в KB [$BLOCK_SIZE_KB]: " user_block_size
+
+REDUCERS=${user_reducers:-$REDUCERS}
+BLOCK_SIZE_KB=${user_block_size:-$BLOCK_SIZE_KB}
+
+echo "1. Запуск Hadoop кластера..."
+docker-compose up -d
+
+echo "Ожидание запуска NameNode..."
+while ! curl -s http://localhost:9870 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска DataNode..."
+while ! curl -s http://localhost:9864 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска ResourceManager..."
+while ! curl -s http://localhost:8088 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска NodeManager..."
+while ! curl -s http://localhost:8042 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "2. Сборка проекта..."
+mvn clean package
+
+echo "3. Подготовка HDFS..."
+
+docker exec namenode hdfs dfs -rm -r /output /output-temp 2>/dev/null || true
+
+echo "4. Подготовка JAR файла..."
+docker cp ./target/lab3-1.0.jar namenode:/tmp/sales-app.jar
+
+echo "5. Запуск MapReduce задания на YARN..."
+
+docker exec namenode hadoop jar /tmp/sales-app.jar /input /output $REDUCERS $BLOCK_SIZE_KB
+
+echo "6. Получение результатов..."
+
+if docker exec namenode hdfs dfs -test -e /output/_SUCCESS; then
+ echo "Задание выполнено успешно!"
+ > results.txt
+ echo "Category,Revenue,Quantity" >> results.txt
+
+ docker exec namenode hdfs dfs -ls /output/part-r-* 2>/dev/null | \
+ awk '{print $NF}' | \
+ while read file; do
+ docker exec namenode hdfs dfs -cat "$file" 2>/dev/null >> results.txt
+ echo "" >> results.txt
+ done
+
+ echo "Результаты сохранены в results.txt"
+else
+ echo "Ошибка: задание не завершилось успешно"
+ exit 1
+fi
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..7b5cbfb
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+set -e
+
+echo "Запуск MapReduce для анализа продаж"
+
+REDUCERS=4
+BLOCK_SIZE_KB=1024
+
+read -p "Количество reducer-ов [$REDUCERS]: " user_reducers
+read -p "Размер блока в KB [$BLOCK_SIZE_KB]: " user_block_size
+
+REDUCERS=${user_reducers:-$REDUCERS}
+BLOCK_SIZE_KB=${user_block_size:-$BLOCK_SIZE_KB}
+
+echo "1. Запуск Hadoop кластера..."
+docker-compose up -d
+
+echo "Ожидание запуска NameNode..."
+while ! curl -s http://localhost:9870 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска DataNode..."
+while ! curl -s http://localhost:9864 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска ResourceManager..."
+while ! curl -s http://localhost:8088 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "Ожидание запуска NodeManager..."
+while ! curl -s http://localhost:8042 > /dev/null; do
+ sleep 2
+ echo -n "."
+done
+echo " Готово!"
+
+echo "2. Сборка проекта..."
+mvn clean package
+
+echo "3. Подготовка HDFS..."
+
+docker exec namenode hdfs dfs -rm -r /input /output /output-temp 2>/dev/null || true
+docker exec namenode hdfs dfs -mkdir -p /input
+
+
+echo "4. Копирование CSV файлов в HDFS..."
+
+docker exec namenode rm -rf /tmp/*.csv 2>/dev/null || true
+
+for csv_file in ./*.csv; do
+ if [ -f "$csv_file" ]; then
+ filename=$(basename "$csv_file")
+ echo "Копирование $filename..."
+
+ docker cp "$csv_file" namenode:/tmp/
+ docker exec namenode hdfs dfs -put -f /tmp/"$filename" /input/
+ fi
+done
+
+echo "5. Подготовка JAR файла..."
+docker cp ./target/lab3-1.0.jar namenode:/tmp/sales-app.jar
+
+echo "6. Запуск MapReduce задания на YARN..."
+
+docker exec namenode hadoop jar /tmp/sales-app.jar /input /output $REDUCERS $BLOCK_SIZE_KB
+
+echo "7. Получение результатов..."
+
+if docker exec namenode hdfs dfs -test -e /output/_SUCCESS; then
+ echo "Задание выполнено успешно!"
+ > results.txt
+ echo "Category,Revenue,Quantity" >> results.txt
+
+ docker exec namenode hdfs dfs -ls /output/part-r-* 2>/dev/null | \
+ awk '{print $NF}' | \
+ while read file; do
+ docker exec namenode hdfs dfs -cat "$file" 2>/dev/null >> results.txt
+ echo "" >> results.txt
+ done
+
+ echo "Результаты сохранены в results.txt"
+else
+ echo "Ошибка: задание не завершилось успешно"
+ exit 1
+fi
+
+read -p "Остановить Hadoop кластер? (y/N): " stop_cluster
+if [[ $stop_cluster == "y" || $stop_cluster == "Y" ]]; then
+ echo "8. Остановка кластера..."
+ docker-compose down
+fi
diff --git a/src/main/java/ru/ifmo/sales/App.java b/src/main/java/ru/ifmo/sales/App.java
new file mode 100644
index 0000000..4b8095e
--- /dev/null
+++ b/src/main/java/ru/ifmo/sales/App.java
@@ -0,0 +1,61 @@
+package ru.ifmo.sales;
+
+import java.util.Arrays;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ToolRunner;
+import ru.ifmo.sales.stats.StatsJob;
+import ru.ifmo.sales.sort.SortJob;
+
+@Slf4j
+public class App {
+ public static void main(String[] args) throws Exception {
+ log.info("Got arguments: {}", Arrays.toString(args));
+
+ if (args.length < 2) {
+ System.err.println("Usage: App