Skip to content

Commit 60eb0a7

Browse files
authored
Merge pull request #1 from BENMFeng/main
Pre-release: first upload
2 parents 67efd24 + 4cc7b86 commit 60eb0a7

File tree

11 files changed

+1891
-2
lines changed

11 files changed

+1891
-2
lines changed

Cargo.toml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
[package]
2+
name = "doh"
3+
4+
version = "0.1.0"
5+
edition = "2021"
6+
license = "GNU GPLv3"
7+
repository = "https://github.com/ZimaBlue-AI/DoH"
8+
description = "Distributed jObs Hypervisor"
9+
rust-version = "1.82.0"
10+
authors = ["ZimaBlueAI <zimablueai@proton.me>"]
11+
12+
[[bin]]
13+
name = "node_monitor"
14+
path = "src/node_monitor.rs"
15+
doc = false
16+
17+
[[bin]]
18+
name = "job_monitor"
19+
path = "src/job_monitor.rs"
20+
doc = false
21+
22+
[dependencies]
23+
sysinfo = "0.32.1"
24+
serde = { version = "1.0", features = ["derive"] }
25+
serde_json = "1.0.133"
26+
procfs = "0.17.0"
27+
regex = "1.5.4"
28+
tokio = { version = "1", features = ["full", "rt-multi-thread"] }
29+
log = "0.4"
30+
env_logger = "0.11.5"
31+
uuid = {version = "^1.8.0", features = [
32+
"v4", # Lets you generate random UUIDs
33+
"fast-rng", # Use a faster (but still sufficiently random) RNG
34+
"macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
35+
]}
36+
uuid-macro-internal = { version = "1.0.0-alpha.1" }
37+
anyhow = "1.0"
38+
reqwest = { version="0.12.2", features = ["json", "multipart", "stream"] }

README.md

Lines changed: 204 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,204 @@
1-
# DoH
2-
Distributed jObs Hypervisor
1+
# DoH: Distributed jObs Hypervisor
2+
3+
## 0x01 Deployment
4+
5+
Install Rust
6+
```bash
7+
# Install rust
8+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
9+
# or update
10+
rustup update
11+
# Install openssl and libssl-dev
12+
sudo apt-get update
13+
sudo apt install openssl
14+
sudo apt install libssl-dev
15+
sudo apt install pkg-config
16+
```
17+
18+
## 0x02 compile
19+
```bash
20+
git clone https://github.com/ZimaBlue-AI/DoH
21+
cd DoH
22+
cargo build --release
23+
```
24+
25+
## 0x03 configuration
26+
27+
### 3.1 Resource monitoring program configuration, saved as config.json
28+
29+
```json
30+
{
31+
"disk_monitor": {
32+
"disk_space_threshold": 107374182400, #100GB
33+
"check_interval": 60, #1 minute
34+
"mount_points": ["/mnt/c"],
35+
"path_space": [
36+
{
37+
"path":"/mnt/c/DoH/",
38+
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning
39+
}
40+
],
41+
"receiver": [{
42+
"receive_id": "ou_***",
43+
"receive_id_type": "open_id"
44+
}]
45+
},
46+
"resource_monitor": {
47+
"check_interval": 10, #10 seconds
48+
"iops_threshold": 26214400, # read/write bytes over 25MB
49+
"memory_threshold": 1073741824, # RAM over 1GB
50+
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
51+
"cpu_threshold": 190.0, # CPU usage over 190%
52+
"exclude_self_process": true, # do not warn about the monitor's own processes
53+
"receiver": [{
54+
"receive_id": "186***", # phone
55+
"receive_id_type": "mobile"
56+
}]
57+
},
58+
"notice_config": {
59+
"fs_config": {
60+
"app_id": "cli_***", # feishu(lark) app_id
61+
"app_secret": "***", # feishu(lark) app_secret
62+
"receiver": [{
63+
"receive_id": "oc_***",
64+
"receive_id_type": "chat_id"
65+
}]
66+
}
67+
}
68+
}
69+
```
70+
71+
72+
### 3.2 Node hypervisor configuration
73+
74+
```json
75+
{
76+
"disk_monitor": {
77+
"disk_space_threshold": 107374182400, #100GB
78+
"check_interval": 60, #1 minute
79+
"mount_points": ["/mnt/c"],
80+
"path_space": [
81+
{
82+
"path":"/mnt/c/DoH/",
83+
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning
84+
}
85+
],
86+
"receiver": [{
87+
"receive_id": "ou_***",
88+
"receive_id_type": "open_id"
89+
}]
90+
},
91+
"resource_monitor": {
92+
"check_interval": 10, #10 seconds
93+
"iops_threshold": 26214400, # read/write bytes over 25MB
94+
"memory_threshold": 1073741824, # RAM over 1GB
95+
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
96+
"cpu_threshold": 190.0, # CPU usage over 190%
97+
"exclude_self_process": true, # do not warn about the monitor's own processes
98+
"receiver": [{
99+
"receive_id": "186***", # phone
100+
"receive_id_type": "mobile"
101+
}]
102+
},
103+
"node_monitor": {
104+
"run_time": 10,
105+
"check_interval": 60,
106+
"node_id": "management_node", # node name
107+
"exclude_users": ["root"], # do not daemon the root user
108+
"include_users": ["ai"], # daemon user list
109+
"receiver": [{
110+
"receive_id": "186***",
111+
"receive_id_type": "mobile"
112+
}]
113+
},
114+
"notice_config": {
115+
"fs_config": {
116+
"app_id": "cli_***", # feishu(lark) app_id
117+
"app_secret": "***", # feishu(lark) app_secret
118+
"receiver": []
119+
}
120+
}
121+
}
122+
```
123+
124+
### 3.3 Job management program configuration
125+
```json
126+
{
127+
"disk_monitor": {
128+
"disk_space_threshold": 107374182400, #100GB
129+
"check_interval": 60, #1 minute
130+
"mount_points": ["/mnt/c"],
131+
"path_space": [
132+
{
133+
"path":"/mnt/c/DoH/",
134+
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning
135+
}
136+
],
137+
"receiver": [{
138+
"receive_id": "ou_***",
139+
"receive_id_type": "open_id"
140+
}]
141+
},
142+
"resource_monitor": {
143+
"check_interval": 10, #10 seconds
144+
"iops_threshold": 26214400, # read/write bytes over 25MB
145+
"memory_threshold": 1073741824, # RAM over 1GB
146+
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
147+
"cpu_threshold": 190.0, # CPU usage over 190%
148+
"exclude_self_process": true, # do not warn about the monitor's own processes
149+
"receiver": [{
150+
"receive_id": "186***", # phone
151+
"receive_id_type": "mobile"
152+
}]
153+
},
154+
"job_monitor": {
155+
"check_interval": 10,
156+
"script_path": "/mnt/c/DoH/test/test.sh", # run job shell script
157+
"receiver": [{
158+
"receive_id": "ou_***",
159+
"receive_id_type": "open_id"
160+
}]
161+
},
162+
"notice_config": {
163+
"fs_config": {
164+
"app_id": "cli_***", # feishu(lark) app_id
165+
"app_secret": "***", # feishu(lark) app_secret
166+
"receiver": [{
167+
"receive_id": "oc_***",
168+
"receive_id_type": "chat_id"
169+
}]
170+
}
171+
}
172+
}
173+
```
174+
175+
## 0x04 Run
176+
### 4.1 Resource monitoring
177+
```bash
178+
RUST_LOG=INFO ./target/release/doh
179+
```
180+
181+
### 4.2 Node monitoring
182+
```bash
183+
RUST_LOG=INFO ./target/release/node_monitor
184+
```
185+
186+
### 4.3 Job monitoring
187+
```bash
188+
RUST_LOG=INFO ./target/release/job_monitor
189+
```
190+
191+
## 0x05 TODO
192+
193+
- [ ] Add support for other IM (WeCom, DingTalk, Slack, Discord, etc.)
194+
- [ ] Add resource usage assessment reports
195+
- [ ] ReAct according to constraint policy (Response & Action)
196+
- [ ] Remote control and web interaction
197+
- [ ] Add AI-assisted management
198+
199+
## 0x06 License
200+
201+
Licensed under [GNU General Public License v3.0 (GPL-3.0)](https://www.gnu.org/licenses/gpl-3.0.html).
202+
203+
---
204+
Copyright (c) 2024 ZimaBlueAI Tech. Co. Ltd.

config.json

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"disk_monitor": {
3+
"disk_space_threshold": 107374182400,
4+
"check_interval": 60,
5+
"mount_points": ["/mnt/c"],
6+
"path_space": [
7+
{
8+
"path":"/mnt/c/DoH/",
9+
"space_threshold":[10, 26214400]
10+
}
11+
],
12+
"receiver": [{
13+
"receive_id": "ou_***",
14+
"receive_id_type": "open_id"
15+
}]
16+
},
17+
"resource_monitor": {
18+
"check_interval": 10,
19+
"iops_threshold": 26214400,
20+
"memory_threshold": 1073741824,
21+
"virtual_memory_threshold": 10737418240,
22+
"cpu_threshold": 190.0,
23+
"exclude_self_process": true,
24+
"receiver": [{
25+
"receive_id": "186***",
26+
"receive_id_type": "mobile"
27+
}]
28+
},
29+
"node_monitor": {
30+
"run_time": 10,
31+
"check_interval": 60,
32+
"node_id": "management_node",
33+
"exclude_users": ["root"],
34+
"include_users": ["ai"],
35+
"receiver": [{
36+
"receive_id": "186***",
37+
"receive_id_type": "mobile"
38+
}]
39+
},
40+
"job_monitor": {
41+
"check_interval": 10,
42+
"script_path": "/mnt/c/DoH/test/test.sh",
43+
"receiver": [{
44+
"receive_id": "ou_***",
45+
"receive_id_type": "open_id"
46+
}]
47+
},
48+
"notice_config": {
49+
"fs_config": {
50+
"app_id": "cli_***",
51+
"app_secret": "***",
52+
"receiver": [{
53+
"receive_id": "oc_***",
54+
"receive_id_type": "chat_id"
55+
}]
56+
}
57+
}
58+
}

src/disk_monitor.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// use serde::Deserialize;
2+
use std::fs;
3+
use std::thread;
4+
use std::time::Duration;
5+
use sysinfo::Disks;
6+
use log::warn;
7+
8+
use doh::{
9+
Config,
10+
read_config,
11+
format_bytes,
12+
get_dir_size,
13+
notify_msg
14+
};
15+
16+
pub async fn start_disk_monitor(config_path: &str) {
17+
let config: Config = read_config(config_path);
18+
let disk_config = config.disk_monitor;
19+
20+
tokio::spawn(async move {
21+
// let mut sys = System::new_all();
22+
loop {
23+
let disks = Disks::new_with_refreshed_list();
24+
let mut msg_content = String::new();
25+
for disk in disks.list() {
26+
let mount_point = disk.mount_point().to_str().unwrap_or("Unknown").to_string();
27+
let available_space = disk.available_space();
28+
if (disk_config.mount_points.is_empty() || disk_config.mount_points.contains(&mount_point)) && available_space < disk_config.disk_space_threshold {
29+
// println!("Warning: Disk space is below threshold: {} B available", available_space);
30+
warn!(
31+
"Warning: Disk space is below threshold on disk Filesystem {:?} Mounted on {:?}: {} available",
32+
disk.name().to_str().unwrap_or("Unknown"),
33+
mount_point,
34+
format_bytes(available_space)
35+
);
36+
msg_content.push_str(&format!("Warning: Disk space is below threshold on disk Filesystem {:?} Mounted on {:?}: {} available\n",
37+
disk.name().to_str().unwrap_or("Unknown"),
38+
mount_point,
39+
format_bytes(available_space)
40+
));
41+
}
42+
// Add SMART information and disk damage alarm here
43+
}
44+
if msg_content.len() > 0 {
45+
let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content);
46+
msg_content.clear();
47+
}
48+
49+
// Check file increase/decrease data size
50+
for path_space in &disk_config.path_space {
51+
let path = &path_space.path;
52+
let space_threshold = path_space.space_threshold;
53+
if fs::metadata(path).is_ok() {
54+
let metadata = fs::metadata(path).unwrap();
55+
if metadata.is_dir() {
56+
let total_size = get_dir_size(path);
57+
if total_size > space_threshold.1 || total_size < space_threshold.0 {
58+
warn!(
59+
"Warning: Directory size exceeds threshold in path {:?}: {}",
60+
path,
61+
format_bytes(total_size)
62+
);
63+
msg_content.push_str(&format!("Warning: Directory size exceeds threshold in path {:?}: {}\n",
64+
path,
65+
format_bytes(total_size)
66+
));
67+
}
68+
} else {
69+
if metadata.len() > space_threshold.1 || metadata.len() < space_threshold.0 {
70+
warn!(
71+
"Warning: File size exceeds threshold in path {:?}: {}",
72+
path,
73+
format_bytes(metadata.len())
74+
);
75+
msg_content.push_str(&format!("Warning: File size exceeds threshold in path {:?}: {}\n",
76+
path,
77+
format_bytes(metadata.len())
78+
));
79+
}
80+
}
81+
}
82+
}
83+
if msg_content.len() > 0 {
84+
let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content).await;
85+
msg_content.clear();
86+
}
87+
thread::sleep(Duration::from_secs(disk_config.check_interval));
88+
}
89+
});
90+
}

0 commit comments

Comments
 (0)