/home/b/lab/denet/src/cpu_sampler.rs
Line | Count | Source |
1 | | //! CPU usage measurement module |
2 | | //! |
3 | | //! This module provides accurate CPU usage measurement by directly reading |
4 | | //! process statistics from the operating system. The current implementation |
5 | | //! is Linux-specific and uses the procfs crate to read from /proc. |
6 | | //! |
7 | | //! # Cross-platform Strategy |
8 | | //! |
9 | | //! We should implement platform-specific CPU measurement backends: |
10 | | //! |
11 | | //! ## Linux (Current Implementation) |
12 | | //! - Uses procfs to read /proc/[pid]/stat |
13 | | //! - Gets CPU jiffies and calculates percentage based on time delta |
14 | | //! - Matches the calculation method used by tools like 'top' and 'htop' |
15 | | //! |
16 | | //! ## macOS (Planned) |
17 | | //! - Will use host_processor_info() from the Mach host API |
18 | | //! - Will use proc_pidinfo() from libproc to get task_info |
19 | | //! - Calculation is based on CPU ticks delta / time delta |
20 | | //! - Reference implementation: psutil's cpu_percent for macOS |
21 | | //! |
22 | | //! ## Windows (Planned) |
23 | | //! - Will use GetProcessTimes() for process CPU times |
24 | | //! - Will use GetSystemTimes() for system-wide times |
25 | | //! - Performance Counters API as fallback |
26 | | //! - Will match calculation method from psutil and Process Explorer |
27 | | //! |
28 | | //! This strategy will allow us to have accurate CPU measurements |
29 | | //! across all major platforms without relying on sysinfo. |
30 | | |
31 | | use procfs::process::Process; |
32 | | use std::collections::HashMap; |
33 | | use std::io::{Error, ErrorKind}; |
34 | | use std::time::{Duration, Instant}; |
35 | | |
/// Snapshot of a process's accumulated CPU time at one point in time.
///
/// Two snapshots taken at different moments are compared to work out how
/// much CPU time the process consumed in between.
#[derive(Debug, Clone)]
struct CpuTimes {
    /// User mode CPU time in clock ticks.
    user: u64,
    /// System mode CPU time in clock ticks.
    system: u64,
    /// Moment at which this snapshot was taken.
    timestamp: Instant,
}
46 | | |
47 | | /// CpuSampler provides accurate per-process CPU usage measurement |
48 | | /// |
49 | | /// This struct tracks process CPU times and calculates usage percentages |
50 | | /// based on the delta between measurements. The calculation matches what |
51 | | /// tools like top/htop use, providing more accurate values than sysinfo. |
52 | | /// |
53 | | /// # Note |
54 | | /// |
55 | | /// On Linux, CPU percentages can exceed 100% for multi-threaded processes |
56 | | /// that utilize multiple cores. 100% represents full utilization of one core. |
57 | | pub struct CpuSampler { |
58 | | /// Previous measurements for each PID |
59 | | previous_times: HashMap<usize, CpuTimes>, |
60 | | /// System CPU info - clock ticks per second (usually 100) |
61 | | clock_ticks_per_sec: u64, |
62 | | } |
63 | | |
64 | | impl Default for CpuSampler { |
65 | 0 | fn default() -> Self { |
66 | 0 | Self::new() |
67 | 0 | } |
68 | | } |
69 | | |
70 | | impl CpuSampler { |
71 | | /// Create a new CpuSampler instance |
72 | | /// |
73 | | /// This initializes the sampler with the system's clock ticks per second |
74 | | /// value, which is needed for accurate CPU percentage calculation. |
75 | 22 | pub fn new() -> Self { |
76 | 22 | // Get clock ticks per second (usually 100) |
77 | 22 | let clock_ticks = unsafe { libc::sysconf(libc::_SC_CLK_TCK) } as u64; |
78 | 22 | |
79 | 22 | Self { |
80 | 22 | previous_times: HashMap::new(), |
81 | 22 | clock_ticks_per_sec: clock_ticks, |
82 | 22 | } |
83 | 22 | } |
84 | | |
85 | | /// Static method to get CPU usage for a single measurement |
86 | | /// This creates a temporary sampler instance for one-off measurements |
87 | 0 | pub fn get_cpu_usage_static(pid: usize) -> Result<f32, std::io::Error> { |
88 | | // For static usage, we use procfs directly |
89 | 0 | let process = Process::new(pid as i32).map_err(|e| { |
90 | 0 | std::io::Error::new( |
91 | 0 | std::io::ErrorKind::NotFound, |
92 | 0 | format!("Process not found: {e}"), |
93 | 0 | ) |
94 | 0 | })?; |
95 | | |
96 | 0 | let stat = process |
97 | 0 | .stat() |
98 | 0 | .map_err(|e| std::io::Error::other(format!("Failed to read process stat: {e}")))?; |
99 | | |
100 | | // For a single measurement, we can't calculate delta, so return approximate CPU usage |
101 | 0 | let total_time = stat.utime + stat.stime; |
102 | 0 | let _clock_ticks = unsafe { libc::sysconf(libc::_SC_CLK_TCK) } as u64; |
103 | 0 | let uptime_ticks = stat.starttime; |
104 | 0 |
|
105 | 0 | if uptime_ticks > 0 { |
106 | 0 | let cpu_usage = (total_time as f64 / uptime_ticks as f64) * 100.0; |
107 | 0 | Ok(cpu_usage.min(100.0) as f32) |
108 | | } else { |
109 | 0 | Ok(0.0) |
110 | | } |
111 | 0 | } |
112 | | |
113 | | /// Get CPU usage percentage for a process (0-100% per core) |
114 | | /// |
115 | | /// Returns the CPU usage as a percentage, where: |
116 | | /// - 0% means no CPU usage |
117 | | /// - 100% means full utilization of one CPU core |
118 | | /// - >100% possible for multi-threaded processes using multiple cores |
119 | | /// |
120 | | /// The first call for a PID will return None as it establishes a baseline. |
121 | | /// Subsequent calls will return the CPU usage since the previous call. |
122 | | /// |
123 | | /// # Arguments |
124 | | /// |
125 | | /// * `pid` - Process ID to measure |
126 | | /// |
127 | | /// # Returns |
128 | | /// |
129 | | /// * `Some(f32)` - CPU usage percentage if available |
130 | | /// * `None` - If this is the first measurement or the process doesn't exist |
131 | 45 | pub fn get_cpu_usage(&mut self, pid: usize) -> Option<f32> { |
132 | 45 | let current = Self::read_process_times(pid).ok()?0 ; |
133 | | |
134 | 45 | if let Some(previous29 ) = self.previous_times.get(&pid) { |
135 | 29 | let time_delta = current.timestamp.duration_since(previous.timestamp); |
136 | 29 | if time_delta < Duration::from_millis(10) { |
137 | 0 | return None; // Too soon for accurate measurement |
138 | 29 | } |
139 | 29 | |
140 | 29 | let cpu_delta = (current.user + current.system) - (previous.user + previous.system); |
141 | 29 | let time_delta_ticks = time_delta.as_secs_f64() * self.clock_ticks_per_sec as f64; |
142 | 29 | |
143 | 29 | // CPU usage as percentage (0-100 per core) |
144 | 29 | // No need to multiply by num_cpus - we want per-core percentage |
145 | 29 | let usage = (cpu_delta as f64 / time_delta_ticks) * 100.0; |
146 | 29 | |
147 | 29 | self.previous_times.insert(pid, current); |
148 | 29 | Some(usage as f32) |
149 | | } else { |
150 | | // First measurement - store for next time |
151 | 16 | self.previous_times.insert(pid, current); |
152 | 16 | None |
153 | | } |
154 | 45 | } |
155 | | |
156 | | /// Read CPU times using procfs crate |
157 | | /// |
158 | | /// This function reads the user and system CPU times for a process |
159 | | /// from /proc/[pid]/stat using the procfs crate. |
160 | | /// |
161 | | /// # Arguments |
162 | | /// |
163 | | /// * `pid` - Process ID to read |
164 | | /// |
165 | | /// # Returns |
166 | | /// |
167 | | /// * `Result<CpuTimes, std::io::Error>` - CPU times or error if process not found |
168 | 47 | fn read_process_times(pid: usize) -> Result<CpuTimes, std::io::Error> { |
169 | | // Use procfs to get process stat information |
170 | 47 | let process = Process::new(pid as i32).map_err(|e| { |
171 | 0 | Error::new( |
172 | 0 | ErrorKind::NotFound, |
173 | 0 | format!("Failed to access process {pid}: {e}"), |
174 | 0 | ) |
175 | 47 | })?0 ; |
176 | | |
177 | 47 | let stat = process.stat().map_err(|e| { |
178 | 0 | Error::new( |
179 | 0 | ErrorKind::InvalidData, |
180 | 0 | format!("Failed to read process stats: {e}"), |
181 | 0 | ) |
182 | 47 | })?0 ; |
183 | | |
184 | 47 | Ok(CpuTimes { |
185 | 47 | user: stat.utime, |
186 | 47 | system: stat.stime, |
187 | 47 | timestamp: Instant::now(), |
188 | 47 | }) |
189 | 47 | } |
190 | | |
191 | | /// Clean up stale entries from the CPU sampler |
192 | | /// |
193 | | /// Removes tracking data for processes that no longer exist or are no |
194 | | /// longer being monitored. This prevents memory leaks when processes |
195 | | /// terminate. |
196 | | /// |
197 | | /// # Arguments |
198 | | /// |
199 | | /// * `active_pids` - List of PIDs that are still active and should be kept |
200 | 29 | pub fn cleanup_stale_entries(&mut self, active_pids: &[usize]) { |
201 | 29 | self.previous_times |
202 | 38 | .retain(|pid, _| active_pids.contains(pid)); |
203 | 29 | } |
204 | | } |
205 | | |
206 | | #[cfg(test)] |
207 | | mod tests { |
208 | | use super::*; |
209 | | use std::process::{Child, Command}; |
210 | | |
/// Tests that CPU measurement using procfs is accurate
///
/// This test creates a CPU-intensive process and verifies that
/// our measurement shows non-zero CPU usage.
#[test]
#[cfg(target_os = "linux")]
fn test_cpu_measurement_accuracy() {
    let mut sampler = CpuSampler::new();

    // Spawn a CPU-burning process. The loop uses only POSIX shell builtins,
    // so it works when `sh` is dash (no `let` builtin) and the CPU time is
    // spent in the shell process itself rather than in a helper such as `seq`.
    let child = Command::new("sh")
        .arg("-c")
        .arg("while :; do :; done")
        .spawn()
        .expect("Failed to spawn test process");

    let pid = child.id() as usize;

    // First measurement (baseline); checked after the child is reaped
    let baseline = sampler.get_cpu_usage(pid);

    // Wait a bit longer to ensure CPU usage is registered
    std::thread::sleep(Duration::from_millis(500));

    // Measure multiple times if needed
    let mut usage = 0.0_f32;
    for _ in 0..5 {
        if let Some(u) = sampler.get_cpu_usage(pid) {
            usage = u;
            if usage > 0.0 {
                break;
            }
        }
        std::thread::sleep(Duration::from_millis(100));
    }

    // Kill the child process before asserting, so that a failing assertion
    // cannot leave the busy loop running after the test.
    kill_child(child);

    assert!(
        baseline.is_none(),
        "First measurement should only establish a baseline"
    );

    // Should have some measurable CPU usage for the calculation process
    assert!(usage > 0.0, "CPU usage should be greater than 0: {}", usage);
}
253 | | |
/// Tests that we can read process CPU times from procfs
///
/// This test reads the CPU times for the current process
/// and verifies that the counters advance after doing CPU work.
#[test]
#[cfg(target_os = "linux")]
fn test_read_process_times() {
    // Try to read our own process's times
    let pid = std::process::id() as usize;

    let times = CpuSampler::read_process_times(pid).expect("Failed to read process times");

    // Print CPU times for debugging
    println!(
        "User CPU time: {}, System CPU time: {}",
        times.user, times.system
    );

    // CPU time is reported in whole clock ticks, so a fixed amount of work
    // can finish before a tick is accounted. Keep working in batches until
    // the counters move, giving up after a generous deadline.
    let deadline = Instant::now() + Duration::from_secs(5);
    let mut advanced = false;
    while !advanced && Instant::now() < deadline {
        // Do some CPU work to ensure non-zero values
        for _ in 0..1_000_000 {
            std::hint::black_box(std::time::SystemTime::now());
        }

        // Read again after doing work
        let times_after =
            CpuSampler::read_process_times(pid).expect("Failed to read process times");

        // At least one of them should have increased
        advanced = times_after.user > times.user || times_after.system > times.system;
    }

    assert!(
        advanced,
        "Either user or system CPU time should increase after doing work"
    );
}
287 | | |
/// Checks that `cleanup_stale_entries` keeps only the listed PIDs
///
/// Two short-lived processes are sampled once each so the sampler holds a
/// baseline for both; after a cleanup that lists only the first PID, the
/// second PID's baseline must be gone.
#[test]
#[cfg(target_os = "linux")]
fn test_cleanup_stale_entries() {
    // Both test processes are identical, so build them the same way.
    let spawn_sleeper = || {
        Command::new("sh")
            .args(["-c", "sleep 2"])
            .spawn()
            .expect("Failed to spawn test process")
    };

    let mut sampler = CpuSampler::new();

    let kept_child = spawn_sleeper();
    let dropped_child = spawn_sleeper();
    let kept_pid = kept_child.id() as usize;
    let dropped_pid = dropped_child.id() as usize;

    // One sample per PID is enough to register a baseline in the map.
    for pid in [kept_pid, dropped_pid] {
        sampler.get_cpu_usage(pid);
    }
    for pid in [kept_pid, dropped_pid] {
        assert!(sampler.previous_times.contains_key(&pid));
    }

    // Only the first PID is declared active.
    sampler.cleanup_stale_entries(&[kept_pid]);

    assert!(sampler.previous_times.contains_key(&kept_pid));
    assert!(!sampler.previous_times.contains_key(&dropped_pid));

    // Reap both helper processes.
    for child in [kept_child, dropped_child] {
        kill_child(child);
    }
}
331 | | |
/// Terminate a spawned test process and reap it
///
/// The kill request is followed by a wait so the process neither keeps
/// running after the test nor lingers as a zombie. Failures of either step
/// (for example because the process already exited) are deliberately ignored.
fn kill_child(mut child: Child) {
    let _ = child.kill();
    let _ = child.wait();
}
340 | | } |