Coverage Report

Created: 2025-06-23 14:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/b/lab/denet/src/cpu_sampler.rs
Line
Count
Source
1
//! CPU usage measurement module
2
//!
3
//! This module provides accurate CPU usage measurement by directly reading
4
//! process statistics from the operating system. The current implementation
5
//! is Linux-specific and uses the procfs crate to read from /proc.
6
//!
7
//! # Cross-platform Strategy
8
//!
9
//! We should implement platform-specific CPU measurement backends:
10
//!
11
//! ## Linux (Current Implementation)
12
//! - Uses procfs to read /proc/[pid]/stat
13
//! - Gets CPU jiffies and calculates percentage based on time delta
14
//! - Matches the calculation method used by tools like 'top' and 'htop'
15
//!
16
//! ## macOS (Planned)
17
//! - Will use host_processor_info() (Mach host API) for system-wide CPU ticks
18
//! - Will use proc_pidinfo() from libproc to get task_info
19
//! - Calculation is based on CPU ticks delta / time delta
20
//! - Reference implementation: psutil's cpu_percent for macOS
21
//!
22
//! ## Windows (Planned)
23
//! - Will use GetProcessTimes() for process CPU times
24
//! - Will use GetSystemTimes() for system-wide times
25
//! - Performance Counters API as fallback
26
//! - Will match calculation method from psutil and Process Explorer
27
//!
28
//! This strategy will allow us to have accurate CPU measurements
29
//! across all major platforms without relying on sysinfo.
30
31
use procfs::process::Process;
32
use std::collections::HashMap;
33
use std::io::{Error, ErrorKind};
34
use std::time::{Duration, Instant};
35
36
/// Store CPU times for delta calculation
37
///
38
/// This structure holds the CPU time values from a single measurement point,
39
/// allowing us to calculate the delta between two measurements.
40
#[derive(Clone, Debug)]
41
struct CpuTimes {
42
    user: u64,   // User mode CPU time in clock ticks
43
    system: u64, // System mode CPU time in clock ticks
44
    timestamp: Instant,
45
}
46
47
/// CpuSampler provides accurate per-process CPU usage measurement
48
///
49
/// This struct tracks process CPU times and calculates usage percentages
50
/// based on the delta between measurements. The calculation matches what
51
/// tools like top/htop use, providing more accurate values than sysinfo.
52
///
53
/// # Note
54
///
55
/// On Linux, CPU percentages can exceed 100% for multi-threaded processes
56
/// that utilize multiple cores. 100% represents full utilization of one core.
57
pub struct CpuSampler {
58
    /// Previous measurements for each PID
59
    previous_times: HashMap<usize, CpuTimes>,
60
    /// System CPU info - clock ticks per second (usually 100)
61
    clock_ticks_per_sec: u64,
62
}
63
64
impl Default for CpuSampler {
65
0
    fn default() -> Self {
66
0
        Self::new()
67
0
    }
68
}
69
70
impl CpuSampler {
71
    /// Create a new CpuSampler instance
72
    ///
73
    /// This initializes the sampler with the system's clock ticks per second
74
    /// value, which is needed for accurate CPU percentage calculation.
75
22
    pub fn new() -> Self {
76
22
        // Get clock ticks per second (usually 100)
77
22
        let clock_ticks = unsafe { libc::sysconf(libc::_SC_CLK_TCK) } as u64;
78
22
79
22
        Self {
80
22
            previous_times: HashMap::new(),
81
22
            clock_ticks_per_sec: clock_ticks,
82
22
        }
83
22
    }
84
85
    /// Static method to get CPU usage for a single measurement
86
    /// This reads the process stat through procfs directly, without creating a sampler instance, for one-off measurements
87
0
    pub fn get_cpu_usage_static(pid: usize) -> Result<f32, std::io::Error> {
88
        // For static usage, we use procfs directly
89
0
        let process = Process::new(pid as i32).map_err(|e| {
90
0
            std::io::Error::new(
91
0
                std::io::ErrorKind::NotFound,
92
0
                format!("Process not found: {e}"),
93
0
            )
94
0
        })?;
95
96
0
        let stat = process
97
0
            .stat()
98
0
            .map_err(|e| std::io::Error::other(format!("Failed to read process stat: {e}")))?;
99
100
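        // For a single measurement, we can't calculate delta, so return approximate CPU usage (capped at 100%). NOTE(review): the divisor is stat.starttime (process start time since boot, in ticks), not elapsed run time - confirm this is intended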
101
0
        let total_time = stat.utime + stat.stime;
102
0
        let _clock_ticks = unsafe { libc::sysconf(libc::_SC_CLK_TCK) } as u64;
103
0
        let uptime_ticks = stat.starttime;
104
0
105
0
        if uptime_ticks > 0 {
106
0
            let cpu_usage = (total_time as f64 / uptime_ticks as f64) * 100.0;
107
0
            Ok(cpu_usage.min(100.0) as f32)
108
        } else {
109
0
            Ok(0.0)
110
        }
111
0
    }
112
113
    /// Get CPU usage percentage for a process (100% = one fully utilized core)
114
    ///
115
    /// Returns the CPU usage as a percentage, where:
116
    /// - 0% means no CPU usage
117
    /// - 100% means full utilization of one CPU core
118
    /// - >100% possible for multi-threaded processes using multiple cores
119
    ///
120
    /// The first call for a PID will return None as it establishes a baseline.
121
    /// Subsequent calls will return the CPU usage since the previous call.
122
    ///
123
    /// # Arguments
124
    ///
125
    /// * `pid` - Process ID to measure
126
    ///
127
    /// # Returns
128
    ///
129
    /// * `Some(f32)` - CPU usage percentage if available
130
    /// * `None` - If this is the first measurement, the process stats can't be read, or less than 10 ms have passed since the previous measurement
131
45
    pub fn get_cpu_usage(&mut self, pid: usize) -> Option<f32> {
132
45
        let current = Self::read_process_times(pid).ok()
?0
;
133
134
45
        if let Some(
previous29
) = self.previous_times.get(&pid) {
135
29
            let time_delta = current.timestamp.duration_since(previous.timestamp);
136
29
            if time_delta < Duration::from_millis(10) {
137
0
                return None; // Too soon for accurate measurement
138
29
            }
139
29
140
29
            let cpu_delta = (current.user + current.system) - (previous.user + previous.system);
141
29
            let time_delta_ticks = time_delta.as_secs_f64() * self.clock_ticks_per_sec as f64;
142
29
143
29
            // CPU usage as percentage (0-100 per core)
144
29
            // No need to multiply by num_cpus - we want per-core percentage
145
29
            let usage = (cpu_delta as f64 / time_delta_ticks) * 100.0;
146
29
147
29
            self.previous_times.insert(pid, current);
148
29
            Some(usage as f32)
149
        } else {
150
            // First measurement - store for next time
151
16
            self.previous_times.insert(pid, current);
152
16
            None
153
        }
154
45
    }
155
156
    /// Read CPU times using procfs crate
157
    ///
158
    /// This function reads the user and system CPU times for a process
159
    /// from /proc/[pid]/stat using the procfs crate.
160
    ///
161
    /// # Arguments
162
    ///
163
    /// * `pid` - Process ID to read
164
    ///
165
    /// # Returns
166
    ///
167
    /// * `Result<CpuTimes, std::io::Error>` - CPU times or error if process not found
168
47
    fn read_process_times(pid: usize) -> Result<CpuTimes, std::io::Error> {
169
        // Use procfs to get process stat information
170
47
        let process = Process::new(pid as i32).map_err(|e| {
171
0
            Error::new(
172
0
                ErrorKind::NotFound,
173
0
                format!("Failed to access process {pid}: {e}"),
174
0
            )
175
47
        })
?0
;
176
177
47
        let stat = process.stat().map_err(|e| {
178
0
            Error::new(
179
0
                ErrorKind::InvalidData,
180
0
                format!("Failed to read process stats: {e}"),
181
0
            )
182
47
        })
?0
;
183
184
47
        Ok(CpuTimes {
185
47
            user: stat.utime,
186
47
            system: stat.stime,
187
47
            timestamp: Instant::now(),
188
47
        })
189
47
    }
190
191
    /// Clean up stale entries from the CPU sampler
192
    ///
193
    /// Removes tracking data for processes that no longer exist or are no
194
    /// longer being monitored. This prevents memory leaks when processes
195
    /// terminate.
196
    ///
197
    /// # Arguments
198
    ///
199
    /// * `active_pids` - List of PIDs that are still active and should be kept
200
29
    pub fn cleanup_stale_entries(&mut self, active_pids: &[usize]) {
201
29
        self.previous_times
202
38
            .retain(|pid, _| active_pids.contains(pid));
203
29
    }
204
}
205
206
#[cfg(test)]
207
mod tests {
208
    use super::*;
209
    use std::process::{Child, Command};
210
211
    /// Tests that CPU measurement using procfs is accurate
212
    ///
213
    /// This test creates a CPU-intensive process and verifies that
214
    /// our measurement shows non-zero CPU usage.
215
    #[test]
216
    #[cfg(target_os = "linux")]
217
1
    fn test_cpu_measurement_accuracy() {
218
1
        let mut sampler = CpuSampler::new();
219
1
220
1
        // Spawn a CPU-burning process
221
1
        let child = Command::new("sh")
222
1
            .arg("-c")
223
1
            .arg("for i in $(seq 1 10000000); do let j=i*i; done")
224
1
            .spawn()
225
1
            .expect("Failed to spawn test process");
226
1
227
1
        let pid = child.id() as usize;
228
1
229
1
        // First measurement (baseline)
230
1
        assert!(sampler.get_cpu_usage(pid).is_none());
231
232
        // Wait a bit longer to ensure CPU usage is registered
233
1
        std::thread::sleep(Duration::from_millis(500));
234
1
235
1
        // Measure multiple times if needed
236
1
        let mut usage = 0.0;
237
1
        for _ in 0..5 {
238
1
            if let Some(u) = sampler.get_cpu_usage(pid) {
239
1
                usage = u;
240
1
                if usage > 0.0 {
241
1
                    break;
242
0
                }
243
0
            }
244
0
            std::thread::sleep(Duration::from_millis(100));
245
        }
246
247
        // Should have some measurable CPU usage for the calculation process
248
1
        assert!(usage > 0.0, 
"CPU usage should be greater than 0: {}"0
, usage);
249
250
        // Kill the child process
251
1
        kill_child(child);
252
1
    }
253
254
    /// Tests that we can read process CPU times from procfs
255
    ///
256
    /// This test reads the CPU times for the current process
257
    /// and verifies that we get valid data.
258
    #[test]
259
    #[cfg(target_os = "linux")]
260
1
    fn test_read_process_times() {
261
1
        // Try to read our own process's times
262
1
        let pid = std::process::id() as usize;
263
1
264
1
        let times = CpuSampler::read_process_times(pid).expect("Failed to read process times");
265
1
266
1
        // Print CPU times for debugging
267
1
        println!(
268
1
            "User CPU time: {}, System CPU time: {}",
269
1
            times.user, times.system
270
1
        );
271
272
        // Do some CPU work to ensure non-zero values
273
1.00M
        for _ in 0..1000000 {
274
1.00M
            let _ = std::time::SystemTime::now();
275
1.00M
        }
276
277
        // Read again after doing work
278
1
        let times_after =
279
1
            CpuSampler::read_process_times(pid).expect("Failed to read process times");
280
1
281
1
        // At least one of them should have increased
282
1
        assert!(
283
1
            times_after.user > times.user || 
times_after.system > times.system0
,
284
0
            "Either user or system CPU time should increase after doing work"
285
        );
286
1
    }
287
288
    /// Tests that stale PIDs are properly cleaned up
289
    ///
290
    /// This test verifies that the cleanup_stale_entries method
291
    /// correctly removes entries for PIDs that are no longer
292
    /// in the active list.
293
    #[test]
294
    #[cfg(target_os = "linux")]
295
1
    fn test_cleanup_stale_entries() {
296
1
        let mut sampler = CpuSampler::new();
297
1
298
1
        // Create some test processes
299
1
        let child1 = Command::new("sh")
300
1
            .arg("-c")
301
1
            .arg("sleep 2")
302
1
            .spawn()
303
1
            .expect("Failed to spawn test process");
304
1
305
1
        let child2 = Command::new("sh")
306
1
            .arg("-c")
307
1
            .arg("sleep 2")
308
1
            .spawn()
309
1
            .expect("Failed to spawn test process");
310
1
311
1
        let pid1 = child1.id() as usize;
312
1
        let pid2 = child2.id() as usize;
313
1
314
1
        // Make initial measurements to populate the map
315
1
        sampler.get_cpu_usage(pid1);
316
1
        sampler.get_cpu_usage(pid2);
317
1
318
1
        assert!(sampler.previous_times.contains_key(&pid1));
319
1
        assert!(sampler.previous_times.contains_key(&pid2));
320
321
        // Cleanup keeping only pid1
322
1
        sampler.cleanup_stale_entries(&[pid1]);
323
1
324
1
        assert!(sampler.previous_times.contains_key(&pid1));
325
1
        assert!(!sampler.previous_times.contains_key(&pid2));
326
327
        // Kill the child processes
328
1
        kill_child(child1);
329
1
        kill_child(child2);
330
1
    }
331
332
    /// Helper function to safely kill a child process
333
    ///
334
    /// This ensures that test processes are properly terminated
335
    /// and don't become zombies or continue running after tests.
336
3
    fn kill_child(mut child: Child) {
337
3
        child.kill().ok();
338
3
        child.wait().ok();
339
3
    }
340
}