std/sys/thread/
unix.rs

1#[cfg(not(any(
2    target_env = "newlib",
3    target_os = "l4re",
4    target_os = "emscripten",
5    target_os = "redox",
6    target_os = "hurd",
7    target_os = "aix",
8)))]
9use crate::ffi::CStr;
10use crate::mem::{self, DropGuard, ManuallyDrop};
11use crate::num::NonZero;
12#[cfg(all(target_os = "linux", target_env = "gnu"))]
13use crate::sys::weak::dlsym;
14#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
15use crate::sys::weak::weak;
16use crate::sys::{os, stack_overflow};
17use crate::time::Duration;
18use crate::{cmp, io, ptr};
/// Default minimum stack size for newly spawned threads: 2 MiB on most targets.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 << 20;
/// Default minimum stack size for newly spawned threads on L4Re: 1 MiB.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20;
/// Default minimum stack size for newly spawned threads on VxWorks: 256 KiB.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 << 10;
/// On ESP-IDF and NuttX, 0 indicates that the stack size configured in the
/// ESP-IDF/NuttX menuconfig system should be used.
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0;
32
/// Payload handed to a newly created thread through the `pthread_create`
/// argument pointer.
struct ThreadData {
    /// Optional thread name; passed to the stack overflow handler on the new thread.
    name: Option<Box<str>>,
    /// The closure the new thread runs.
    f: Box<dyn FnOnce()>,
}
37
/// Handle to a native pthread.
///
/// Dropping the handle detaches the thread; `join` consumes it instead.
pub struct Thread {
    /// The identifier filled in by `pthread_create`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
46
47impl Thread {
48    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
49    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
50    pub unsafe fn new(
51        stack: usize,
52        name: Option<&str>,
53        f: Box<dyn FnOnce()>,
54    ) -> io::Result<Thread> {
55        let data = Box::new(ThreadData { name: name.map(Box::from), f });
56
57        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
58        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
59        let mut attr = DropGuard::new(&mut attr, |attr| {
60            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0)
61        });
62
63        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
64        if stack > 0 {
65            // Only set the stack if a non-zero value is passed
66            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
67            assert_eq!(
68                libc::pthread_attr_setstacksize(
69                    attr.as_mut_ptr(),
70                    cmp::max(stack, min_stack_size(attr.as_ptr()))
71                ),
72                0
73            );
74        }
75
76        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
77        {
78            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));
79
80            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
81                0 => {}
82                n => {
83                    assert_eq!(n, libc::EINVAL);
84                    // EINVAL means |stack_size| is either too small or not a
85                    // multiple of the system page size. Because it's definitely
86                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
87                    // Round up to the nearest page and try again.
88                    let page_size = os::page_size();
89                    let stack_size =
90                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
91
92                    // Some libc implementations, e.g. musl, place an upper bound
93                    // on the stack size, in which case we can only gracefully return
94                    // an error here.
95                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
96                        return Err(io::const_error!(
97                            io::ErrorKind::InvalidInput,
98                            "invalid stack size"
99                        ));
100                    }
101                }
102            };
103        }
104
105        let data = Box::into_raw(data);
106        let mut native: libc::pthread_t = mem::zeroed();
107        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
108        return if ret == 0 {
109            Ok(Thread { id: native })
110        } else {
111            // The thread failed to start and as a result `data` was not consumed.
112            // Therefore, it is safe to reconstruct the box so that it gets deallocated.
113            drop(Box::from_raw(data));
114            Err(io::Error::from_raw_os_error(ret))
115        };
116
117        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
118            unsafe {
119                let data = Box::from_raw(data as *mut ThreadData);
120                // Next, set up our stack overflow handler which may get triggered if we run
121                // out of stack.
122                let _handler = stack_overflow::Handler::new(data.name);
123                // Finally, let's run some code.
124                (data.f)();
125            }
126            ptr::null_mut()
127        }
128    }
129
130    pub fn join(self) {
131        let id = self.into_id();
132        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
133        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
134    }
135
136    pub fn id(&self) -> libc::pthread_t {
137        self.id
138    }
139
140    pub fn into_id(self) -> libc::pthread_t {
141        ManuallyDrop::new(self).id
142    }
143}
144
145impl Drop for Thread {
146    fn drop(&mut self) {
147        let ret = unsafe { libc::pthread_detach(self.id) };
148        debug_assert_eq!(ret, 0);
149    }
150}
151
152pub fn available_parallelism() -> io::Result<NonZero<usize>> {
153    cfg_select! {
154        any(
155            target_os = "android",
156            target_os = "emscripten",
157            target_os = "fuchsia",
158            target_os = "hurd",
159            target_os = "linux",
160            target_os = "aix",
161            target_vendor = "apple",
162            target_os = "cygwin",
163        ) => {
164            #[allow(unused_assignments)]
165            #[allow(unused_mut)]
166            let mut quota = usize::MAX;
167
168            #[cfg(any(target_os = "android", target_os = "linux"))]
169            {
170                quota = cgroups::quota().max(1);
171                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
172                unsafe {
173                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
174                        let count = libc::CPU_COUNT(&set) as usize;
175                        let count = count.min(quota);
176
177                        // According to sched_getaffinity's API it should always be non-zero, but
178                        // some old MIPS kernels were buggy and zero-initialized the mask if
179                        // none was explicitly set.
180                        // In that case we use the sysconf fallback.
181                        if let Some(count) = NonZero::new(count) {
182                            return Ok(count)
183                        }
184                    }
185                }
186            }
187            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
188                -1 => Err(io::Error::last_os_error()),
189                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
190                cpus => {
191                    let count = cpus as usize;
192                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
193                    let count = count.min(quota);
194                    Ok(unsafe { NonZero::new_unchecked(count) })
195                }
196            }
197        }
198        any(
199           target_os = "freebsd",
200           target_os = "dragonfly",
201           target_os = "openbsd",
202           target_os = "netbsd",
203        ) => {
204            use crate::ptr;
205
206            #[cfg(target_os = "freebsd")]
207            {
208                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
209                unsafe {
210                    if libc::cpuset_getaffinity(
211                        libc::CPU_LEVEL_WHICH,
212                        libc::CPU_WHICH_PID,
213                        -1,
214                        size_of::<libc::cpuset_t>(),
215                        &mut set,
216                    ) == 0 {
217                        let count = libc::CPU_COUNT(&set) as usize;
218                        if count > 0 {
219                            return Ok(NonZero::new_unchecked(count));
220                        }
221                    }
222                }
223            }
224
225            #[cfg(target_os = "netbsd")]
226            {
227                unsafe {
228                    let set = libc::_cpuset_create();
229                    if !set.is_null() {
230                        let mut count: usize = 0;
231                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
232                            for i in 0..libc::cpuid_t::MAX {
233                                match libc::_cpuset_isset(i, set) {
234                                    -1 => break,
235                                    0 => continue,
236                                    _ => count = count + 1,
237                                }
238                            }
239                        }
240                        libc::_cpuset_destroy(set);
241                        if let Some(count) = NonZero::new(count) {
242                            return Ok(count);
243                        }
244                    }
245                }
246            }
247
248            let mut cpus: libc::c_uint = 0;
249            let mut cpus_size = size_of_val(&cpus);
250
251            unsafe {
252                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
253            }
254
255            // Fallback approach in case of errors or no hardware threads.
256            if cpus < 1 {
257                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
258                let res = unsafe {
259                    libc::sysctl(
260                        mib.as_mut_ptr(),
261                        2,
262                        (&raw mut cpus) as *mut _,
263                        (&raw mut cpus_size) as *mut _,
264                        ptr::null_mut(),
265                        0,
266                    )
267                };
268
269                // Handle errors if any.
270                if res == -1 {
271                    return Err(io::Error::last_os_error());
272                } else if cpus == 0 {
273                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
274                }
275            }
276
277            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
278        }
279        target_os = "nto" => {
280            unsafe {
281                use libc::_syspage_ptr;
282                if _syspage_ptr.is_null() {
283                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
284                } else {
285                    let cpus = (*_syspage_ptr).num_cpu;
286                    NonZero::new(cpus as usize)
287                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
288                }
289            }
290        }
291        any(target_os = "solaris", target_os = "illumos") => {
292            let mut cpus = 0u32;
293            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
294                return Err(io::Error::UNKNOWN_THREAD_COUNT);
295            }
296            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
297        }
298        target_os = "haiku" => {
299            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
300            // `get_system_info` calls then `smp_get_num_cpus`
301            unsafe {
302                let mut sinfo: libc::system_info = crate::mem::zeroed();
303                let res = libc::get_system_info(&mut sinfo);
304
305                if res != libc::B_OK {
306                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
307                }
308
309                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
310            }
311        }
312        target_os = "vxworks" => {
313            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
314            // expectations than the actual cores availability.
315
316            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
317            unsafe{
318                let set = libc::vxCpuEnabledGet();
319                Ok(NonZero::new_unchecked(set.count_ones() as usize))
320            }
321        }
322        _ => {
323            // FIXME: implement on Redox, l4re
324            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
325        }
326    }
327}
328
329pub fn current_os_id() -> Option<u64> {
330    // Most Unix platforms have a way to query an integer ID of the current thread, all with
331    // slightly different spellings.
332    //
333    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
334    // for process inspection (debuggers, trace, `top`, etc.).
335    cfg_select! {
336        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
337        any(target_os = "android", target_os = "linux") => {
338            use crate::sys::pal::weak::syscall;
339
340            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
341            // since Linux 2.4.11.
342            syscall!(fn gettid() -> libc::pid_t;);
343
344            // SAFETY: FFI call with no preconditions.
345            let id: libc::pid_t = unsafe { gettid() };
346            Some(id as u64)
347        }
348        target_os = "nto" => {
349            // SAFETY: FFI call with no preconditions.
350            let id: libc::pid_t = unsafe { libc::gettid() };
351            Some(id as u64)
352        }
353        target_os = "openbsd" => {
354            // SAFETY: FFI call with no preconditions.
355            let id: libc::pid_t = unsafe { libc::getthrid() };
356            Some(id as u64)
357        }
358        target_os = "freebsd" => {
359            // SAFETY: FFI call with no preconditions.
360            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
361            Some(id as u64)
362        }
363        target_os = "netbsd" => {
364            // SAFETY: FFI call with no preconditions.
365            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
366            Some(id as u64)
367        }
368        any(target_os = "illumos", target_os = "solaris") => {
369            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
370            // SAFETY: FFI call with no preconditions.
371            let id: libc::pthread_t = unsafe { libc::pthread_self() };
372            Some(id as u64)
373        }
374        target_vendor = "apple" => {
375            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
376            let mut id = 0u64;
377            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
378            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
379            if status == 0 {
380                Some(id)
381            } else {
382                None
383            }
384        }
385        // Other platforms don't have an OS thread ID or don't have a way to access it.
386        _ => None,
387    }
388}
389
/// Copies `cstr` into a fixed-size, NUL-terminated buffer of `MAX_WITH_NUL`
/// characters, dropping any bytes that do not fit.
///
/// The final element is never written, so the result always ends in NUL;
/// shorter inputs are padded with NULs.
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut buf = [0 as libc::c_char; MAX_WITH_NUL];
    // Everything but the last slot may hold payload; the last slot stays NUL.
    let (payload, _terminator) = buf.split_at_mut(MAX_WITH_NUL - 1);
    payload
        .iter_mut()
        .zip(cstr.to_bytes())
        .for_each(|(slot, &byte)| *slot = byte as libc::c_char);
    buf
}
406
/// Sets the name of the calling thread on Android via `prctl(PR_SET_NAME)`.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing `prctl` arguments are passed as zero.
    let zero: libc::c_ulong = 0;
    // SAFETY: `name` is a `&CStr`, so the pointer refers to a NUL-terminated string.
    let status = unsafe { libc::prctl(PR_SET_NAME, name.as_ptr(), zero, zero, zero) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
422
423#[cfg(any(
424    target_os = "linux",
425    target_os = "freebsd",
426    target_os = "dragonfly",
427    target_os = "nuttx",
428    target_os = "cygwin"
429))]
430pub fn set_name(name: &CStr) {
431    unsafe {
432        cfg_select! {
433            any(target_os = "linux", target_os = "cygwin") => {
434                // Linux and Cygwin limits the allowed length of the name.
435                const TASK_COMM_LEN: usize = 16;
436                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
437            }
438            _ => {
439                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
440            }
441        };
442        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
443        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
444        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
445        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
446        debug_assert_eq!(res, 0);
447    }
448}
449
/// Sets the name of the calling thread on OpenBSD via `pthread_set_name_np`.
#[cfg(target_os = "openbsd")]
pub fn set_name(name: &CStr) {
    let name_ptr = name.as_ptr();
    // SAFETY: `name_ptr` comes from a `&CStr` and is therefore NUL-terminated.
    unsafe {
        let this_thread = libc::pthread_self();
        libc::pthread_set_name_np(this_thread, name_ptr);
    }
}
456
/// Sets the name of the calling thread on Apple platforms, truncated to
/// `MAXTHREADNAMESIZE` (including the NUL terminator).
#[cfg(target_vendor = "apple")]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    // SAFETY: `truncated` is a NUL-terminated buffer that lives across the call.
    let status = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
466
/// Sets the name of the calling thread on NetBSD.
///
/// The name is passed as the argument to a `"%s"` format string.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    let format = c"%s".as_ptr();
    let argument = name.as_ptr() as *mut libc::c_void;
    // SAFETY: both pointers refer to NUL-terminated strings that outlive the call.
    let status = unsafe { libc::pthread_setname_np(libc::pthread_self(), format, argument) };
    debug_assert_eq!(status, 0);
}
478
/// Sets the name of the calling thread on Solaris, illumos and nto.
///
/// `pthread_setname_np` is looked up as a weak symbol; when it is not
/// available the call is a no-op.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    weak!(
        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
    );

    #[cfg(target_os = "nto")]
    const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
    const THREAD_NAME_MAX: usize = 32;

    // Nothing to do when the symbol is missing.
    let Some(setname) = pthread_setname_np.get() else {
        return;
    };

    let truncated = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
    let status = unsafe { setname(libc::pthread_self(), truncated.as_ptr()) };
    debug_assert_eq!(status, 0);
}
496
/// Sets the name of the calling thread on Fuchsia by writing the
/// `ZX_PROP_NAME` property of the current thread object.
///
/// The result of the call is ignored.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use crate::sys::pal::fuchsia::*;

    let value = name.as_ptr() as *const libc::c_void;
    // Length without the trailing NUL.
    let value_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, value, value_len);
    }
}
509
/// Sets the name of the calling thread on Haiku via `rename_thread`.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        // `find_thread` is given a null name to look up the calling thread.
        let current = libc::find_thread(ptr::null_mut());
        libc::rename_thread(current, name.as_ptr())
    };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, libc::B_OK);
}
519
/// Sets the name of the calling task on VxWorks via `taskNameSet`, truncated
/// to a buffer of `VX_TASK_RENAME_LENGTH - 1` characters (including NUL).
#[cfg(target_os = "vxworks")]
pub fn set_name(name: &CStr) {
    const BUF_LEN: usize = (libc::VX_TASK_RENAME_LENGTH - 1) as usize;

    // `taskNameSet` takes a mutable pointer, hence the mutable local copy.
    let mut truncated = truncate_cstr::<{ BUF_LEN }>(name);
    let status = unsafe { libc::taskNameSet(libc::taskIdSelf(), truncated.as_mut_ptr()) };
    debug_assert_eq!(status, libc::OK);
}
526
527#[cfg(not(target_os = "espidf"))]
528pub fn sleep(dur: Duration) {
529    let mut secs = dur.as_secs();
530    let mut nsecs = dur.subsec_nanos() as _;
531
532    // If we're awoken with a signal then the return value will be -1 and
533    // nanosleep will fill in `ts` with the remaining time.
534    unsafe {
535        while secs > 0 || nsecs > 0 {
536            let mut ts = libc::timespec {
537                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
538                tv_nsec: nsecs,
539            };
540            secs -= ts.tv_sec as u64;
541            let ts_ptr = &raw mut ts;
542            if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
543                assert_eq!(os::errno(), libc::EINTR);
544                secs += ts.tv_sec as u64;
545                nsecs = ts.tv_nsec;
546            } else {
547                nsecs = 0;
548            }
549        }
550    }
551}
552
/// Puts the calling thread to sleep for at least `dur` on ESP-IDF, using
/// repeated `usleep` calls of bounded length.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    // ESP-IDF does not have `nanosleep`, so we use `usleep` instead.
    // As per the documentation of `usleep`, it is expected to support
    // sleep times as big as at least up to 1 second.
    //
    // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
    // `usleep` implementation
    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
    // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
    // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

    // Add any nanoseconds smaller than a microsecond as an extra microsecond
    // so as to comply with the `std::thread::sleep` contract which mandates
    // implementations to sleep for _at least_ the provided `dur`.
    // We can't overflow the sum as it is a `u128`, while `Duration` is a pair of
    // (`u64` secs, `u32` nanos), where the nanos are strictly smaller than 1 second
    // (i.e. < 1_000_000_000)
    let has_sub_micro_part = dur.subsec_nanos() % 1_000 > 0;
    let mut remaining = dur.as_micros() + u128::from(has_sub_micro_part);

    while remaining > 0 {
        // Sleep in slices of at most `MAX_MICROS`.
        let slice = cmp::min(remaining, MAX_MICROS as u128) as u32;
        unsafe {
            libc::usleep(slice);
        }

        remaining -= slice as u128;
    }
}
583
584// Any unix that has clock_nanosleep
585// If this list changes update the MIRI chock_nanosleep shim
586#[cfg(any(
587    target_os = "freebsd",
588    target_os = "netbsd",
589    target_os = "linux",
590    target_os = "android",
591    target_os = "solaris",
592    target_os = "illumos",
593    target_os = "dragonfly",
594    target_os = "hurd",
595    target_os = "fuchsia",
596    target_os = "vxworks",
597))]
598pub fn sleep_until(deadline: crate::time::Instant) {
599    use crate::time::Instant;
600
601    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
602        // The deadline is further in the future then can be passed to
603        // clock_nanosleep. We have to use Self::sleep instead. This might
604        // happen on 32 bit platforms, especially closer to 2038.
605        let now = Instant::now();
606        if let Some(delay) = deadline.checked_duration_since(now) {
607            sleep(delay);
608        }
609        return;
610    };
611
612    unsafe {
613        // When we get interrupted (res = EINTR) call clock_nanosleep again
614        loop {
615            let res = libc::clock_nanosleep(
616                crate::sys::time::Instant::CLOCK_ID,
617                libc::TIMER_ABSTIME,
618                &ts,
619                core::ptr::null_mut(), // not required with TIMER_ABSTIME
620            );
621
622            if res == 0 {
623                break;
624            } else {
625                assert_eq!(
626                    res,
627                    libc::EINTR,
628                    "timespec is in range,
629                         clockid is valid and kernel should support it"
630                );
631            }
632        }
633    }
634}
635
636pub fn yield_now() {
637    let ret = unsafe { libc::sched_yield() };
638    debug_assert_eq!(ret, 0);
639}
640
641#[cfg(any(target_os = "android", target_os = "linux"))]
642mod cgroups {
643    //! Currently not covered
644    //! * cgroup v2 in non-standard mountpoints
645    //! * paths containing control characters or spaces, since those would be escaped in procfs
646    //!   output and we don't unescape
647
648    use crate::borrow::Cow;
649    use crate::ffi::OsString;
650    use crate::fs::{File, exists};
651    use crate::io::{BufRead, Read};
652    use crate::os::unix::ffi::OsStringExt;
653    use crate::path::{Path, PathBuf};
654    use crate::str::from_utf8;
655
656    #[derive(PartialEq)]
657    enum Cgroup {
658        V1,
659        V2,
660    }
661
662    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
663    /// be determined or is not set.
664    pub(super) fn quota() -> usize {
665        let mut quota = usize::MAX;
666        if cfg!(miri) {
667            // Attempting to open a file fails under default flags due to isolation.
668            // And Miri does not have parallelism anyway.
669            return quota;
670        }
671
672        let _: Option<()> = try {
673            let mut buf = Vec::with_capacity(128);
674            // find our place in the cgroup hierarchy
675            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
676            let (cgroup_path, version) =
677                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
678                    let mut fields = line.splitn(3, |&c| c == b':');
679                    // 2nd field is a list of controllers for v1 or empty for v2
680                    let version = match fields.nth(1) {
681                        Some(b"") => Cgroup::V2,
682                        Some(controllers)
683                            if from_utf8(controllers)
684                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
685                        {
686                            Cgroup::V1
687                        }
688                        _ => return previous,
689                    };
690
691                    // already-found v1 trumps v2 since it explicitly specifies its controllers
692                    if previous.is_some() && version == Cgroup::V2 {
693                        return previous;
694                    }
695
696                    let path = fields.last()?;
697                    // skip leading slash
698                    Some((path[1..].to_owned(), version))
699                })?;
700            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
701
702            quota = match version {
703                Cgroup::V1 => quota_v1(cgroup_path),
704                Cgroup::V2 => quota_v2(cgroup_path),
705            };
706        };
707
708        quota
709    }
710
711    fn quota_v2(group_path: PathBuf) -> usize {
712        let mut quota = usize::MAX;
713
714        let mut path = PathBuf::with_capacity(128);
715        let mut read_buf = String::with_capacity(20);
716
717        // standard mount location defined in file-hierarchy(7) manpage
718        let cgroup_mount = "/sys/fs/cgroup";
719
720        path.push(cgroup_mount);
721        path.push(&group_path);
722
723        path.push("cgroup.controllers");
724
725        // skip if we're not looking at cgroup2
726        if matches!(exists(&path), Err(_) | Ok(false)) {
727            return usize::MAX;
728        };
729
730        path.pop();
731
732        let _: Option<()> = try {
733            while path.starts_with(cgroup_mount) {
734                path.push("cpu.max");
735
736                read_buf.clear();
737
738                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
739                    let raw_quota = read_buf.lines().next()?;
740                    let mut raw_quota = raw_quota.split(' ');
741                    let limit = raw_quota.next()?;
742                    let period = raw_quota.next()?;
743                    match (limit.parse::<usize>(), period.parse::<usize>()) {
744                        (Ok(limit), Ok(period)) if period > 0 => {
745                            quota = quota.min(limit / period);
746                        }
747                        _ => {}
748                    }
749                }
750
751                path.pop(); // pop filename
752                path.pop(); // pop dir
753            }
754        };
755
756        quota
757    }
758
759    fn quota_v1(group_path: PathBuf) -> usize {
760        let mut quota = usize::MAX;
761        let mut path = PathBuf::with_capacity(128);
762        let mut read_buf = String::with_capacity(20);
763
764        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
765        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
766        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
767            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
768            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
769            // this can be expensive on systems with tons of mountpoints
770            // but we only get to this point when /proc/self/cgroups explicitly indicated
771            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
772            find_mountpoint,
773        ];
774
775        for mount in mounts {
776            let Some((mount, group_path)) = mount(&group_path) else { continue };
777
778            path.clear();
779            path.push(mount.as_ref());
780            path.push(&group_path);
781
782            // skip if we guessed the mount incorrectly
783            if matches!(exists(&path), Err(_) | Ok(false)) {
784                continue;
785            }
786
787            while path.starts_with(mount.as_ref()) {
788                let mut parse_file = |name| {
789                    path.push(name);
790                    read_buf.clear();
791
792                    let f = File::open(&path);
793                    path.pop(); // restore buffer before any early returns
794                    f.ok()?.read_to_string(&mut read_buf).ok()?;
795                    let parsed = read_buf.trim().parse::<usize>().ok()?;
796
797                    Some(parsed)
798                };
799
800                let limit = parse_file("cpu.cfs_quota_us");
801                let period = parse_file("cpu.cfs_period_us");
802
803                match (limit, period) {
804                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
805                    _ => {}
806                }
807
808                path.pop();
809            }
810
811            // we passed the try_exists above so we should have traversed the correct hierarchy
812            // when reaching this line
813            break;
814        }
815
816        quota
817    }
818
819    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
820    ///
821    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
822    /// over the already-included prefix
823    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
824        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
825        let mut line = String::with_capacity(256);
826        loop {
827            line.clear();
828            if reader.read_line(&mut line).ok()? == 0 {
829                break;
830            }
831
832            let line = line.trim();
833            let mut items = line.split(' ');
834
835            let sub_path = items.nth(3)?;
836            let mount_point = items.next()?;
837            let mount_opts = items.next_back()?;
838            let filesystem_type = items.nth_back(1)?;
839
840            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
841                // not a cgroup / not a cpu-controller
842                continue;
843            }
844
845            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
846
847            if !group_path.starts_with(sub_path) {
848                // this is a bind-mount and the bound subdirectory
849                // does not contain the cgroup this process belongs to
850                continue;
851            }
852
853            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
854
855            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
856        }
857
858        None
859    }
860}
861
862// glibc >= 2.15 has a __pthread_get_minstack() function that returns
863// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
864// We need that information to avoid blowing up when a small stack
865// is created in an application with big thread-local storage requirements.
866// See #6233 for rationale and details.
867#[cfg(all(target_os = "linux", target_env = "gnu"))]
868unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
869    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
870    // We shouldn't really be using such an internal symbol, but there's currently
871    // no other way to account for the TLS size.
872    dlsym!(
873        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
874    );
875
876    match __pthread_get_minstack.get() {
877        None => libc::PTHREAD_STACK_MIN,
878        Some(f) => unsafe { f(attr) },
879    }
880}
881
// No point in looking up __pthread_get_minstack() on non-glibc platforms.
/// Returns the minimum thread stack size, which on these targets is simply
/// `PTHREAD_STACK_MIN`; the attribute argument is not inspected.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
890
/// Returns the minimum thread stack size on NetBSD and NuttX.
///
/// The value is queried once through `sysconf(_SC_THREAD_STACK_MIN)` and
/// cached; a negative result is replaced by a guess of 2048 bytes. The
/// attribute argument is not inspected.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *STACK.get_or_init(|| {
        let reported = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        // just a guess when the query fails
        let stack = if reported < 0 { 2048 } else { reported };

        stack as usize
    })
}