Skip to main content

std/sys/thread/
unix.rs

1#[cfg(not(any(
2    target_env = "newlib",
3    target_os = "l4re",
4    target_os = "emscripten",
5    target_os = "redox",
6    target_os = "hurd",
7    target_os = "aix",
8    target_os = "wasi",
9)))]
10use crate::ffi::CStr;
11use crate::mem::{self, DropGuard, ManuallyDrop};
12use crate::num::NonZero;
13#[cfg(all(target_os = "linux", target_env = "gnu"))]
14use crate::sys::weak::dlsym;
15#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
16use crate::sys::weak::weak;
17use crate::thread::ThreadInit;
18use crate::time::Duration;
19use crate::{cmp, io, ptr, sys};
// Default minimum thread stack size, selected per target. The four `cfg` conditions below
// are mutually exclusive and cover every target, so exactly one definition is compiled in.
// NOTE(review): the value is presumably in bytes and used when no explicit stack size is
// requested — confirm against the callers, which are outside this file.

/// 2 MiB on every target that is not special-cased below.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
/// 1 MiB on L4Re.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
/// 256 KiB on VxWorks.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
/// Sentinel `0` on ESP-IDF and NuttX: defer to the platform's configured stack size.
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
33
/// Owning handle to a native thread, identified by its `pthread_t`.
///
/// Dropping the handle detaches the thread (see the `Drop` impl below); `join` and
/// `into_id` consume the handle without detaching.
pub struct Thread {
    /// Identifier written by `pthread_create` in `Thread::new`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
42
impl Thread {
    /// Spawns a native thread with `pthread_create` and returns a handle to it.
    ///
    /// `stack` is the requested stack size; it is raised to at least
    /// `min_stack_size(..)` before being applied. On ESP-IDF/NuttX a `stack` of `0`
    /// leaves the attribute untouched so the platform default is used.
    /// `init` is handed to the new thread, which calls `init.init()` and then runs the
    /// callable that call returns.
    ///
    /// # Errors
    ///
    /// * `InvalidInput` ("invalid stack size") if the stack size is still rejected after
    ///   being rounded up to a page boundary.
    /// * The raw OS error from `pthread_create` if the thread could not be started; `init`
    ///   is dropped in that case.
    ///
    /// # Panics
    ///
    /// Panics if `pthread_attr_init` or `pthread_attr_destroy` report failure, or if
    /// `pthread_attr_setstacksize` fails with anything other than `EINVAL` (any failure
    /// at all on ESP-IDF/NuttX).
    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
        let data = init;
        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
        // From here on the attribute object is destroyed whenever the guard is dropped,
        // which includes the early `return Err(..)` below.
        let mut attr = DropGuard::new(&mut attr, |attr| {
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0)
        });

        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
        if stack > 0 {
            // Only set the stack if a non-zero value is passed
            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
            assert_eq!(
                libc::pthread_attr_setstacksize(
                    attr.as_mut_ptr(),
                    cmp::max(stack, min_stack_size(attr.as_ptr()))
                ),
                0
            );
        }

        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
        {
            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));

            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
                0 => {}
                n => {
                    assert_eq!(n, libc::EINVAL);
                    // EINVAL means |stack_size| is either too small or not a
                    // multiple of the system page size. Because it's definitely
                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
                    // Round up to the nearest page and try again.
                    let page_size = sys::os::page_size();
                    // The mask `-(page_size - 1) - 1` is `-page_size` in two's complement,
                    // i.e. `!(page_size - 1)`; this rounds up to a multiple of `page_size`
                    // provided `page_size` is a power of two (assumed, not checked here).
                    let stack_size =
                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);

                    // Some libc implementations, e.g. musl, place an upper bound
                    // on the stack size, in which case we can only gracefully return
                    // an error here.
                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
                        return Err(io::const_error!(
                            io::ErrorKind::InvalidInput,
                            "invalid stack size"
                        ));
                    }
                }
            };
        }

        // Leak the box into a raw pointer so it can cross the C boundary as `void *`;
        // ownership is reclaimed either in `thread_start` or in the failure branch below.
        let data = Box::into_raw(data);
        let mut native: libc::pthread_t = mem::zeroed();
        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
        return if ret == 0 {
            Ok(Thread { id: native })
        } else {
            // The thread failed to start and as a result `data` was not consumed.
            // Therefore, it is safe to reconstruct the box so that it gets deallocated.
            drop(Box::from_raw(data));
            Err(io::Error::from_raw_os_error(ret))
        };

        // Entry point executed on the new thread; `data` is the pointer produced by
        // `Box::into_raw` above. Always returns a null pointer as the thread's result.
        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
            unsafe {
                // SAFETY: we are simply recreating the box that was leaked earlier.
                let init = Box::from_raw(data as *mut ThreadInit);
                let rust_start = init.init();

                // Now that the thread information is set, set up our stack
                // overflow handler.
                // `_handler` lives until the end of this block, i.e. until `rust_start`
                // has returned.
                let _handler = sys::stack_overflow::Handler::new();

                rust_start();
            }
            ptr::null_mut()
        }
    }

    /// Waits for the thread to finish, consuming the handle.
    ///
    /// # Panics
    ///
    /// Panics with "failed to join thread: ..." if `pthread_join` returns a non-zero code.
    pub fn join(self) {
        // `into_id` suppresses the `Drop` impl, so the thread is joined rather than detached.
        let id = self.into_id();
        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
    }

    /// Returns the underlying `pthread_t` without giving up ownership of the handle.
    #[cfg(not(target_os = "wasi"))]
    pub fn id(&self) -> libc::pthread_t {
        self.id
    }

    /// Consumes the handle and returns the underlying `pthread_t`.
    ///
    /// The handle is wrapped in `ManuallyDrop`, so its `Drop` impl (which would detach the
    /// thread) does not run.
    pub fn into_id(self) -> libc::pthread_t {
        ManuallyDrop::new(self).id
    }
}
139
140impl Drop for Thread {
141    fn drop(&mut self) {
142        let ret = unsafe { libc::pthread_detach(self.id) };
143        debug_assert_eq!(ret, 0);
144    }
145}
146
147pub fn available_parallelism() -> io::Result<NonZero<usize>> {
148    cfg_select! {
149        any(
150            target_os = "android",
151            target_os = "emscripten",
152            target_os = "fuchsia",
153            target_os = "hurd",
154            target_os = "linux",
155            target_os = "aix",
156            target_vendor = "apple",
157            target_os = "cygwin",
158        ) => {
159            #[allow(unused_assignments)]
160            #[allow(unused_mut)]
161            let mut quota = usize::MAX;
162
163            #[cfg(any(target_os = "android", target_os = "linux"))]
164            {
165                quota = cgroups::quota().max(1);
166                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
167                unsafe {
168                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
169                        let count = libc::CPU_COUNT(&set) as usize;
170                        let count = count.min(quota);
171
172                        // According to sched_getaffinity's API it should always be non-zero, but
173                        // some old MIPS kernels were buggy and zero-initialized the mask if
174                        // none was explicitly set.
175                        // In that case we use the sysconf fallback.
176                        if let Some(count) = NonZero::new(count) {
177                            return Ok(count)
178                        }
179                    }
180                }
181            }
182            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
183                -1 => Err(io::Error::last_os_error()),
184                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
185                cpus => {
186                    let count = cpus as usize;
187                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
188                    let count = count.min(quota);
189                    Ok(unsafe { NonZero::new_unchecked(count) })
190                }
191            }
192        }
193        any(
194           target_os = "freebsd",
195           target_os = "dragonfly",
196           target_os = "openbsd",
197           target_os = "netbsd",
198        ) => {
199            use crate::ptr;
200
201            #[cfg(target_os = "freebsd")]
202            {
203                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
204                unsafe {
205                    if libc::cpuset_getaffinity(
206                        libc::CPU_LEVEL_WHICH,
207                        libc::CPU_WHICH_PID,
208                        -1,
209                        size_of::<libc::cpuset_t>(),
210                        &mut set,
211                    ) == 0 {
212                        let count = libc::CPU_COUNT(&set) as usize;
213                        if count > 0 {
214                            return Ok(NonZero::new_unchecked(count));
215                        }
216                    }
217                }
218            }
219
220            #[cfg(target_os = "netbsd")]
221            {
222                unsafe {
223                    let set = libc::_cpuset_create();
224                    if !set.is_null() {
225                        let mut count: usize = 0;
226                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
227                            for i in 0..libc::cpuid_t::MAX {
228                                match libc::_cpuset_isset(i, set) {
229                                    -1 => break,
230                                    0 => continue,
231                                    _ => count = count + 1,
232                                }
233                            }
234                        }
235                        libc::_cpuset_destroy(set);
236                        if let Some(count) = NonZero::new(count) {
237                            return Ok(count);
238                        }
239                    }
240                }
241            }
242
243            let mut cpus: libc::c_uint = 0;
244            let mut cpus_size = size_of_val(&cpus);
245
246            unsafe {
247                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
248            }
249
250            // Fallback approach in case of errors or no hardware threads.
251            if cpus < 1 {
252                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
253                let res = unsafe {
254                    libc::sysctl(
255                        mib.as_mut_ptr(),
256                        2,
257                        (&raw mut cpus) as *mut _,
258                        (&raw mut cpus_size) as *mut _,
259                        ptr::null_mut(),
260                        0,
261                    )
262                };
263
264                // Handle errors if any.
265                if res == -1 {
266                    return Err(io::Error::last_os_error());
267                } else if cpus == 0 {
268                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
269                }
270            }
271
272            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
273        }
274        target_os = "nto" => {
275            unsafe {
276                use libc::_syspage_ptr;
277                if _syspage_ptr.is_null() {
278                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
279                } else {
280                    let cpus = (*_syspage_ptr).num_cpu;
281                    NonZero::new(cpus as usize)
282                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
283                }
284            }
285        }
286        any(target_os = "solaris", target_os = "illumos") => {
287            let mut cpus = 0u32;
288            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
289                return Err(io::Error::UNKNOWN_THREAD_COUNT);
290            }
291            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
292        }
293        target_os = "haiku" => {
294            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
295            // `get_system_info` calls then `smp_get_num_cpus`
296            unsafe {
297                let mut sinfo: libc::system_info = crate::mem::zeroed();
298                let res = libc::get_system_info(&mut sinfo);
299
300                if res != libc::B_OK {
301                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
302                }
303
304                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
305            }
306        }
307        target_os = "vxworks" => {
308            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
309            // expectations than the actual cores availability.
310
311            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
312            unsafe{
313                let set = libc::vxCpuEnabledGet();
314                Ok(NonZero::new_unchecked(set.count_ones() as usize))
315            }
316        }
317        _ => {
318            // FIXME: implement on Redox, l4re
319            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
320        }
321    }
322}
323
324pub fn current_os_id() -> Option<u64> {
325    // Most Unix platforms have a way to query an integer ID of the current thread, all with
326    // slightly different spellings.
327    //
328    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
329    // for process inspection (debuggers, trace, `top`, etc.).
330    cfg_select! {
331        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
332        any(target_os = "android", target_os = "linux") => {
333            use crate::sys::pal::weak::syscall;
334
335            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
336            // since Linux 2.4.11.
337            syscall!(fn gettid() -> libc::pid_t;);
338
339            // SAFETY: FFI call with no preconditions.
340            let id: libc::pid_t = unsafe { gettid() };
341            Some(id as u64)
342        }
343        target_os = "nto" => {
344            // SAFETY: FFI call with no preconditions.
345            let id: libc::pid_t = unsafe { libc::gettid() };
346            Some(id as u64)
347        }
348        target_os = "openbsd" => {
349            // SAFETY: FFI call with no preconditions.
350            let id: libc::pid_t = unsafe { libc::getthrid() };
351            Some(id as u64)
352        }
353        target_os = "freebsd" => {
354            // SAFETY: FFI call with no preconditions.
355            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
356            Some(id as u64)
357        }
358        target_os = "netbsd" => {
359            // SAFETY: FFI call with no preconditions.
360            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
361            Some(id as u64)
362        }
363        any(target_os = "illumos", target_os = "solaris") => {
364            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
365            // SAFETY: FFI call with no preconditions.
366            let id: libc::pthread_t = unsafe { libc::pthread_self() };
367            Some(id as u64)
368        }
369        target_vendor = "apple" => {
370            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
371            let mut id = 0u64;
372            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
373            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
374            if status == 0 {
375                Some(id)
376            } else {
377                None
378            }
379        }
380        // Other platforms don't have an OS thread ID or don't have a way to access it.
381        _ => None,
382    }
383}
384
/// Copies `cstr` into a fixed-size, zero-initialised `c_char` array.
///
/// At most `MAX_WITH_NUL - 1` bytes are copied, so the final element is never written
/// and the result always ends in a NUL; longer names are silently cut off.
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut buf = [0; MAX_WITH_NUL];
    // Every slot except the last may receive a byte of the name.
    let writable = &mut buf[..MAX_WITH_NUL - 1];
    writable
        .iter_mut()
        .zip(cstr.to_bytes())
        .for_each(|(slot, &byte)| *slot = byte as libc::c_char);
    buf
}
401
/// Sets the name of the calling thread via `prctl(PR_SET_NAME, ..)`.
///
/// Failures cannot be reported to the caller; they are only caught by a debug assertion.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing arguments are passed as zero.
    const ZERO: libc::c_ulong = 0;

    let status = unsafe { libc::prctl(PR_SET_NAME, name.as_ptr(), ZERO, ZERO, ZERO) };
    // There is no good way to propagate an error from here, so only debug builds verify
    // that the call succeeded.
    debug_assert_eq!(status, 0);
}
417
/// Sets the name of the calling thread via `pthread_setname_np`.
///
/// On Linux and Cygwin the name is first truncated to fit `TASK_COMM_LEN` (16 bytes
/// including the terminating NUL); the other targets pass `name` through unchanged.
/// Failures are not reported to the caller; they only trip a debug assertion.
#[cfg(any(
    target_os = "linux",
    target_os = "freebsd",
    target_os = "dragonfly",
    target_os = "nuttx",
    target_os = "cygwin"
))]
pub fn set_name(name: &CStr) {
    unsafe {
        // NOTE(review): the selected arm's statements appear to be expanded directly into
        // this scope, so on Linux/Cygwin the `name` used further down is the truncated
        // copy shadowing the parameter — confirm against `cfg_select!`'s expansion rules.
        cfg_select! {
            any(target_os = "linux", target_os = "cygwin") => {
                // Linux and Cygwin limit the allowed length of the name.
                const TASK_COMM_LEN: usize = 16;
                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
            }
            _ => {
                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
            }
        };
        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
        debug_assert_eq!(res, 0);
    }
}
444
/// Sets the name of the calling thread via `pthread_set_name_np`.
///
/// The call's result is not inspected.
#[cfg(target_os = "openbsd")]
pub fn set_name(name: &CStr) {
    unsafe {
        let current = libc::pthread_self();
        libc::pthread_set_name_np(current, name.as_ptr());
    }
}
451
/// Sets the name of the calling thread via the single-argument `pthread_setname_np`.
///
/// The name is truncated to fit `MAXTHREADNAMESIZE` (terminating NUL included). Failures
/// cannot be reported to the caller; they are only caught by a debug assertion.
#[cfg(target_vendor = "apple")]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    let status = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // There is no good way to propagate an error from here, so only debug builds verify
    // that the call succeeded.
    debug_assert_eq!(status, 0);
}
461
/// Sets the name of the calling thread via the three-argument `pthread_setname_np`.
///
/// The name is passed as the argument to a literal `"%s"` format string. Failures are
/// only caught by a debug assertion.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        let current = libc::pthread_self();
        let format = c"%s".as_ptr();
        let argument = name.as_ptr() as *mut libc::c_void;
        libc::pthread_setname_np(current, format, argument)
    };
    debug_assert_eq!(status, 0);
}
473
/// Sets the name of the calling thread via a weakly-linked `pthread_setname_np`.
///
/// If the symbol is not available (`get()` returns `None`) the function does nothing.
/// Otherwise the name is truncated to `THREAD_NAME_MAX` (terminating NUL included)
/// before the call; failures are only caught by a debug assertion.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    #[cfg(target_os = "nto")]
    const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
    const THREAD_NAME_MAX: usize = 32;

    weak!(
        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
    );

    let Some(setname) = pthread_setname_np.get() else {
        return;
    };

    let truncated = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
    let status = unsafe { setname(libc::pthread_self(), truncated.as_ptr()) };
    debug_assert_eq!(status, 0);
}
491
/// Sets the name of the calling thread by writing the `ZX_PROP_NAME` property of the
/// current thread object.
///
/// The length passed along excludes the terminating NUL. The call's result is not
/// inspected.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use crate::sys::pal::fuchsia::*;

    let value = name.as_ptr() as *const libc::c_void;
    let value_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, value, value_len);
    }
}
504
/// Renames the calling thread via `rename_thread`.
///
/// Failures cannot be reported to the caller; they are only caught by a debug assertion.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        // The thread to rename is looked up by passing a null name to `find_thread`.
        let current = libc::find_thread(ptr::null_mut());
        libc::rename_thread(current, name.as_ptr())
    };
    // There is no good way to propagate an error from here, so only debug builds verify
    // that the call succeeded.
    debug_assert_eq!(status, libc::B_OK);
}
514
/// Sets the name of the calling task via `taskNameSet`.
///
/// The name is truncated to a buffer of `VX_TASK_RENAME_LENGTH - 1` elements (terminating
/// NUL included). Failures are only caught by a debug assertion.
#[cfg(target_os = "vxworks")]
pub fn set_name(name: &CStr) {
    const NAME_BUF_LEN: usize = (libc::VX_TASK_RENAME_LENGTH - 1) as usize;

    // `taskNameSet` is given a mutable pointer, hence the mutable local copy.
    let mut buf = truncate_cstr::<NAME_BUF_LEN>(name);
    let status = unsafe { libc::taskNameSet(libc::taskIdSelf(), buf.as_mut_ptr()) };
    debug_assert_eq!(status, libc::OK);
}
521
/// Puts the calling thread to sleep for `dur`, resuming the sleep with the remaining time
/// whenever the underlying call is interrupted (`EINTR`).
///
/// # Panics
///
/// Panics if the underlying sleep call fails with any error other than `EINTR`.
#[cfg(not(target_os = "espidf"))]
pub fn sleep(dur: Duration) {
    // Define a local `nanosleep` with a uniform contract for the loop below: it returns 0 on
    // success and the error number (not -1) on failure.
    cfg_select! {
        // Any unix that has clock_nanosleep
        // If this list changes update the MIRI clock_nanosleep shim
        any(
            target_os = "freebsd",
            target_os = "netbsd",
            target_os = "linux",
            target_os = "android",
            target_os = "solaris",
            target_os = "illumos",
            target_os = "dragonfly",
            target_os = "hurd",
            target_os = "vxworks",
            target_os = "wasi",
        ) => {
            // POSIX specifies that `nanosleep` uses CLOCK_REALTIME, but is not
            // affected by clock adjustments. The timing of `sleep` however should
            // be tied to `Instant` where possible. Thus, we use `clock_nanosleep`
            // with a relative time interval instead, which allows explicitly
            // specifying the clock.
            //
            // In practice, most systems (like e.g. Linux) actually use
            // CLOCK_MONOTONIC for `nanosleep` anyway, but others like FreeBSD don't
            // so it's better to be safe.
            //
            // wasi-libc prior to WebAssembly/wasi-libc#696 has a broken implementation
            // of `nanosleep` which used `CLOCK_REALTIME` even though it is unsupported
            // on WASIp2. Using `clock_nanosleep` directly bypasses the issue.
            unsafe fn nanosleep(rqtp: *const libc::timespec, rmtp: *mut libc::timespec) -> libc::c_int {
                unsafe { libc::clock_nanosleep(crate::sys::time::Instant::CLOCK_ID, 0, rqtp, rmtp) }
            }
        }
        _ => {
            unsafe fn nanosleep(rqtp: *const libc::timespec, rmtp: *mut libc::timespec) -> libc::c_int {
                let r = unsafe { libc::nanosleep(rqtp, rmtp) };
                // `clock_nanosleep` returns the error number directly, so mimic
                // that behaviour to make the shared code below simpler.
                if r == 0 { 0 } else { sys::io::errno() }
            }
        }
    }

    // Time still left to sleep, split into whole seconds and the sub-second remainder.
    let mut secs = dur.as_secs();
    let mut nsecs = dur.subsec_nanos() as _;

    // If we're awoken by a signal, the local `nanosleep` wrapper returns `EINTR` and the
    // remaining time is written back into `ts` (it is passed as both request and remainder).
    unsafe {
        while secs > 0 || nsecs > 0 {
            // `secs` is a `u64` and may exceed what `time_t` can hold, so sleep in chunks of
            // at most `time_t::MAX` seconds.
            let mut ts = libc::timespec {
                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
                tv_nsec: nsecs,
            };
            // Account for the chunk up front; the unslept part is added back on `EINTR`.
            secs -= ts.tv_sec as u64;
            let ts_ptr = &raw mut ts;
            let r = nanosleep(ts_ptr, ts_ptr);
            if r != 0 {
                assert_eq!(r, libc::EINTR);
                secs += ts.tv_sec as u64;
                nsecs = ts.tv_nsec;
            } else {
                // The sub-second part was included in the chunk that just completed.
                nsecs = 0;
            }
        }
    }
}
590
/// Puts the calling thread to sleep for at least `dur`, using repeated `usleep` calls.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    // ESP-IDF has no `nanosleep`, so `usleep` is used instead. Its documentation only
    // promises support for sleeps of up to 1 second.
    //
    // ESP-IDF itself handles values up to almost `u32::MAX`, but its `usleep` implementation
    // contains a potential integer overflow
    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
    // so each call is limited to the largest value that cannot trigger it
    // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

    // `std::thread::sleep` must sleep for _at least_ `dur`, so any leftover nanoseconds
    // below one microsecond are rounded up to a full extra microsecond.
    // The sum cannot overflow: it is a `u128`, while `Duration` is a pair of
    // (`u64` secs, `u32` nanos) with the nanos strictly below 1_000_000_000.
    let has_sub_micro_part = dur.subsec_nanos() % 1_000 > 0;
    let mut remaining = dur.as_micros() + u128::from(has_sub_micro_part);

    while remaining > 0 {
        // Bounded by `MAX_MICROS`, so narrowing to `u32` is lossless.
        let chunk = cmp::min(remaining, u128::from(MAX_MICROS)) as u32;
        unsafe {
            libc::usleep(chunk);
        }

        remaining -= u128::from(chunk);
    }
}
621
// Any unix that has clock_nanosleep
// If this list changes update the MIRI clock_nanosleep shim
/// Puts the calling thread to sleep until `deadline`, using `clock_nanosleep` with
/// `TIMER_ABSTIME` and restarting the call whenever it is interrupted (`EINTR`).
///
/// If the deadline cannot be represented as a `timespec`, the remaining time is computed
/// relative to `Instant::now()` and handed to `sleep` instead.
///
/// # Panics
///
/// Panics if the final `clock_nanosleep` call fails with anything other than `EINTR`.
#[cfg(any(
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "linux",
    target_os = "android",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "dragonfly",
    target_os = "hurd",
    target_os = "vxworks",
    target_os = "wasi",
))]
pub fn sleep_until(deadline: crate::time::Instant) {
    use crate::time::Instant;

    // 32-bit glibc targets: try the 64-bit-time variant first, if the symbol is present.
    #[cfg(all(
        target_os = "linux",
        target_env = "gnu",
        target_pointer_width = "32",
        not(target_arch = "riscv32")
    ))]
    {
        use crate::sys::pal::time::__timespec64;
        use crate::sys::pal::weak::weak;

        // This got added in glibc 2.31, along with a 64-bit `clock_gettime`
        // function.
        weak! {
            fn __clock_nanosleep_time64(
                clock_id: libc::clockid_t,
                flags: libc::c_int,
                req: *const __timespec64,
                rem: *mut __timespec64,
            ) -> libc::c_int;
        }

        if let Some(clock_nanosleep) = __clock_nanosleep_time64.get() {
            let ts = deadline.into_inner().into_timespec().to_timespec64();
            loop {
                let r = unsafe {
                    clock_nanosleep(
                        crate::sys::time::Instant::CLOCK_ID,
                        libc::TIMER_ABSTIME,
                        &ts,
                        core::ptr::null_mut(),
                    )
                };

                match r {
                    0 => return,
                    libc::EINTR => continue,
                    // If the underlying kernel doesn't support the 64-bit
                    // syscall, `__clock_nanosleep_time64` will fail. The
                    // error code nowadays is EOVERFLOW, but it used to be
                    // ENOSYS – so just don't rely on any particular value.
                    // The parameters are all valid, so the only reasons
                    // why the call might fail are EINTR and the call not
                    // being supported. Fall through to the clamping version
                    // in that case.
                    _ => break,
                }
            }
        }
    }

    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
        // The deadline is further in the future than can be passed to
        // clock_nanosleep. We have to use `sleep` instead. This might
        // happen on 32 bit platforms, especially closer to 2038.
        let now = Instant::now();
        // A deadline that has already passed yields `None`, in which case there is
        // nothing to wait for.
        if let Some(delay) = deadline.checked_duration_since(now) {
            sleep(delay);
        }
        return;
    };

    unsafe {
        // When we get interrupted (res = EINTR) call clock_nanosleep again
        loop {
            let res = libc::clock_nanosleep(
                crate::sys::time::Instant::CLOCK_ID,
                libc::TIMER_ABSTIME,
                &ts,
                core::ptr::null_mut(), // not required with TIMER_ABSTIME
            );

            if res == 0 {
                break;
            } else {
                assert_eq!(
                    res,
                    libc::EINTR,
                    "timespec is in range,
                         clockid is valid and kernel should support it"
                );
            }
        }
    }
}
723
/// Puts the calling thread to sleep until `deadline` using `mach_wait_until`, restarting
/// the wait whenever it is aborted.
///
/// # Panics
///
/// Panics if `mach_wait_until` returns anything other than `KERN_SUCCESS` or
/// `KERN_ABORTED`.
#[cfg(target_vendor = "apple")]
pub fn sleep_until(deadline: crate::time::Instant) {
    unsafe extern "C" {
        // Declared in the public header mach/mach_time.h next to `mach_absolute_time`,
        // and available for just as long.
        //
        // Documentation is scarce; the only mention is a short reference in technical
        // note 2169:
        // https://developer.apple.com/library/archive/technotes/tn2169/_index.html
        safe fn mach_wait_until(deadline: u64) -> libc::kern_return_t;
    }

    // Round up so that the wait is guaranteed to end only after the deadline has elapsed.
    let Some(ticks) = deadline.into_inner().into_mach_absolute_time_ceil() else {
        // The deadline lies before the system boot time, i.e. it has already passed.
        return;
    };

    // An unrepresentable deadline is clamped to the longest possible wait; the resulting
    // clock issues are left for later (in ca. 600 years).
    let ticks = ticks.try_into().unwrap_or(u64::MAX);
    loop {
        let error = match mach_wait_until(ticks) {
            // Success! The deadline has passed.
            libc::KERN_SUCCESS => return,
            // A signal interrupting the sleep makes `mach_wait_until` return
            // KERN_ABORTED, so the call has to be restarted.
            // Apple's own POSIX `nanosleep` maps this error to EINTR, see:
            // https://github.com/apple-oss-distributions/Libc/blob/55b54c0a0c37b3b24393b42b90a4c561d6c606b1/gen/nanosleep.c#L281-L306
            libc::KERN_ABORTED => continue,
            // Every other code means something has gone wrong.
            other => other,
        };

        let description = unsafe { CStr::from_ptr(libc::mach_error_string(error)) };
        panic!("mach_wait_until failed: {} (code {error})", description.display())
    }
}
766
767pub fn yield_now() {
768    let ret = unsafe { libc::sched_yield() };
769    debug_assert_eq!(ret, 0);
770}
771
772#[cfg(any(target_os = "android", target_os = "linux"))]
773mod cgroups {
774    //! Currently not covered
775    //! * cgroup v2 in non-standard mountpoints
776    //! * paths containing control characters or spaces, since those would be escaped in procfs
777    //!   output and we don't unescape
778
779    use crate::borrow::Cow;
780    use crate::ffi::OsString;
781    use crate::fs::{File, exists};
782    use crate::io::{BufRead, Read};
783    use crate::os::unix::ffi::OsStringExt;
784    use crate::path::{Path, PathBuf};
785    use crate::str::from_utf8;
786
787    #[derive(PartialEq)]
788    enum Cgroup {
789        V1,
790        V2,
791    }
792
793    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
794    /// be determined or is not set.
795    pub(super) fn quota() -> usize {
796        let mut quota = usize::MAX;
797        if cfg!(miri) {
798            // Attempting to open a file fails under default flags due to isolation.
799            // And Miri does not have parallelism anyway.
800            return quota;
801        }
802
803        let _: Option<()> = try {
804            let mut buf = Vec::with_capacity(128);
805            // find our place in the cgroup hierarchy
806            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
807            let (cgroup_path, version) =
808                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
809                    let mut fields = line.splitn(3, |&c| c == b':');
810                    // 2nd field is a list of controllers for v1 or empty for v2
811                    let version = match fields.nth(1) {
812                        Some(b"") => Cgroup::V2,
813                        Some(controllers)
814                            if from_utf8(controllers)
815                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
816                        {
817                            Cgroup::V1
818                        }
819                        _ => return previous,
820                    };
821
822                    // already-found v1 trumps v2 since it explicitly specifies its controllers
823                    if previous.is_some() && version == Cgroup::V2 {
824                        return previous;
825                    }
826
827                    let path = fields.last()?;
828                    // skip leading slash
829                    Some((path[1..].to_owned(), version))
830                })?;
831            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
832
833            quota = match version {
834                Cgroup::V1 => quota_v1(cgroup_path),
835                Cgroup::V2 => quota_v2(cgroup_path),
836            };
837        };
838
839        quota
840    }
841
842    fn quota_v2(group_path: PathBuf) -> usize {
843        let mut quota = usize::MAX;
844
845        let mut path = PathBuf::with_capacity(128);
846        let mut read_buf = String::with_capacity(20);
847
848        // standard mount location defined in file-hierarchy(7) manpage
849        let cgroup_mount = "/sys/fs/cgroup";
850
851        path.push(cgroup_mount);
852        path.push(&group_path);
853
854        path.push("cgroup.controllers");
855
856        // skip if we're not looking at cgroup2
857        if matches!(exists(&path), Err(_) | Ok(false)) {
858            return usize::MAX;
859        };
860
861        path.pop();
862
863        let _: Option<()> = try {
864            while path.starts_with(cgroup_mount) {
865                path.push("cpu.max");
866
867                read_buf.clear();
868
869                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
870                    let raw_quota = read_buf.lines().next()?;
871                    let mut raw_quota = raw_quota.split(' ');
872                    let limit = raw_quota.next()?;
873                    let period = raw_quota.next()?;
874                    match (limit.parse::<usize>(), period.parse::<usize>()) {
875                        (Ok(limit), Ok(period)) if period > 0 => {
876                            quota = quota.min(limit / period);
877                        }
878                        _ => {}
879                    }
880                }
881
882                path.pop(); // pop filename
883                path.pop(); // pop dir
884            }
885        };
886
887        quota
888    }
889
890    fn quota_v1(group_path: PathBuf) -> usize {
891        let mut quota = usize::MAX;
892        let mut path = PathBuf::with_capacity(128);
893        let mut read_buf = String::with_capacity(20);
894
895        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
896        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
897        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
898            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
899            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
900            // this can be expensive on systems with tons of mountpoints
901            // but we only get to this point when /proc/self/cgroups explicitly indicated
902            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
903            find_mountpoint,
904        ];
905
906        for mount in mounts {
907            let Some((mount, group_path)) = mount(&group_path) else { continue };
908
909            path.clear();
910            path.push(mount.as_ref());
911            path.push(&group_path);
912
913            // skip if we guessed the mount incorrectly
914            if matches!(exists(&path), Err(_) | Ok(false)) {
915                continue;
916            }
917
918            while path.starts_with(mount.as_ref()) {
919                let mut parse_file = |name| {
920                    path.push(name);
921                    read_buf.clear();
922
923                    let f = File::open(&path);
924                    path.pop(); // restore buffer before any early returns
925                    f.ok()?.read_to_string(&mut read_buf).ok()?;
926                    let parsed = read_buf.trim().parse::<usize>().ok()?;
927
928                    Some(parsed)
929                };
930
931                let limit = parse_file("cpu.cfs_quota_us");
932                let period = parse_file("cpu.cfs_period_us");
933
934                match (limit, period) {
935                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
936                    _ => {}
937                }
938
939                path.pop();
940            }
941
942            // we passed the try_exists above so we should have traversed the correct hierarchy
943            // when reaching this line
944            break;
945        }
946
947        quota
948    }
949
950    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
951    ///
952    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
953    /// over the already-included prefix
954    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
955        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
956        let mut line = String::with_capacity(256);
957        loop {
958            line.clear();
959            if reader.read_line(&mut line).ok()? == 0 {
960                break;
961            }
962
963            let line = line.trim();
964            let mut items = line.split(' ');
965
966            let sub_path = items.nth(3)?;
967            let mount_point = items.next()?;
968            let mount_opts = items.next_back()?;
969            let filesystem_type = items.nth_back(1)?;
970
971            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
972                // not a cgroup / not a cpu-controller
973                continue;
974            }
975
976            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
977
978            if !group_path.starts_with(sub_path) {
979                // this is a bind-mount and the bound subdirectory
980                // does not contain the cgroup this process belongs to
981                continue;
982            }
983
984            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
985
986            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
987        }
988
989        None
990    }
991}
992
993// glibc >= 2.15 has a __pthread_get_minstack() function that returns
994// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
995// We need that information to avoid blowing up when a small stack
996// is created in an application with big thread-local storage requirements.
997// See #6233 for rationale and details.
998#[cfg(all(target_os = "linux", target_env = "gnu"))]
999unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
1000    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
1001    // We shouldn't really be using such an internal symbol, but there's currently
1002    // no other way to account for the TLS size.
1003    dlsym!(
1004        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
1005    );
1006
1007    match __pthread_get_minstack.get() {
1008        None => libc::PTHREAD_STACK_MIN,
1009        Some(f) => unsafe { f(attr) },
1010    }
1011}
1012
// Outside of glibc there is no `__pthread_get_minstack()` worth looking up,
// so the libc-provided constant is returned as is.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
1021
// On these targets the minimum is queried from `sysconf` once and cached
// for all later calls.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    static MIN_STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *MIN_STACK.get_or_init(|| {
        let reported = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        if reported < 0 {
            // negative result from sysconf: just a guess
            2048
        } else {
            reported as usize
        }
    })
}