From 6bb4d4eaec7ba4559736b2acb73f8e85fef5f4de Mon Sep 17 00:00:00 2001 From: GuEe-GUI <2991707448@qq.com> Date: Sat, 27 Aug 2022 23:41:22 +0800 Subject: [PATCH] update base codes --- Makefile | 13 +- scripts/mkalltypes.sed | 15 ++ src/.gitignore | 2 + src/Makefile | 6 +- src/arch/generic/bits/fenv.h | 10 + src/arch/generic/bits/limits.h | 0 src/arch/riscv64/include/bits/alltypes.h.in | 18 ++ src/arch/riscv64/include/bits/fenv.h | 17 ++ src/arch/riscv64/include/bits/float.h | 16 ++ src/arch/x86/include/bits/alltypes.h.in | 31 +++ src/arch/x86/include/bits/fenv.h | 33 +++ src/arch/x86/include/bits/float.h | 20 ++ src/arch/x86/include/bits/limits.h | 1 + src/complex/__cexp.c | 87 +++++++ src/complex/__cexpf.c | 68 +++++ src/complex/cabs.c | 6 + src/complex/cabsf.c | 6 + src/complex/cabsl.c | 13 + src/complex/cacos.c | 11 + src/complex/cacosf.c | 11 + src/complex/cacosh.c | 12 + src/complex/cacoshf.c | 10 + src/complex/cacoshl.c | 17 ++ src/complex/cacosl.c | 16 ++ src/complex/carg.c | 6 + src/complex/cargf.c | 6 + src/complex/cargl.c | 13 + src/complex/casin.c | 17 ++ src/complex/casinf.c | 15 ++ src/complex/casinh.c | 9 + src/complex/casinhf.c | 7 + src/complex/casinhl.c | 14 + src/complex/casinl.c | 21 ++ src/complex/catan.c | 107 ++++++++ src/complex/catanf.c | 105 ++++++++ src/complex/catanh.c | 9 + src/complex/catanhf.c | 7 + src/complex/catanhl.c | 14 + src/complex/catanl.c | 114 ++++++++ src/complex/ccos.c | 8 + src/complex/ccosf.c | 6 + src/complex/ccosh.c | 140 ++++++++++ src/complex/ccoshf.c | 90 +++++++ src/complex/ccoshl.c | 7 + src/complex/ccosl.c | 13 + src/complex/cexp.c | 83 ++++++ src/complex/cexpf.c | 83 ++++++ src/complex/cexpl.c | 7 + src/complex/cimag.c | 6 + src/complex/cimagf.c | 6 + src/complex/cimagl.c | 6 + src/complex/clog.c | 14 + src/complex/clogf.c | 12 + src/complex/clogl.c | 18 ++ src/complex/conj.c | 6 + src/complex/conjf.c | 6 + src/complex/conjl.c | 6 + src/complex/cpow.c | 8 + src/complex/cpowf.c | 6 + src/complex/cpowl.c | 13 + src/complex/cproj.c | 8 + src/complex/cprojf.c | 8 + src/complex/cprojl.c | 15 ++ src/complex/creal.c | 6 + src/complex/crealf.c | 6 + src/complex/creall.c | 6 + src/complex/csin.c | 9 + src/complex/csinf.c | 7 + src/complex/csinh.c | 141 ++++++++++ src/complex/csinhf.c | 90 +++++++ src/complex/csinhl.c | 7 + src/complex/csinl.c | 14 + src/complex/csqrt.c | 100 ++++++++ src/complex/csqrtf.c | 82 ++++++ src/complex/csqrtl.c | 7 + src/complex/ctan.c | 9 + src/complex/ctanf.c | 7 + src/complex/ctanh.c | 129 ++++++++++ src/complex/ctanhf.c | 66 +++++ src/complex/ctanhl.c | 7 + src/complex/ctanl.c | 14 + src/ctype/isalpha.c | 10 - src/ctype/isdigit.c | 10 - src/ctype/isgraph.c | 10 - src/ctype/islower.c | 10 - src/ctype/isprint.c | 10 - src/ctype/isspace.c | 10 - src/ctype/isupper.c | 10 - src/fenv/__flt_rounds.c | 19 ++ src/fenv/aarch64/fenv.s | 68 +++++ src/fenv/arm/fenv-hf.S | 70 +++++ src/fenv/arm/fenv.c | 3 + src/fenv/fegetexceptflag.c | 7 + src/fenv/feholdexcept.c | 8 + src/fenv/fenv.c | 38 +++ src/fenv/fesetexceptflag.c | 8 + src/fenv/fesetround.c | 23 ++ src/fenv/feupdateenv.c | 9 + src/fenv/i386/fenv.s | 164 ++++++++++++ src/fenv/riscv64/fenv-sf.c | 3 + src/fenv/riscv64/fenv.S | 56 ++++ src/fenv/x86-64/fenv.s | 98 +++++++ src/include/bits/alltypes.h.in | 95 +++++++ src/include/complex.h | 132 ++++++++++ src/include/ctype.h | 48 ++++ src/include/features.h | 4 + src/include/fenv.h | 28 ++ src/include/float.h | 52 ++++ src/include/fnmatch.h | 24 ++ src/include/iso646.h | 20 ++ src/include/limits.h | 166 ++++++++++++ src/include/math.h | 17 ++ src/internal/complex_impl.h | 21 ++ src/internal/libm.h | 15 ++ src/math/__invtrigl.c | 63 +++++ src/math/__invtrigl.h | 8 + src/math/__math_invalidl.c | 9 + src/math/__polevll.c | 81 ++++++ src/math/__signbit.c | 13 + src/math/__signbitf.c | 11 + src/math/__signbitl.c | 14 + src/math/arm/fabs.c | 15 ++ src/math/arm/fabsf.c | 15 ++ src/math/arm/fma.c | 15 ++ src/math/arm/fmaf.c | 15 ++ src/math/arm/sqrt.c | 15 ++ src/math/arm/sqrtf.c | 15 ++ src/math/arm64/ceil.c | 7 + src/math/arm64/ceilf.c | 7 + src/math/arm64/fabs.c | 7 + src/math/arm64/fabsf.c | 7 + src/math/arm64/floor.c | 7 + src/math/arm64/floorf.c | 7 + src/math/arm64/fma.c | 7 + src/math/arm64/fmaf.c | 7 + src/math/arm64/fmax.c | 7 + src/math/arm64/fmaxf.c | 7 + src/math/arm64/fmin.c | 7 + src/math/arm64/fminf.c | 7 + src/math/arm64/llrint.c | 10 + src/math/arm64/llrintf.c | 10 + src/math/arm64/llround.c | 8 + src/math/arm64/llroundf.c | 8 + src/math/arm64/lrint.c | 10 + src/math/arm64/lrintf.c | 10 + src/math/arm64/lround.c | 8 + src/math/arm64/lroundf.c | 8 + src/math/arm64/nearbyint.c | 7 + src/math/arm64/nearbyintf.c | 7 + src/math/arm64/rint.c | 7 + src/math/arm64/rintf.c | 7 + src/math/arm64/round.c | 7 + src/math/arm64/roundf.c | 7 + src/math/arm64/sqrt.c | 7 + src/math/arm64/sqrtf.c | 7 + src/math/arm64/trunc.c | 7 + src/math/arm64/truncf.c | 7 + src/math/atan2l.c | 91 +++++++ src/math/atanl.c | 188 ++++++++++++++ src/math/copysignl.c | 16 ++ src/math/fabsl.c | 15 ++ src/math/frexpl.c | 34 +++ src/math/hypotl.c | 66 +++++ src/math/logl.c | 171 ++++++++++++ src/math/riscv64/copysign.c | 15 ++ src/math/riscv64/copysignf.c | 15 ++ src/math/riscv64/fabs.c | 15 ++ src/math/riscv64/fabsf.c | 15 ++ src/math/riscv64/fma.c | 15 ++ src/math/riscv64/fmaf.c | 15 ++ src/math/riscv64/fmax.c | 15 ++ src/math/riscv64/fmaxf.c | 15 ++ src/math/riscv64/fmin.c | 15 ++ src/math/riscv64/fminf.c | 15 ++ src/math/riscv64/sqrt.c | 15 ++ src/math/riscv64/sqrtf.c | 15 ++ src/math/sqrt_data.h | 13 + src/math/sqrtl.c | 271 ++++++++++++++++++++ src/math/x86-64/__invtrigl.s | 0 src/math/x86-64/acosl.s | 16 ++ src/math/x86-64/asinl.s | 12 + src/math/x86-64/atan2l.s | 7 + src/math/x86-64/atanl.s | 7 + src/math/x86-64/ceill.s | 1 + src/math/x86-64/exp2l.s | 83 ++++++ src/math/x86-64/expl.s | 101 ++++++++ src/math/x86-64/expm1l.s | 1 + src/math/x86-64/fabs.c | 10 + src/math/x86-64/fabsf.c | 10 + src/math/x86-64/fabsl.c | 7 + src/math/x86-64/floorl.s | 27 ++ src/math/x86-64/fma.c | 23 ++ src/math/x86-64/fmaf.c | 23 ++ src/math/x86-64/fmodl.c | 9 + src/math/x86-64/llrint.c | 8 + src/math/x86-64/llrintf.c | 8 + src/math/x86-64/llrintl.c | 8 + src/math/x86-64/log10l.s | 7 + src/math/x86-64/log1pl.s | 15 ++ src/math/x86-64/log2l.s | 7 + src/math/x86-64/logl.s | 7 + src/math/x86-64/lrint.c | 8 + src/math/x86-64/lrintf.c | 8 + src/math/x86-64/lrintl.c | 8 + src/math/x86-64/remainderl.c | 9 + src/math/x86-64/remquol.c | 32 +++ src/math/x86-64/rintl.c | 7 + src/math/x86-64/sqrt.c | 7 + src/math/x86-64/sqrtf.c | 7 + src/math/x86-64/sqrtl.c | 7 + src/math/x86-64/truncl.s | 1 + src/math/x86/__invtrigl.s | 0 src/math/x86/acos.s | 18 ++ src/math/x86/acosf.s | 16 ++ src/math/x86/acosl.s | 14 + src/math/x86/asin.s | 21 ++ src/math/x86/asinf.s | 23 ++ src/math/x86/asinl.s | 12 + src/math/x86/atan.s | 16 ++ src/math/x86/atan2.s | 15 ++ src/math/x86/atan2f.s | 17 ++ src/math/x86/atan2l.s | 7 + src/math/x86/atanf.s | 18 ++ src/math/x86/atanl.s | 7 + src/math/x86/ceil.s | 1 + src/math/x86/ceilf.s | 1 + src/math/x86/ceill.s | 1 + src/math/x86/exp2l.s | 1 + src/math/x86/exp_ld.s | 93 +++++++ src/math/x86/expl.s | 101 ++++++++ src/math/x86/expm1l.s | 1 + src/math/x86/fabs.c | 7 + src/math/x86/fabsf.c | 7 + src/math/x86/fabsl.c | 7 + src/math/x86/floor.s | 67 +++++ src/math/x86/floorf.s | 1 + src/math/x86/floorl.s | 1 + src/math/x86/fmod.c | 10 + src/math/x86/fmodf.c | 10 + src/math/x86/fmodl.c | 9 + src/math/x86/hypot.s | 45 ++++ src/math/x86/hypotf.s | 42 +++ src/math/x86/ldexp.s | 1 + src/math/x86/ldexpf.s | 1 + src/math/x86/ldexpl.s | 1 + src/math/x86/llrint.c | 8 + src/math/x86/llrintf.c | 8 + src/math/x86/llrintl.c | 8 + src/math/x86/log.s | 9 + src/math/x86/log10.s | 9 + src/math/x86/log10f.s | 9 + src/math/x86/log10l.s | 7 + src/math/x86/log1p.s | 25 ++ src/math/x86/log1pf.s | 26 ++ src/math/x86/log1pl.s | 15 ++ src/math/x86/log2.s | 9 + src/math/x86/log2f.s | 9 + src/math/x86/log2l.s | 7 + src/math/x86/logf.s | 9 + src/math/x86/logl.s | 7 + src/math/x86/lrint.c | 8 + src/math/x86/lrintf.c | 8 + src/math/x86/lrintl.c | 8 + src/math/x86/remainder.c | 12 + src/math/x86/remainderf.c | 12 + src/math/x86/remainderl.c | 9 + src/math/x86/remquo.s | 50 ++++ src/math/x86/remquof.s | 1 + src/math/x86/remquol.s | 1 + src/math/x86/rint.c | 7 + src/math/x86/rintf.c | 7 + src/math/x86/rintl.c | 7 + src/math/x86/scalbln.s | 1 + src/math/x86/scalblnf.s | 1 + src/math/x86/scalblnl.s | 1 + src/math/x86/scalbn.s | 33 +++ src/math/x86/scalbnf.s | 32 +++ src/math/x86/scalbnl.s | 32 +++ src/math/x86/sqrt.c | 15 ++ src/math/x86/sqrtf.c | 12 + src/math/x86/sqrtl.c | 7 + src/math/x86/trunc.s | 1 + src/math/x86/truncf.s | 1 + src/math/x86/truncl.s | 1 + 284 files changed, 6454 insertions(+), 72 deletions(-) create mode 100644 scripts/mkalltypes.sed create mode 100644 src/.gitignore create mode 100644 src/arch/generic/bits/fenv.h create mode 100644 src/arch/generic/bits/limits.h create mode 100644 src/arch/riscv64/include/bits/alltypes.h.in create mode 100644 src/arch/riscv64/include/bits/fenv.h create mode 100644 src/arch/riscv64/include/bits/float.h create mode 100644 src/arch/x86/include/bits/alltypes.h.in create mode 100644 src/arch/x86/include/bits/fenv.h create mode 100644 src/arch/x86/include/bits/float.h create mode 100644 src/arch/x86/include/bits/limits.h create mode 100644 src/complex/__cexp.c create mode 100644 src/complex/__cexpf.c create mode 100644 src/complex/cabs.c create mode 100644 src/complex/cabsf.c create mode 100644 src/complex/cabsl.c create mode 100644 src/complex/cacos.c create mode 100644 src/complex/cacosf.c create mode 100644 src/complex/cacosh.c create mode 100644 src/complex/cacoshf.c create mode 100644 src/complex/cacoshl.c create mode 100644 src/complex/cacosl.c create mode 100644 src/complex/carg.c create mode 100644 src/complex/cargf.c create mode 100644 src/complex/cargl.c create mode 100644 src/complex/casin.c create mode 100644 src/complex/casinf.c create mode 100644 src/complex/casinh.c create mode 100644 src/complex/casinhf.c create mode 100644 src/complex/casinhl.c create mode 100644 src/complex/casinl.c create mode 100644 src/complex/catan.c create mode 100644 src/complex/catanf.c create mode 100644 src/complex/catanh.c create mode 100644 src/complex/catanhf.c create mode 100644 src/complex/catanhl.c create mode 100644 src/complex/catanl.c create mode 100644 src/complex/ccos.c create mode 100644 src/complex/ccosf.c create mode 100644 src/complex/ccosh.c create mode 100644 src/complex/ccoshf.c create mode 100644 src/complex/ccoshl.c create mode 100644 src/complex/ccosl.c create mode 100644 src/complex/cexp.c create mode 100644 src/complex/cexpf.c create mode 100644 src/complex/cexpl.c create mode 100644 src/complex/cimag.c create mode 100644 src/complex/cimagf.c create mode 100644 src/complex/cimagl.c create mode 100644 src/complex/clog.c create mode 100644 src/complex/clogf.c create mode 100644 src/complex/clogl.c create mode 100644 src/complex/conj.c create mode 100644 src/complex/conjf.c create mode 100644 src/complex/conjl.c create mode 100644 src/complex/cpow.c create mode 100644 src/complex/cpowf.c create mode 100644 src/complex/cpowl.c create mode 100644 src/complex/cproj.c create mode 100644 src/complex/cprojf.c create mode 100644 src/complex/cprojl.c create mode 100644 src/complex/creal.c create mode 100644 src/complex/crealf.c create mode 100644 src/complex/creall.c create mode 100644 src/complex/csin.c create mode 100644 src/complex/csinf.c create mode 100644 src/complex/csinh.c create mode 100644 src/complex/csinhf.c create mode 100644 src/complex/csinhl.c create mode 100644 src/complex/csinl.c create mode 100644 src/complex/csqrt.c create mode 100644 src/complex/csqrtf.c create mode 100644 src/complex/csqrtl.c create mode 100644 src/complex/ctan.c create mode 100644 src/complex/ctanf.c create mode 100644 src/complex/ctanh.c create mode 100644 src/complex/ctanhf.c create mode 100644 src/complex/ctanhl.c create mode 100644 src/complex/ctanl.c delete mode 100644 src/ctype/isalpha.c delete mode 100644 src/ctype/isdigit.c delete mode 100644 src/ctype/isgraph.c delete mode 100644 src/ctype/islower.c delete mode 100644 src/ctype/isprint.c delete mode 100644 src/ctype/isspace.c delete mode 100644 src/ctype/isupper.c create mode 100644 src/fenv/__flt_rounds.c create mode 100644 src/fenv/aarch64/fenv.s create mode 100644 src/fenv/arm/fenv-hf.S create mode 100644 src/fenv/arm/fenv.c create mode 100644 src/fenv/fegetexceptflag.c create mode 100644 src/fenv/feholdexcept.c create mode 100644 src/fenv/fenv.c create mode 100644 src/fenv/fesetexceptflag.c create mode 100644 src/fenv/fesetround.c create mode 100644 src/fenv/feupdateenv.c create mode 100644 src/fenv/i386/fenv.s create mode 100644 src/fenv/riscv64/fenv-sf.c create mode 100644 src/fenv/riscv64/fenv.S create mode 100644 src/fenv/x86-64/fenv.s create mode 100644 src/include/bits/alltypes.h.in create mode 100644 src/include/complex.h create mode 100644 src/include/fenv.h create mode 100644 src/include/float.h create mode 100644 src/include/fnmatch.h create mode 100644 src/include/iso646.h create mode 100644 src/include/limits.h create mode 100644 src/internal/complex_impl.h create mode 100644 src/math/__invtrigl.c create mode 100644 src/math/__invtrigl.h create mode 100644 src/math/__math_invalidl.c create mode 100644 src/math/__polevll.c create mode 100644 src/math/__signbit.c create mode 100644 src/math/__signbitf.c create mode 100644 src/math/__signbitl.c create mode 100644 src/math/arm/fabs.c create mode 100644 src/math/arm/fabsf.c create mode 100644 src/math/arm/fma.c create mode 100644 src/math/arm/fmaf.c create mode 100644 src/math/arm/sqrt.c create mode 100644 src/math/arm/sqrtf.c create mode 100644 src/math/arm64/ceil.c create mode 100644 src/math/arm64/ceilf.c create mode 100644 src/math/arm64/fabs.c create mode 100644 src/math/arm64/fabsf.c create mode 100644 src/math/arm64/floor.c create mode 100644 src/math/arm64/floorf.c create mode 100644 src/math/arm64/fma.c create mode 100644 src/math/arm64/fmaf.c create mode 100644 src/math/arm64/fmax.c create mode 100644 src/math/arm64/fmaxf.c create mode 100644 src/math/arm64/fmin.c create mode 100644 src/math/arm64/fminf.c create mode 100644 src/math/arm64/llrint.c create mode 100644 src/math/arm64/llrintf.c create mode 100644 src/math/arm64/llround.c create mode 100644 src/math/arm64/llroundf.c create mode 100644 src/math/arm64/lrint.c create mode 100644 src/math/arm64/lrintf.c create mode 100644 src/math/arm64/lround.c create mode 100644 src/math/arm64/lroundf.c create mode 100644 src/math/arm64/nearbyint.c create mode 100644 src/math/arm64/nearbyintf.c create mode 100644 src/math/arm64/rint.c create mode 100644 src/math/arm64/rintf.c create mode 100644 src/math/arm64/round.c create mode 100644 src/math/arm64/roundf.c create mode 100644 src/math/arm64/sqrt.c create mode 100644 src/math/arm64/sqrtf.c create mode 100644 src/math/arm64/trunc.c create mode 100644 src/math/arm64/truncf.c create mode 100644 src/math/atan2l.c create mode 100644 src/math/atanl.c create mode 100644 src/math/copysignl.c create mode 100644 src/math/fabsl.c create mode 100644 src/math/frexpl.c create mode 100644 src/math/hypotl.c create mode 100644 src/math/logl.c create mode 100644 src/math/riscv64/copysign.c create mode 100644 src/math/riscv64/copysignf.c create mode 100644 src/math/riscv64/fabs.c create mode 100644 src/math/riscv64/fabsf.c create mode 100644 src/math/riscv64/fma.c create mode 100644 src/math/riscv64/fmaf.c create mode 100644 src/math/riscv64/fmax.c create mode 100644 src/math/riscv64/fmaxf.c create mode 100644 src/math/riscv64/fmin.c create mode 100644 src/math/riscv64/fminf.c create mode 100644 src/math/riscv64/sqrt.c create mode 100644 src/math/riscv64/sqrtf.c create mode 100644 src/math/sqrt_data.h create mode 100644 src/math/sqrtl.c create mode 100644 src/math/x86-64/__invtrigl.s create mode 100644 src/math/x86-64/acosl.s create mode 100644 src/math/x86-64/asinl.s create mode 100644 src/math/x86-64/atan2l.s create mode 100644 src/math/x86-64/atanl.s create mode 100644 src/math/x86-64/ceill.s create mode 100644 src/math/x86-64/exp2l.s create mode 100644 src/math/x86-64/expl.s create mode 100644 src/math/x86-64/expm1l.s create mode 100644 src/math/x86-64/fabs.c create mode 100644 src/math/x86-64/fabsf.c create mode 100644 src/math/x86-64/fabsl.c create mode 100644 src/math/x86-64/floorl.s create mode 100644 src/math/x86-64/fma.c create mode 100644 src/math/x86-64/fmaf.c create mode 100644 src/math/x86-64/fmodl.c create mode 100644 src/math/x86-64/llrint.c create mode 100644 src/math/x86-64/llrintf.c create mode 100644 src/math/x86-64/llrintl.c create mode 100644 src/math/x86-64/log10l.s create mode 100644 src/math/x86-64/log1pl.s create mode 100644 src/math/x86-64/log2l.s create mode 100644 src/math/x86-64/logl.s create mode 100644 src/math/x86-64/lrint.c create mode 100644 src/math/x86-64/lrintf.c create mode 100644 src/math/x86-64/lrintl.c create mode 100644 src/math/x86-64/remainderl.c create mode 100644 src/math/x86-64/remquol.c create mode 100644 src/math/x86-64/rintl.c create mode 100644 src/math/x86-64/sqrt.c create mode 100644 src/math/x86-64/sqrtf.c create mode 100644 src/math/x86-64/sqrtl.c create mode 100644 src/math/x86-64/truncl.s create mode 100644 src/math/x86/__invtrigl.s create mode 100644 src/math/x86/acos.s create mode 100644 src/math/x86/acosf.s create mode 100644 src/math/x86/acosl.s create mode 100644 src/math/x86/asin.s create mode 100644 src/math/x86/asinf.s create mode 100644 src/math/x86/asinl.s create mode 100644 src/math/x86/atan.s create mode 100644 src/math/x86/atan2.s create mode 100644 src/math/x86/atan2f.s create mode 100644 src/math/x86/atan2l.s create mode 100644 src/math/x86/atanf.s create mode 100644 src/math/x86/atanl.s create mode 100644 src/math/x86/ceil.s create mode 100644 src/math/x86/ceilf.s create mode 100644 src/math/x86/ceill.s create mode 100644 src/math/x86/exp2l.s create mode 100644 src/math/x86/exp_ld.s create mode 100644 src/math/x86/expl.s create mode 100644 src/math/x86/expm1l.s create mode 100644 src/math/x86/fabs.c create mode 100644 src/math/x86/fabsf.c create mode 100644 src/math/x86/fabsl.c create mode 100644 src/math/x86/floor.s create mode 100644 src/math/x86/floorf.s create mode 100644 src/math/x86/floorl.s create mode 100644 src/math/x86/fmod.c create mode 100644 src/math/x86/fmodf.c create mode 100644 src/math/x86/fmodl.c create mode 100644 src/math/x86/hypot.s create mode 100644 src/math/x86/hypotf.s create mode 100644 src/math/x86/ldexp.s create mode 100644 src/math/x86/ldexpf.s create mode 100644 src/math/x86/ldexpl.s create mode 100644 src/math/x86/llrint.c create mode 100644 src/math/x86/llrintf.c create mode 100644 src/math/x86/llrintl.c create mode 100644 src/math/x86/log.s create mode 100644 src/math/x86/log10.s create mode 100644 src/math/x86/log10f.s create mode 100644 src/math/x86/log10l.s create mode 100644 src/math/x86/log1p.s create mode 100644 src/math/x86/log1pf.s create mode 100644 src/math/x86/log1pl.s create mode 100644 src/math/x86/log2.s create mode 100644 src/math/x86/log2f.s create mode 100644 src/math/x86/log2l.s create mode 100644 src/math/x86/logf.s create mode 100644 src/math/x86/logl.s create mode 100644 src/math/x86/lrint.c create mode 100644 src/math/x86/lrintf.c create mode 100644 src/math/x86/lrintl.c create mode 100644 src/math/x86/remainder.c create mode 100644 src/math/x86/remainderf.c create mode 100644 src/math/x86/remainderl.c create mode 100644 src/math/x86/remquo.s create mode 100644 src/math/x86/remquof.s create mode 100644 src/math/x86/remquol.s create mode 100644 src/math/x86/rint.c create mode 100644 src/math/x86/rintf.c create mode 100644 src/math/x86/rintl.c create mode 100644 src/math/x86/scalbln.s create mode 100644 src/math/x86/scalblnf.s create mode 100644 src/math/x86/scalblnl.s create mode 100644 src/math/x86/scalbn.s create mode 100644 src/math/x86/scalbnf.s create mode 100644 src/math/x86/scalbnl.s create mode 100644 src/math/x86/sqrt.c create mode 100644 src/math/x86/sqrtf.c create mode 100644 src/math/x86/sqrtl.c create mode 100644 src/math/x86/trunc.s create mode 100644 src/math/x86/truncf.s create mode 100644 src/math/x86/truncl.s diff --git a/Makefile b/Makefile index 58c6db4..175d682 100644 --- a/Makefile +++ b/Makefile @@ -7,29 +7,37 @@ # Change Logs: # Date Author Notes # 2022-06-05 JasonHu Init +# 2022-07-24 GuEe-GUI Add generate ## MAKE :=make CP :=cp +RM :=rm -rf SDK_DIR := ./sdk SDK_INC_DIR := $(SDK_DIR)/include SDK_LIB_DIR := $(SDK_DIR)/lib +SCRIPT_DIR := ./scripts LIBNXBASE_NAME := libnxbase.a LIBNXBASE_DIR := ../nxos-baselib LIBNXBASE_LIB_DIR := $(LIBNXBASE_DIR)/build/ LIBNXBASE_INC_DIR := $(LIBNXBASE_DIR)/src/include +# generate +GENERATE_FILES = \ + src/include/bits/alltypes.h + # # Cmds # .PHONY: all clean -all: +all: $(GENERATE_FILES) @$(MAKE) -s -C src O=../build clean: + @$(RM) $(GENERATE_FILES) @$(MAKE) -s -C src clean O=../build # update lib @@ -37,3 +45,6 @@ ulib: @$(CP) $(LIBNXBASE_LIB_DIR)/$(LIBNXBASE_NAME) $(SDK_LIB_DIR) @$(CP) -r $(LIBNXBASE_INC_DIR)/* $(SDK_INC_DIR) @echo copy user lib done. + +src/include/bits/alltypes.h: + sed -f $(SCRIPT_DIR)/mkalltypes.sed src/arch/$(ARCH)/include/bits/alltypes.h.in src/include/bits/alltypes.h.in > $@ diff --git a/scripts/mkalltypes.sed b/scripts/mkalltypes.sed new file mode 100644 index 0000000..fa15efc --- /dev/null +++ b/scripts/mkalltypes.sed @@ -0,0 +1,15 @@ +/^TYPEDEF/s/TYPEDEF \(.*\) \([^ ]*\);$/#if defined(__NEED_\2) \&\& !defined(__DEFINED_\2)\ +typedef \1 \2;\ +#define __DEFINED_\2\ +#endif\ +/ +/^STRUCT/s/STRUCT * \([^ ]*\) \(.*\);$/#if defined(__NEED_struct_\1) \&\& !defined(__DEFINED_struct_\1)\ +struct \1 \2;\ +#define __DEFINED_struct_\1\ +#endif\ +/ +/^UNION/s/UNION * \([^ ]*\) \(.*\);$/#if defined(__NEED_union_\1) \&\& !defined(__DEFINED_union_\1)\ +union \1 \2;\ +#define __DEFINED_union_\1\ +#endif\ +/ diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..0ace700 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,2 @@ +# generate +include/bits/alltypes.h \ No newline at end of file diff --git a/src/Makefile b/src/Makefile index e8ca307..ab5fc37 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ else $(error unsupported arch!) endif -X_CFLAGS := $(MCFLAGS) -fno-builtin -fno-stack-protector +X_CFLAGS := $(MCFLAGS) -fno-builtin -fno-stack-protector -Wno-unknown-pragmas X_CFLAGS += -nostdlib -nostdinc -Wall -O3 -ffunction-sections -fdata-sections -ffreestanding -std=gnu99 X_ACFLAGS += $(MCFLAGS) -Wall -O3 -ffunction-sections -fdata-sections -ffreestanding -std=gnu99 @@ -44,11 +44,15 @@ X_LIBDIRS := $(LIBS_DIR) X_LIBS += libnxbase.a SRC += exit/ +SRC += complex/ SRC += ctype/ SRC += crt/ SRC += errno/ +SRC += fenv/ +SRC += fenv/$(ARCH)/ SRC += string/ SRC += math/ +SRC += math/$(ARCH)/ SRC += signal/ SRC += gcc/$(ARCH)/ SRC += arch/$(ARCH)/*.S diff --git a/src/arch/generic/bits/fenv.h b/src/arch/generic/bits/fenv.h new file mode 100644 index 0000000..edbdea2 --- /dev/null +++ b/src/arch/generic/bits/fenv.h @@ -0,0 +1,10 @@ +#define FE_ALL_EXCEPT 0 +#define FE_TONEAREST 0 + +typedef unsigned long fexcept_t; + +typedef struct { + unsigned long __cw; +} fenv_t; + +#define FE_DFL_ENV ((const fenv_t *) -1) diff --git a/src/arch/generic/bits/limits.h b/src/arch/generic/bits/limits.h new file mode 100644 index 0000000..e69de29 diff --git a/src/arch/riscv64/include/bits/alltypes.h.in b/src/arch/riscv64/include/bits/alltypes.h.in new file mode 100644 index 0000000..4579d17 --- /dev/null +++ b/src/arch/riscv64/include/bits/alltypes.h.in @@ -0,0 +1,18 @@ +#define _Addr long +#define _Int64 long +#define _Reg long + +#define __BYTE_ORDER 1234 +#define __LONG_MAX 0x7fffffffffffffffL + +#ifndef __cplusplus +TYPEDEF int wchar_t; +#endif + +TYPEDEF int blksize_t; +TYPEDEF unsigned int nlink_t; + +TYPEDEF float float_t; +TYPEDEF double double_t; + +TYPEDEF struct { long long __ll; long double __ld; } max_align_t; diff --git a/src/arch/riscv64/include/bits/fenv.h b/src/arch/riscv64/include/bits/fenv.h new file mode 100644 index 0000000..806ec40 --- /dev/null +++ b/src/arch/riscv64/include/bits/fenv.h @@ -0,0 +1,17 @@ +#define FE_INVALID 16 +#define FE_DIVBYZERO 8 +#define FE_OVERFLOW 4 +#define FE_UNDERFLOW 2 +#define FE_INEXACT 1 + +#define FE_ALL_EXCEPT 31 + +#define FE_TONEAREST 0 +#define FE_DOWNWARD 2 +#define FE_UPWARD 3 +#define FE_TOWARDZERO 1 + +typedef unsigned int fexcept_t; +typedef unsigned int fenv_t; + +#define FE_DFL_ENV ((const fenv_t *) -1) diff --git a/src/arch/riscv64/include/bits/float.h b/src/arch/riscv64/include/bits/float.h new file mode 100644 index 0000000..719c790 --- /dev/null +++ b/src/arch/riscv64/include/bits/float.h @@ -0,0 +1,16 @@ +#define FLT_EVAL_METHOD 0 + +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L + +#define LDBL_MANT_DIG 113 +#define LDBL_MIN_EXP (-16381) +#define LDBL_MAX_EXP 16384 + +#define LDBL_DIG 33 +#define LDBL_MIN_10_EXP (-4931) +#define LDBL_MAX_10_EXP 4932 + +#define DECIMAL_DIG 36 diff --git a/src/arch/x86/include/bits/alltypes.h.in b/src/arch/x86/include/bits/alltypes.h.in new file mode 100644 index 0000000..6feb03a --- /dev/null +++ b/src/arch/x86/include/bits/alltypes.h.in @@ -0,0 +1,31 @@ +#define _REDIR_TIME64 1 +#define _Addr int +#define _Int64 long long +#define _Reg int + +#define __BYTE_ORDER 1234 +#define __LONG_MAX 0x7fffffffL + +#ifndef __cplusplus +#ifdef __WCHAR_TYPE__ +TYPEDEF __WCHAR_TYPE__ wchar_t; +#else +TYPEDEF long wchar_t; +#endif +#endif + +#if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 0 +TYPEDEF float float_t; +TYPEDEF double double_t; +#else +TYPEDEF long double float_t; +TYPEDEF long double double_t; +#endif + +#if !defined(__cplusplus) +TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t; +#elif defined(__GNUC__) +TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t; +#else +TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t; +#endif diff --git a/src/arch/x86/include/bits/fenv.h b/src/arch/x86/include/bits/fenv.h new file mode 100644 index 0000000..4430009 --- /dev/null +++ b/src/arch/x86/include/bits/fenv.h @@ -0,0 +1,33 @@ +#define FE_INVALID 1 +#define __FE_DENORM 2 +#define FE_DIVBYZERO 4 +#define FE_OVERFLOW 8 +#define FE_UNDERFLOW 16 +#define FE_INEXACT 32 + +#define FE_ALL_EXCEPT 63 + +#define FE_TONEAREST 0 +#define FE_DOWNWARD 0x400 +#define FE_UPWARD 0x800 +#define FE_TOWARDZERO 0xc00 + +typedef unsigned short fexcept_t; + +typedef struct { + unsigned short __control_word; + unsigned short __unused1; + unsigned short __status_word; + unsigned short __unused2; + unsigned short __tags; + unsigned short __unused3; + unsigned int __eip; + unsigned short __cs_selector; + unsigned int __opcode:11; + unsigned int __unused4:5; + unsigned int __data_offset; + unsigned short __data_selector; + unsigned short __unused5; +} fenv_t; + +#define FE_DFL_ENV ((const fenv_t *) -1) diff --git a/src/arch/x86/include/bits/float.h b/src/arch/x86/include/bits/float.h new file mode 100644 index 0000000..dd6e402 --- /dev/null +++ b/src/arch/x86/include/bits/float.h @@ -0,0 +1,20 @@ +#ifdef __FLT_EVAL_METHOD__ +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#else +#define FLT_EVAL_METHOD 2 +#endif + +#define LDBL_TRUE_MIN 3.6451995318824746025e-4951L +#define LDBL_MIN 3.3621031431120935063e-4932L +#define LDBL_MAX 1.1897314953572317650e+4932L +#define LDBL_EPSILON 1.0842021724855044340e-19L + +#define LDBL_MANT_DIG 64 +#define LDBL_MIN_EXP (-16381) +#define LDBL_MAX_EXP 16384 + +#define LDBL_DIG 18 +#define LDBL_MIN_10_EXP (-4931) +#define LDBL_MAX_10_EXP 4932 + +#define DECIMAL_DIG 21 diff --git a/src/arch/x86/include/bits/limits.h b/src/arch/x86/include/bits/limits.h new file mode 100644 index 0000000..07743b6 --- /dev/null +++ b/src/arch/x86/include/bits/limits.h @@ -0,0 +1 @@ +#define PAGESIZE 4096 diff --git a/src/complex/__cexp.c b/src/complex/__cexp.c new file mode 100644 index 0000000..003d20a --- /dev/null +++ b/src/complex/__cexp.c @@ -0,0 +1,87 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_exp.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +static const uint32_t k = 1799; /* constant for reduction */ +static const double kln2 = 1246.97177782734161156; /* k * ln2 */ + +/* + * Compute exp(x), scaled to avoid spurious overflow. An exponent is + * returned separately in 'expt'. + * + * Input: ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91 + * Output: 2**1023 <= y < 2**1024 + */ +static double __frexp_exp(double x, int *expt) +{ + double exp_x; + uint32_t hx; + + /* + * We use exp(x) = exp(x - kln2) * 2**k, carefully chosen to + * minimize |exp(kln2) - 2**k|. We also scale the exponent of + * exp_x to MAX_EXP so that the result can be multiplied by + * a tiny number without losing accuracy due to denormalization. + */ + exp_x = exp(x - kln2); + GET_HIGH_WORD(hx, exp_x); + *expt = (hx >> 20) - (0x3ff + 1023) + k; + SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 1023) << 20)); + return exp_x; +} + +/* + * __ldexp_cexp(x, expt) compute exp(x) * 2**expt. + * It is intended for large arguments (real part >= ln(DBL_MAX)) + * where care is needed to avoid overflow. + * + * The present implementation is narrowly tailored for our hyperbolic and + * exponential functions. We assume expt is small (0 or -1), and the caller + * has filtered out very large x, for which overflow would be inevitable. + */ +double complex __ldexp_cexp(double complex z, int expt) +{ + double x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = creal(z); + y = cimag(z); + exp_x = __frexp_exp(x, &ex_expt); + expt += ex_expt; + + /* + * Arrange so that scale1 * scale2 == 2**expt. We use this to + * compensate for scalbn being horrendously slow. + */ + half_expt = expt / 2; + INSERT_WORDS(scale1, (0x3ff + half_expt) << 20, 0); + half_expt = expt - half_expt; + INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0); + + return CMPLX(cos(y) * exp_x * scale1 * scale2, sin(y) * exp_x * scale1 * scale2); +} diff --git a/src/complex/__cexpf.c b/src/complex/__cexpf.c new file mode 100644 index 0000000..ee5ff2b --- /dev/null +++ b/src/complex/__cexpf.c @@ -0,0 +1,68 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_expf.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +static const uint32_t k = 235; /* constant for reduction */ +static const float kln2 = 162.88958740F; /* k * ln2 */ + +/* + * See __cexp.c for details. + * + * Input: ln(FLT_MAX) <= x < ln(2 * FLT_MAX / FLT_MIN_DENORM) ~= 192.7 + * Output: 2**127 <= y < 2**128 + */ +static float __frexp_expf(float x, int *expt) +{ + float exp_x; + uint32_t hx; + + exp_x = expf(x - kln2); + GET_FLOAT_WORD(hx, exp_x); + *expt = (hx >> 23) - (0x7f + 127) + k; + SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23)); + return exp_x; +} + +float complex __ldexp_cexpf(float complex z, int expt) +{ + float x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = crealf(z); + y = cimagf(z); + exp_x = __frexp_expf(x, &ex_expt); + expt += ex_expt; + + half_expt = expt / 2; + SET_FLOAT_WORD(scale1, (0x7f + half_expt) << 23); + half_expt = expt - half_expt; + SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23); + + return CMPLXF(cosf(y) * exp_x * scale1 * scale2, + sinf(y) * exp_x * scale1 * scale2); +} diff --git a/src/complex/cabs.c b/src/complex/cabs.c new file mode 100644 index 0000000..c5ad58a --- /dev/null +++ b/src/complex/cabs.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +double cabs(double complex z) +{ + return hypot(creal(z), cimag(z)); +} diff --git a/src/complex/cabsf.c b/src/complex/cabsf.c new file mode 100644 index 0000000..619f28d --- /dev/null +++ b/src/complex/cabsf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float cabsf(float complex z) +{ + return hypotf(crealf(z), cimagf(z)); +} diff --git a/src/complex/cabsl.c b/src/complex/cabsl.c new file mode 100644 index 0000000..d37e3f2 --- /dev/null +++ b/src/complex/cabsl.c @@ -0,0 +1,13 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double cabsl(long double complex z) +{ + return cabs(z); +} +#else +long double cabsl(long double complex z) +{ + return hypotl(creall(z), cimagl(z)); +} +#endif diff --git a/src/complex/cacos.c b/src/complex/cacos.c new file mode 100644 index 0000000..c39d257 --- /dev/null +++ b/src/complex/cacos.c @@ -0,0 +1,11 @@ +#include "complex_impl.h" + +// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997 + +/* acos(z) = pi/2 - asin(z) */ + +double complex cacos(double complex z) +{ + z = casin(z); + return CMPLX(M_PI_2 - creal(z), -cimag(z)); +} diff --git a/src/complex/cacosf.c b/src/complex/cacosf.c new file mode 100644 index 0000000..ed8acf0 --- /dev/null +++ b/src/complex/cacosf.c @@ -0,0 +1,11 @@ +#include "complex_impl.h" + +// FIXME + +static const float float_pi_2 = M_PI_2; + +float complex cacosf(float complex z) +{ + z = casinf(z); + return CMPLXF(float_pi_2 - crealf(z), -cimagf(z)); +} diff --git a/src/complex/cacosh.c b/src/complex/cacosh.c new file mode 100644 index 0000000..76127f7 --- /dev/null +++ b/src/complex/cacosh.c @@ -0,0 +1,12 @@ +#include "complex_impl.h" + +/* acosh(z) = i acos(z) */ + +double complex cacosh(double complex z) +{ + int zineg = signbit(cimag(z)); + + z = cacos(z); + if (zineg) return CMPLX(cimag(z), -creal(z)); + else return CMPLX(-cimag(z), creal(z)); +} diff --git a/src/complex/cacoshf.c b/src/complex/cacoshf.c new file mode 100644 index 0000000..8bd8058 --- /dev/null +++ b/src/complex/cacoshf.c @@ -0,0 +1,10 @@ +#include "complex_impl.h" + +float complex cacoshf(float complex z) +{ + int zineg = signbit(cimagf(z)); + + z = cacosf(z); + if (zineg) return CMPLXF(cimagf(z), -crealf(z)); + else return CMPLXF(-cimagf(z), crealf(z)); +} diff --git a/src/complex/cacoshl.c b/src/complex/cacoshl.c new file mode 100644 index 0000000..3a284be --- /dev/null +++ b/src/complex/cacoshl.c @@ -0,0 +1,17 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex cacoshl(long double complex z) +{ + return cacosh(z); +} +#else +long double complex cacoshl(long double complex z) +{ + int zineg = signbit(cimagl(z)); + + z = cacosl(z); + if (zineg) return CMPLXL(cimagl(z), -creall(z)); + else return CMPLXL(-cimagl(z), creall(z)); +} +#endif diff --git a/src/complex/cacosl.c b/src/complex/cacosl.c new file mode 100644 index 0000000..cc20dcd --- /dev/null +++ b/src/complex/cacosl.c @@ -0,0 +1,16 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex cacosl(long double complex z) +{ + return cacos(z); +} +#else +// FIXME +#define PI_2 1.57079632679489661923132169163975144L +long double complex cacosl(long double complex z) +{ + z = casinl(z); + return CMPLXL(PI_2 - creall(z), -cimagl(z)); +} +#endif diff --git a/src/complex/carg.c b/src/complex/carg.c new file mode 100644 index 0000000..dfe9b97 --- /dev/null +++ b/src/complex/carg.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +double carg(double complex z) +{ + return atan2(cimag(z), creal(z)); +} diff --git a/src/complex/cargf.c b/src/complex/cargf.c new file mode 100644 index 0000000..9a6c19b --- /dev/null +++ b/src/complex/cargf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float cargf(float complex z) +{ + return atan2f(cimagf(z), crealf(z)); +} diff --git a/src/complex/cargl.c b/src/complex/cargl.c new file mode 100644 index 0000000..88f95f9 --- /dev/null +++ b/src/complex/cargl.c @@ -0,0 +1,13 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double cargl(long double complex z) +{ + return carg(z); +} +#else +long double cargl(long double complex z) +{ + return atan2l(cimagl(z), creall(z)); +} +#endif diff --git a/src/complex/casin.c b/src/complex/casin.c new file mode 100644 index 0000000..3244beb --- /dev/null +++ b/src/complex/casin.c @@ -0,0 +1,17 @@ +#include "complex_impl.h" + +// FIXME + +/* asin(z) = -i log(i z + sqrt(1 - z*z)) */ + +double complex casin(double complex z) +{ + double complex w; + double x, y; + + x = creal(z); + y = cimag(z); + w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y); + double complex r = clog(CMPLX(-y, x) + csqrt(w)); + return CMPLX(cimag(r), -creal(r)); +} diff --git a/src/complex/casinf.c b/src/complex/casinf.c new file mode 100644 index 0000000..2cda2f0 --- /dev/null +++ b/src/complex/casinf.c @@ -0,0 +1,15 @@ +#include "complex_impl.h" + +// FIXME + +float complex casinf(float complex z) +{ + float complex w; + float x, y; + + x = crealf(z); + y = cimagf(z); + w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y); + float complex r = clogf(CMPLXF(-y, x) + csqrtf(w)); + return CMPLXF(cimagf(r), -crealf(r)); +} diff --git a/src/complex/casinh.c b/src/complex/casinh.c new file mode 100644 index 0000000..50bf27c --- /dev/null +++ b/src/complex/casinh.c @@ -0,0 +1,9 @@ +#include "complex_impl.h" + +/* asinh(z) = -i asin(i z) */ + +double complex casinh(double complex z) +{ + z = casin(CMPLX(-cimag(z), creal(z))); + return CMPLX(cimag(z), -creal(z)); +} diff --git a/src/complex/casinhf.c b/src/complex/casinhf.c new file mode 100644 index 0000000..93d82e5 --- /dev/null +++ b/src/complex/casinhf.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +float complex casinhf(float complex z) +{ + z = casinf(CMPLXF(-cimagf(z), crealf(z))); + return CMPLXF(cimagf(z), -crealf(z)); +} diff --git a/src/complex/casinhl.c b/src/complex/casinhl.c new file mode 100644 index 0000000..68ba3dd --- /dev/null +++ b/src/complex/casinhl.c @@ -0,0 +1,14 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex casinhl(long double complex z) +{ + return casinh(z); +} +#else +long double complex casinhl(long double complex z) +{ + z = casinl(CMPLXL(-cimagl(z), creall(z))); + return CMPLXL(cimagl(z), -creall(z)); +} +#endif diff --git a/src/complex/casinl.c b/src/complex/casinl.c new file mode 100644 index 0000000..072adc4 --- /dev/null +++ b/src/complex/casinl.c @@ -0,0 +1,21 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex casinl(long double complex z) +{ + return casin(z); +} +#else +// FIXME +long double complex casinl(long double complex z) +{ + long double complex w; + long double x, y; + + x = creall(z); + y = cimagl(z); + w = CMPLXL(1.0 - (x - y)*(x + y), -2.0*x*y); + long double complex r = clogl(CMPLXL(-y, x) + csqrtl(w)); + return CMPLXL(cimagl(r), -creall(r)); +} +#endif diff --git a/src/complex/catan.c b/src/complex/catan.c new file mode 100644 index 0000000..ccc2fb5 --- /dev/null +++ b/src/complex/catan.c @@ -0,0 +1,107 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Complex circular arc tangent + * + * + * SYNOPSIS: + * + * double complex catan(); + * double complex z, w; + * + * w = catan (z); + * + * + * DESCRIPTION: + * + * If + * z = x + iy, + * + * then + * 1 ( 2x ) + * Re w = - arctan(-----------) + k PI + * 2 ( 2 2) + * (1 - x - y ) + * + * ( 2 2) + * 1 (x + (y+1) ) + * Im w = - log(------------) + * 4 ( 2 2) + * (x + (y-1) ) + * + * Where k is an arbitrary integer. + * + * catan(z) = -i catanh(iz). + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * DEC -10,+10 5900 1.3e-16 7.8e-18 + * IEEE -10,+10 30000 2.3e-15 8.5e-17 + * The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, + * had peak relative error 1.5e-16, rms relative error + * 2.9e-17. See also clog(). + */ + +#include "complex_impl.h" + +#define MAXNUM 1.0e308 + +static const double DP1 = 3.14159265160560607910E0; +static const double DP2 = 1.98418714791870343106E-9; +static const double DP3 = 1.14423774522196636802E-17; + +static double _redupi(double x) +{ + double t; + long i; + + t = x/M_PI; + if (t >= 0.0) + t += 0.5; + else + t -= 0.5; + + i = t; /* the multiple */ + t = i; + t = ((x - t * DP1) - t * DP2) - t * DP3; + return t; +} + +double complex catan(double complex z) +{ + double complex w; + double a, t, x, x2, y; + + x = creal(z); + y = cimag(z); + + x2 = x * x; + a = 1.0 - x2 - (y * y); + + t = 0.5 * atan2(2.0 * x, a); + w = _redupi(t); + + t = y - 1.0; + a = x2 + (t * t); + + t = y + 1.0; + a = (x2 + t * t)/a; + w = CMPLX(w, 0.25 * log(a)); + return w; +} diff --git a/src/complex/catanf.c b/src/complex/catanf.c new file mode 100644 index 0000000..1d569f2 --- /dev/null +++ b/src/complex/catanf.c @@ -0,0 +1,105 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Complex circular arc tangent + * + * + * SYNOPSIS: + * + * float complex catanf(); + * float complex z, w; + * + * w = catanf( z ); + * + * + * DESCRIPTION: + * + * If + * z = x + iy, + * + * then + * 1 ( 2x ) + * Re w = - arctan(-----------) + k PI + * 2 ( 2 2) + * (1 - x - y ) + * + * ( 2 2) + * 1 (x + (y+1) ) + * Im w = - log(------------) + * 4 ( 2 2) + * (x + (y-1) ) + * + * Where k is an arbitrary integer. + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE -10,+10 30000 2.3e-6 5.2e-8 + */ + +#include "complex_impl.h" + +#define MAXNUMF 1.0e38F + +static const double DP1 = 3.140625; +static const double DP2 = 9.67502593994140625E-4; +static const double DP3 = 1.509957990978376432E-7; + +static const float float_pi = M_PI; + +static float _redupif(float xx) +{ + float x, t; + long i; + + x = xx; + t = x/float_pi; + if (t >= 0.0f) + t += 0.5f; + else + t -= 0.5f; + + i = t; /* the multiple */ + t = i; + t = ((x - t * DP1) - t * DP2) - t * DP3; + return t; +} + +float complex catanf(float complex z) +{ + float complex w; + float a, t, x, x2, y; + + x = crealf(z); + y = cimagf(z); + + x2 = x * x; + a = 1.0f - x2 - (y * y); + + t = 0.5f * atan2f(2.0f * x, a); + w = _redupif(t); + + t = y - 1.0f; + a = x2 + (t * t); + + t = y + 1.0f; + a = (x2 + (t * t))/a; + w = CMPLXF(w, 0.25f * logf(a)); + return w; +} diff --git a/src/complex/catanh.c b/src/complex/catanh.c new file mode 100644 index 0000000..c324c7f --- /dev/null +++ b/src/complex/catanh.c @@ -0,0 +1,9 @@ +#include "complex_impl.h" + +/* atanh = -i atan(i z) */ + +double complex catanh(double complex z) +{ + z = catan(CMPLX(-cimag(z), creal(z))); + return CMPLX(cimag(z), -creal(z)); +} diff --git a/src/complex/catanhf.c b/src/complex/catanhf.c new file mode 100644 index 0000000..b0505f6 --- /dev/null +++ b/src/complex/catanhf.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +float complex catanhf(float complex z) +{ + z = catanf(CMPLXF(-cimagf(z), crealf(z))); + return CMPLXF(cimagf(z), -crealf(z)); +} diff --git a/src/complex/catanhl.c b/src/complex/catanhl.c new file mode 100644 index 0000000..6025c41 --- /dev/null +++ b/src/complex/catanhl.c @@ -0,0 +1,14 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex catanhl(long double complex z) +{ + return catanh(z); +} +#else +long double complex catanhl(long double complex z) +{ + z = catanl(CMPLXL(-cimagl(z), creall(z))); + return CMPLXL(cimagl(z), -creall(z)); +} +#endif diff --git a/src/complex/catanl.c b/src/complex/catanl.c new file mode 100644 index 0000000..e62526c --- /dev/null +++ b/src/complex/catanl.c @@ -0,0 +1,114 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/s_catanl.c */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Complex circular arc tangent + * + * + * SYNOPSIS: + * + * long double complex catanl(); + * long double complex z, w; + * + * w = catanl( z ); + * + * + * DESCRIPTION: + * + * If + * z = x + iy, + * + * then + * 1 ( 2x ) + * Re w = - arctan(-----------) + k PI + * 2 ( 2 2) + * (1 - x - y ) + * + * ( 2 2) + * 1 (x + (y+1) ) + * Im w = - log(------------) + * 4 ( 2 2) + * (x + (y-1) ) + * + * Where k is an arbitrary integer. + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * DEC -10,+10 5900 1.3e-16 7.8e-18 + * IEEE -10,+10 30000 2.3e-15 8.5e-17 + * The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, + * had peak relative error 1.5e-16, rms relative error + * 2.9e-17. See also clog(). + */ + +#include +#include +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex catanl(long double complex z) +{ + return catan(z); +} +#else +static const long double PIL = 3.141592653589793238462643383279502884197169L; +static const long double DP1 = 3.14159265358979323829596852490908531763125L; +static const long double DP2 = 1.6667485837041756656403424829301998703007e-19L; +static const long double DP3 = 1.8830410776607851167459095484560349402753e-39L; + +static long double redupil(long double x) +{ + long double t; + long i; + + t = x / PIL; + if (t >= 0.0L) + t += 0.5L; + else + t -= 0.5L; + + i = t; /* the multiple */ + t = i; + t = ((x - t * DP1) - t * DP2) - t * DP3; + return t; +} + +long double complex catanl(long double complex z) +{ + long double complex w; + long double a, t, x, x2, y; + + x = creall(z); + y = cimagl(z); + + x2 = x * x; + a = 1.0L - x2 - (y * y); + + t = atan2l(2.0L * x, a) * 0.5L; + w = redupil(t); + + t = y - 1.0L; + a = x2 + (t * t); + + t = y + 1.0L; + a = (x2 + (t * t)) / a; + w = CMPLXF(w, 0.25L * logl(a)); + return w; +} +#endif diff --git a/src/complex/ccos.c b/src/complex/ccos.c new file mode 100644 index 0000000..f32e1fa --- /dev/null +++ b/src/complex/ccos.c @@ -0,0 +1,8 @@ +#include "complex_impl.h" + +/* cos(z) = cosh(i z) */ + +double complex ccos(double complex z) +{ + return ccosh(CMPLX(-cimag(z), creal(z))); +} diff --git a/src/complex/ccosf.c b/src/complex/ccosf.c new file mode 100644 index 0000000..490be9b --- /dev/null +++ b/src/complex/ccosf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float complex ccosf(float complex z) +{ + return ccoshf(CMPLXF(-cimagf(z), crealf(z))); +} diff --git a/src/complex/ccosh.c b/src/complex/ccosh.c new file mode 100644 index 0000000..c995da7 --- /dev/null +++ b/src/complex/ccosh.c @@ -0,0 +1,140 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_ccosh.c */ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic cosine of a complex argument z = x + i y. + * + * cosh(z) = cosh(x+iy) + * = cosh(x) cos(y) + i sinh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ + +#include "complex_impl.h" + +static const double huge = 0x1p1023; + +double complex ccosh(double complex z) +{ + double x, y, h; + int32_t hx, hy, ix, iy, lx, ly; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return CMPLX(cosh(x), x * y); + if (ix < 0x40360000) /* small x: normal case */ + return CMPLX(cosh(x) * cos(y), sinh(x) * sin(y)); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = exp(fabs(x)) * 0.5; + return CMPLX(h * cos(y), copysign(h, x) * sin(y)); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + z = __ldexp_cexp(CMPLX(fabs(x), y), -1); + return CMPLX(creal(z), cimag(z) * copysign(1, x)); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return CMPLX(h * h * cos(y), h * sin(y)); + } + } + + /* + * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return CMPLX(y - y, copysign(0, x * (y - y))); + + /* + * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. + * + * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. + * The sign of 0 in the result is unspecified. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return CMPLX(x * x, copysign(0, x) * y); + return CMPLX(x * x, copysign(0, (x + x) * y)); + } + + /* + * cosh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * cosh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return CMPLX(y - y, x * (y - y)); + + /* + * cosh(+-Inf + I NaN) = +Inf + I d(NaN). + * + * cosh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return CMPLX(x * x, x * (y - y)); + return CMPLX((x * x) * cos(y), x * sin(y)); + } + + /* + * cosh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * cosh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return CMPLX((x * x) * (y - y), (x + x) * (y - y)); +} diff --git a/src/complex/ccoshf.c b/src/complex/ccoshf.c new file mode 100644 index 0000000..189ce94 --- /dev/null +++ b/src/complex/ccoshf.c @@ -0,0 +1,90 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_ccoshf.c */ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic cosine of a complex argument. See s_ccosh.c for details. + */ + +#include "complex_impl.h" + +static const float huge = 0x1p127; + +float complex ccoshf(float complex z) +{ + float x, y, h; + int32_t hx, hy, ix, iy; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0) + return CMPLXF(coshf(x), x * y); + if (ix < 0x41100000) /* small x: normal case */ + return CMPLXF(coshf(x) * cosf(y), sinhf(x) * sinf(y)); + + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return CMPLXF(h * cosf(y), copysignf(h, x) * sinf(y)); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1); + return CMPLXF(crealf(z), cimagf(z) * copysignf(1, x)); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return CMPLXF(h * h * cosf(y), h * sinf(y)); + } + } + + if (ix == 0 && iy >= 0x7f800000) + return CMPLXF(y - y, copysignf(0, x * (y - y))); + + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return CMPLXF(x * x, copysignf(0, x) * y); + return CMPLXF(x * x, copysignf(0, (x + x) * y)); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000) + return CMPLXF(y - y, x * (y - y)); + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return CMPLXF(x * x, x * (y - y)); + return CMPLXF((x * x) * cosf(y), x * sinf(y)); + } + + return CMPLXF((x * x) * (y - y), (x + x) * (y - y)); +} diff --git a/src/complex/ccoshl.c b/src/complex/ccoshl.c new file mode 100644 index 0000000..ffb4d8a --- /dev/null +++ b/src/complex/ccoshl.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +//FIXME +long double complex ccoshl(long double complex z) +{ + return ccosh(z); +} diff --git a/src/complex/ccosl.c b/src/complex/ccosl.c new file mode 100644 index 0000000..2530006 --- /dev/null +++ b/src/complex/ccosl.c @@ -0,0 +1,13 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex ccosl(long double complex z) +{ + return ccos(z); +} +#else +long double complex ccosl(long double complex z) +{ + return ccoshl(CMPLXL(-cimagl(z), creall(z))); +} +#endif diff --git a/src/complex/cexp.c b/src/complex/cexp.c new file mode 100644 index 0000000..7fb489b --- /dev/null +++ b/src/complex/cexp.c @@ -0,0 +1,83 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cexp.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +static const uint32_t +exp_ovfl = 0x40862e42, /* high bits of MAX_EXP * ln2 ~= 710 */ +cexp_ovfl = 0x4096b8e4; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ + +double complex cexp(double complex z) +{ + double x, y, exp_x; + uint32_t hx, hy, lx, ly; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hy, ly, y); + hy &= 0x7fffffff; + + /* cexp(x + I 0) = exp(x) + I 0 */ + if ((hy | ly) == 0) + return CMPLX(exp(x), y); + EXTRACT_WORDS(hx, lx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if (((hx & 0x7fffffff) | lx) == 0) + return CMPLX(cos(y), sin(y)); + + if (hy >= 0x7ff00000) { + if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + return CMPLX(y - y, y - y); + } else if (hx & 0x80000000) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + return CMPLX(0.0, 0.0); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + return CMPLX(x, y - y); + } + } + + if (hx >= exp_ovfl && hx <= cexp_ovfl) { + /* + * x is between 709.7 and 1454.3, so we must scale to avoid + * overflow in exp(x). + */ + return __ldexp_cexp(z, 0); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = exp(x); + return CMPLX(exp_x * cos(y), exp_x * sin(y)); + } +} diff --git a/src/complex/cexpf.c b/src/complex/cexpf.c new file mode 100644 index 0000000..00d258f --- /dev/null +++ b/src/complex/cexpf.c @@ -0,0 +1,83 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cexpf.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +static const uint32_t +exp_ovfl = 0x42b17218, /* MAX_EXP * ln2 ~= 88.722839355 */ +cexp_ovfl = 0x43400074; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ + +float complex cexpf(float complex z) +{ + float x, y, exp_x; + uint32_t hx, hy; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hy, y); + hy &= 0x7fffffff; + + /* cexp(x + I 0) = exp(x) + I 0 */ + if (hy == 0) + return CMPLXF(expf(x), y); + GET_FLOAT_WORD(hx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if ((hx & 0x7fffffff) == 0) + return CMPLXF(cosf(y), sinf(y)); + + if (hy >= 0x7f800000) { + if ((hx & 0x7fffffff) != 0x7f800000) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + return CMPLXF(y - y, y - y); + } else if (hx & 0x80000000) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + return CMPLXF(0.0, 0.0); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + return CMPLXF(x, y - y); + } + } + + if (hx >= exp_ovfl && hx <= cexp_ovfl) { + /* + * x is between 88.7 and 192, so we must scale to avoid + * overflow in expf(x). + */ + return __ldexp_cexpf(z, 0); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = expf(x); + return CMPLXF(exp_x * cosf(y), exp_x * sinf(y)); + } +} diff --git a/src/complex/cexpl.c b/src/complex/cexpl.c new file mode 100644 index 0000000..d4df950 --- /dev/null +++ b/src/complex/cexpl.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +//FIXME +long double complex cexpl(long double complex z) +{ + return cexp(z); +} diff --git a/src/complex/cimag.c b/src/complex/cimag.c new file mode 100644 index 0000000..d6b0e68 --- /dev/null +++ b/src/complex/cimag.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +double (cimag)(double complex z) +{ + return cimag(z); +} diff --git a/src/complex/cimagf.c b/src/complex/cimagf.c new file mode 100644 index 0000000..b7166dc --- /dev/null +++ b/src/complex/cimagf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float (cimagf)(float complex z) +{ + return cimagf(z); +} diff --git a/src/complex/cimagl.c b/src/complex/cimagl.c new file mode 100644 index 0000000..4db77f2 --- /dev/null +++ b/src/complex/cimagl.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +long double (cimagl)(long double complex z) +{ + return cimagl(z); +} diff --git a/src/complex/clog.c b/src/complex/clog.c new file mode 100644 index 0000000..b587c29 --- /dev/null +++ b/src/complex/clog.c @@ -0,0 +1,14 @@ +#include "complex_impl.h" + +// FIXME + +/* log(z) = log(|z|) + i arg(z) */ + +double complex clog(double complex z) +{ + double r, phi; + + r = cabs(z); + phi = carg(z); + return CMPLX(log(r), phi); +} diff --git a/src/complex/clogf.c b/src/complex/clogf.c new file mode 100644 index 0000000..0389d47 --- /dev/null +++ b/src/complex/clogf.c @@ -0,0 +1,12 @@ +#include "complex_impl.h" + +// FIXME + +float complex clogf(float complex z) +{ + float r, phi; + + r = cabsf(z); + phi = cargf(z); + return CMPLXF(logf(r), phi); +} diff --git a/src/complex/clogl.c b/src/complex/clogl.c new file mode 100644 index 0000000..88e83e8 --- /dev/null +++ b/src/complex/clogl.c @@ -0,0 +1,18 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex clogl(long double complex z) +{ + return clog(z); +} +#else +// FIXME +long double complex clogl(long double complex z) +{ + long double r, phi; + + r = cabsl(z); + phi = cargl(z); + return CMPLXL(logl(r), phi); +} +#endif diff --git a/src/complex/conj.c b/src/complex/conj.c new file mode 100644 index 0000000..a3b19a4 --- /dev/null +++ b/src/complex/conj.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +double complex conj(double complex z) +{ + return CMPLX(creal(z), -cimag(z)); +} diff --git a/src/complex/conjf.c b/src/complex/conjf.c new file mode 100644 index 0000000..b2195c8 --- /dev/null +++ b/src/complex/conjf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float complex conjf(float complex z) +{ + return CMPLXF(crealf(z), -cimagf(z)); +} diff --git a/src/complex/conjl.c b/src/complex/conjl.c new file mode 100644 index 0000000..87a4ebe --- /dev/null +++ b/src/complex/conjl.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +long double complex conjl(long double complex z) +{ + return CMPLXL(creall(z), -cimagl(z)); +} diff --git a/src/complex/cpow.c b/src/complex/cpow.c new file mode 100644 index 0000000..1137d39 --- /dev/null +++ b/src/complex/cpow.c @@ -0,0 +1,8 @@ +#include "complex_impl.h" + +/* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */ + +double complex cpow(double complex z, double complex c) +{ + return cexp(c * clog(z)); +} diff --git a/src/complex/cpowf.c b/src/complex/cpowf.c new file mode 100644 index 0000000..f3fd4b7 --- /dev/null +++ b/src/complex/cpowf.c @@ -0,0 +1,6 @@ +#include "complex_impl.h" + +float complex cpowf(float complex z, float complex c) +{ + return cexpf(c * clogf(z)); +} diff --git a/src/complex/cpowl.c b/src/complex/cpowl.c new file mode 100644 index 0000000..be36f04 --- /dev/null +++ b/src/complex/cpowl.c @@ -0,0 +1,13 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex cpowl(long double complex z, long double complex c) +{ + return cpow(z, c); +} +#else +long double complex cpowl(long double complex z, long double complex c) +{ + return cexpl(c * clogl(z)); +} +#endif diff --git a/src/complex/cproj.c b/src/complex/cproj.c new file mode 100644 index 0000000..d2b8f5a --- /dev/null +++ b/src/complex/cproj.c @@ -0,0 +1,8 @@ +#include "complex_impl.h" + +double complex cproj(double complex z) +{ + if (isinf(creal(z)) || isinf(cimag(z))) + return CMPLX(INFINITY, copysign(0.0, cimag(z))); + return z; +} diff --git a/src/complex/cprojf.c b/src/complex/cprojf.c new file mode 100644 index 0000000..15a874b --- /dev/null +++ b/src/complex/cprojf.c @@ -0,0 +1,8 @@ +#include "complex_impl.h" + +float complex cprojf(float complex z) +{ + if (isinf(crealf(z)) || isinf(cimagf(z))) + return CMPLXF(INFINITY, copysignf(0.0, cimagf(z))); + return z; +} diff --git a/src/complex/cprojl.c b/src/complex/cprojl.c new file mode 100644 index 0000000..531ffa1 --- /dev/null +++ b/src/complex/cprojl.c @@ -0,0 +1,15 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex cprojl(long double complex z) +{ + return cproj(z); +} +#else +long double complex cprojl(long double complex z) +{ + if (isinf(creall(z)) || isinf(cimagl(z))) + return CMPLXL(INFINITY, copysignl(0.0, cimagl(z))); + return z; +} +#endif diff --git a/src/complex/creal.c b/src/complex/creal.c new file mode 100644 index 0000000..f670304 --- /dev/null +++ b/src/complex/creal.c @@ -0,0 +1,6 @@ +#include + +double (creal)(double complex z) +{ + return creal(z); +} diff --git a/src/complex/crealf.c b/src/complex/crealf.c new file mode 100644 index 0000000..5dc3ff1 --- /dev/null +++ b/src/complex/crealf.c @@ -0,0 +1,6 @@ +#include + +float (crealf)(float complex z) +{ + return crealf(z); +} diff --git a/src/complex/creall.c b/src/complex/creall.c new file mode 100644 index 0000000..fd9dc34 --- /dev/null +++ b/src/complex/creall.c @@ -0,0 +1,6 @@ +#include + +long double (creall)(long double complex z) +{ + return creall(z); +} diff --git a/src/complex/csin.c b/src/complex/csin.c new file mode 100644 index 0000000..535c4bf --- /dev/null +++ b/src/complex/csin.c @@ -0,0 +1,9 @@ +#include "complex_impl.h" + +/* sin(z) = -i sinh(i z) */ + +double complex csin(double complex z) +{ + z = csinh(CMPLX(-cimag(z), creal(z))); + return CMPLX(cimag(z), -creal(z)); +} diff --git a/src/complex/csinf.c b/src/complex/csinf.c new file mode 100644 index 0000000..69f5164 --- /dev/null +++ b/src/complex/csinf.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +float complex csinf(float complex z) +{ + z = csinhf(CMPLXF(-cimagf(z), crealf(z))); + return CMPLXF(cimagf(z), -crealf(z)); +} diff --git a/src/complex/csinh.c b/src/complex/csinh.c new file mode 100644 index 0000000..eda0ab5 --- /dev/null +++ b/src/complex/csinh.c @@ -0,0 +1,141 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_csinh.c */ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic sine of a complex argument z = x + i y. + * + * sinh(z) = sinh(x+iy) + * = sinh(x) cos(y) + i cosh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ + +#include "complex_impl.h" + +static const double huge = 0x1p1023; + +double complex csinh(double complex z) +{ + double x, y, h; + int32_t hx, hy, ix, iy, lx, ly; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return CMPLX(sinh(x), y); + if (ix < 0x40360000) /* small x: normal case */ + return CMPLX(sinh(x) * cos(y), cosh(x) * sin(y)); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = exp(fabs(x)) * 0.5; + return CMPLX(copysign(h, x) * cos(y), h * sin(y)); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + z = __ldexp_cexp(CMPLX(fabs(x), y), -1); + return CMPLX(creal(z) * copysign(1, x), cimag(z)); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return CMPLX(h * cos(y), h * h * sin(y)); + } + } + + /* + * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return CMPLX(copysign(0, x * (y - y)), y - y); + + /* + * sinh(+-Inf +- I 0) = +-Inf + I +-0. + * + * sinh(NaN +- I 0) = d(NaN) + I +-0. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return CMPLX(x, y); + return CMPLX(x, copysign(0, y)); + } + + /* + * sinh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * sinh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return CMPLX(y - y, x * (y - y)); + + /* + * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). + * The sign of Inf in the result is unspecified. Choice = normally + * the same as d(NaN). + * + * sinh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return CMPLX(x * x, x * (y - y)); + return CMPLX(x * cos(y), INFINITY * sin(y)); + } + + /* + * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * sinh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return CMPLX((x * x) * (y - y), (x + x) * (y - y)); +} diff --git a/src/complex/csinhf.c b/src/complex/csinhf.c new file mode 100644 index 0000000..eb1d98c --- /dev/null +++ b/src/complex/csinhf.c @@ -0,0 +1,90 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_csinhf.c */ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic sine of a complex argument z. See s_csinh.c for details. + */ + +#include "complex_impl.h" + +static const float huge = 0x1p127; + +float complex csinhf(float complex z) +{ + float x, y, h; + int32_t hx, hy, ix, iy; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0) + return CMPLXF(sinhf(x), y); + if (ix < 0x41100000) /* small x: normal case */ + return CMPLXF(sinhf(x) * cosf(y), coshf(x) * sinf(y)); + + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return CMPLXF(copysignf(h, x) * cosf(y), h * sinf(y)); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + z = __ldexp_cexpf(CMPLXF(fabsf(x), y), -1); + return CMPLXF(crealf(z) * copysignf(1, x), cimagf(z)); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return CMPLXF(h * cosf(y), h * h * sinf(y)); + } + } + + if (ix == 0 && iy >= 0x7f800000) + return CMPLXF(copysignf(0, x * (y - y)), y - y); + + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return CMPLXF(x, y); + return CMPLXF(x, copysignf(0, y)); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000) + return CMPLXF(y - y, x * (y - y)); + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return CMPLXF(x * x, x * (y - y)); + return CMPLXF(x * cosf(y), INFINITY * sinf(y)); + } + + return CMPLXF((x * x) * (y - y), (x + x) * (y - y)); +} diff --git a/src/complex/csinhl.c b/src/complex/csinhl.c new file mode 100644 index 0000000..09fd18f --- /dev/null +++ b/src/complex/csinhl.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +//FIXME +long double complex csinhl(long double complex z) +{ + return csinh(z); +} diff --git a/src/complex/csinl.c b/src/complex/csinl.c new file mode 100644 index 0000000..90a4eb3 --- /dev/null +++ b/src/complex/csinl.c @@ -0,0 +1,14 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex csinl(long double complex z) +{ + return csin(z); +} +#else +long double complex csinl(long double complex z) +{ + z = csinhl(CMPLXL(-cimagl(z), creall(z))); + return CMPLXL(cimagl(z), -creall(z)); +} +#endif diff --git a/src/complex/csqrt.c b/src/complex/csqrt.c new file mode 100644 index 0000000..c36de00 --- /dev/null +++ b/src/complex/csqrt.c @@ -0,0 +1,100 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_csqrt.c */ +/*- + * Copyright (c) 2007 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +/* + * gcc doesn't implement complex multiplication or division correctly, + * so we need to handle infinities specially. We turn on this pragma to + * notify conforming c99 compilers that the fast-but-incorrect code that + * gcc generates is acceptable, since the special cases have already been + * handled. + */ +#pragma STDC CX_LIMITED_RANGE ON + +/* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ +#define THRESH 0x1.a827999fcef32p+1022 + +double complex csqrt(double complex z) +{ + double complex result; + double a, b; + double t; + int scale; + + a = creal(z); + b = cimag(z); + + /* Handle special cases. */ + if (z == 0) + return CMPLX(0, b); + if (isinf(b)) + return CMPLX(INFINITY, b); + if (isnan(a)) { + t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ + return CMPLX(a, t); /* return NaN + NaN i */ + } + if (isinf(a)) { + /* + * csqrt(inf + NaN i) = inf + NaN i + * csqrt(inf + y i) = inf + 0 i + * csqrt(-inf + NaN i) = NaN +- inf i + * csqrt(-inf + y i) = 0 + inf i + */ + if (signbit(a)) + return CMPLX(fabs(b - b), copysign(a, b)); + else + return CMPLX(a, copysign(b - b, b)); + } + /* + * The remaining special case (b is NaN) is handled just fine by + * the normal code path below. + */ + + /* Scale to avoid overflow. */ + if (fabs(a) >= THRESH || fabs(b) >= THRESH) { + a *= 0.25; + b *= 0.25; + scale = 1; + } else { + scale = 0; + } + + /* Algorithm 312, CACM vol 10, Oct 1967. */ + if (a >= 0) { + t = sqrt((a + hypot(a, b)) * 0.5); + result = CMPLX(t, b / (2 * t)); + } else { + t = sqrt((-a + hypot(a, b)) * 0.5); + result = CMPLX(fabs(b) / (2 * t), copysign(t, b)); + } + + /* Rescale. */ + if (scale) + result *= 2; + return result; +} diff --git a/src/complex/csqrtf.c b/src/complex/csqrtf.c new file mode 100644 index 0000000..a616397 --- /dev/null +++ b/src/complex/csqrtf.c @@ -0,0 +1,82 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_csqrtf.c */ +/*- + * Copyright (c) 2007 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "complex_impl.h" + +/* + * gcc doesn't implement complex multiplication or division correctly, + * so we need to handle infinities specially. We turn on this pragma to + * notify conforming c99 compilers that the fast-but-incorrect code that + * gcc generates is acceptable, since the special cases have already been + * handled. + */ +#pragma STDC CX_LIMITED_RANGE ON + +float complex csqrtf(float complex z) +{ + float a = crealf(z), b = cimagf(z); + double t; + + /* Handle special cases. */ + if (z == 0) + return CMPLXF(0, b); + if (isinf(b)) + return CMPLXF(INFINITY, b); + if (isnan(a)) { + t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ + return CMPLXF(a, t); /* return NaN + NaN i */ + } + if (isinf(a)) { + /* + * csqrtf(inf + NaN i) = inf + NaN i + * csqrtf(inf + y i) = inf + 0 i + * csqrtf(-inf + NaN i) = NaN +- inf i + * csqrtf(-inf + y i) = 0 + inf i + */ + if (signbit(a)) + return CMPLXF(fabsf(b - b), copysignf(a, b)); + else + return CMPLXF(a, copysignf(b - b, b)); + } + /* + * The remaining special case (b is NaN) is handled just fine by + * the normal code path below. + */ + + /* + * We compute t in double precision to avoid overflow and to + * provide correct rounding in nearly all cases. + * This is Algorithm 312, CACM vol 10, Oct 1967. + */ + if (a >= 0) { + t = sqrt((a + hypot(a, b)) * 0.5); + return CMPLXF(t, b / (2.0 * t)); + } else { + t = sqrt((-a + hypot(a, b)) * 0.5); + return CMPLXF(fabsf(b) / (2.0 * t), copysignf(t, b)); + } +} diff --git a/src/complex/csqrtl.c b/src/complex/csqrtl.c new file mode 100644 index 0000000..2253937 --- /dev/null +++ b/src/complex/csqrtl.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +//FIXME +long double complex csqrtl(long double complex z) +{ + return csqrt(z); +} diff --git a/src/complex/ctan.c b/src/complex/ctan.c new file mode 100644 index 0000000..918717b --- /dev/null +++ b/src/complex/ctan.c @@ -0,0 +1,9 @@ +#include "complex_impl.h" + +/* tan(z) = -i tanh(i z) */ + +double complex ctan(double complex z) +{ + z = ctanh(CMPLX(-cimag(z), creal(z))); + return CMPLX(cimag(z), -creal(z)); +} diff --git a/src/complex/ctanf.c b/src/complex/ctanf.c new file mode 100644 index 0000000..04c3ff1 --- /dev/null +++ b/src/complex/ctanf.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +float complex ctanf(float complex z) +{ + z = ctanhf(CMPLXF(-cimagf(z), crealf(z))); + return CMPLXF(cimagf(z), -crealf(z)); +} diff --git a/src/complex/ctanh.c b/src/complex/ctanh.c new file mode 100644 index 0000000..54004cd --- /dev/null +++ b/src/complex/ctanh.c @@ -0,0 +1,129 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_ctanh.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic tangent of a complex argument z = x + i y. + * + * The algorithm is from: + * + * W. Kahan. Branch Cuts for Complex Elementary Functions or Much + * Ado About Nothing's Sign Bit. In The State of the Art in + * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987. + * + * Method: + * + * Let t = tan(x) + * beta = 1/cos^2(y) + * s = sinh(x) + * rho = cosh(x) + * + * We have: + * + * tanh(z) = sinh(z) / cosh(z) + * + * sinh(x) cos(y) + i cosh(x) sin(y) + * = --------------------------------- + * cosh(x) cos(y) + i sinh(x) sin(y) + * + * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * = ------------------------------------- + * 1 + sinh^2(x) / cos^2(y) + * + * beta rho s + i t + * = ---------------- + * 1 + beta s^2 + * + * Modifications: + * + * I omitted the original algorithm's handling of overflow in tan(x) after + * verifying with nearpi.c that this can't happen in IEEE single or double + * precision. I also handle large x differently. + */ + +#include "complex_impl.h" + +double complex ctanh(double complex z) +{ + double x, y; + double t, beta, s, rho, denom; + uint32_t hx, ix, lx; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + ix = hx & 0x7fffffff; + + /* + * ctanh(NaN + i 0) = NaN + i 0 + * + * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * + * The imaginary part has the sign of x*sin(2*y), but there's no + * special effort to get this right. + * + * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * + * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * + * The imaginary part of the sign is unspecified. This special + * case is only needed to avoid a spurious invalid exception when + * y is infinite. + */ + if (ix >= 0x7ff00000) { + if ((ix & 0xfffff) | lx) /* x is NaN */ + return CMPLX(x, (y == 0 ? y : x * y)); + SET_HIGH_WORD(x, hx - 0x40000000); /* x = copysign(1, x) */ + return CMPLX(x, copysign(0, isinf(y) ? y : sin(y) * cos(y))); + } + + /* + * ctanh(+-0 + i NAN) = +-0 + i NaN + * ctanh(+-0 +- i Inf) = +-0 + i NaN + * ctanh(x + i NAN) = NaN + i NaN + * ctanh(x +- i Inf) = NaN + i NaN + */ + if (!isfinite(y)) + return CMPLX(x ? y - y : x, y - y); + + /* + * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * approximation sinh^2(huge) ~= exp(2*huge) / 4. + * We use a modified formula to avoid spurious overflow. + */ + if (ix >= 0x40360000) { /* x >= 22 */ + double exp_mx = exp(-fabs(x)); + return CMPLX(copysign(1, x), 4 * sin(y) * cos(y) * exp_mx * exp_mx); + } + + /* Kahan's algorithm */ + t = tan(y); + beta = 1.0 + t * t; /* = 1 / cos^2(y) */ + s = sinh(x); + rho = sqrt(1 + s * s); /* = cosh(x) */ + denom = 1 + beta * s * s; + return CMPLX((beta * rho * s) / denom, t / denom); +} diff --git a/src/complex/ctanhf.c b/src/complex/ctanhf.c new file mode 100644 index 0000000..7f422ba --- /dev/null +++ b/src/complex/ctanhf.c @@ -0,0 +1,66 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_ctanhf.c */ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Hyperbolic tangent of a complex argument z. See s_ctanh.c for details. + */ + +#include "complex_impl.h" + +float complex ctanhf(float complex z) +{ + float x, y; + float t, beta, s, rho, denom; + uint32_t hx, ix; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + + if (ix >= 0x7f800000) { + if (ix & 0x7fffff) + return CMPLXF(x, (y == 0 ? y : x * y)); + SET_FLOAT_WORD(x, hx - 0x40000000); + return CMPLXF(x, copysignf(0, isinf(y) ? y : sinf(y) * cosf(y))); + } + + if (!isfinite(y)) + return CMPLXF(ix ? y - y : x, y - y); + + if (ix >= 0x41300000) { /* x >= 11 */ + float exp_mx = expf(-fabsf(x)); + return CMPLXF(copysignf(1, x), 4 * sinf(y) * cosf(y) * exp_mx * exp_mx); + } + + t = tanf(y); + beta = 1.0 + t * t; + s = sinhf(x); + rho = sqrtf(1 + s * s); + denom = 1 + beta * s * s; + return CMPLXF((beta * rho * s) / denom, t / denom); +} diff --git a/src/complex/ctanhl.c b/src/complex/ctanhl.c new file mode 100644 index 0000000..45d5862 --- /dev/null +++ b/src/complex/ctanhl.c @@ -0,0 +1,7 @@ +#include "complex_impl.h" + +//FIXME +long double complex ctanhl(long double complex z) +{ + return ctanh(z); +} diff --git a/src/complex/ctanl.c b/src/complex/ctanl.c new file mode 100644 index 0000000..4b87420 --- /dev/null +++ b/src/complex/ctanl.c @@ -0,0 +1,14 @@ +#include "complex_impl.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double complex ctanl(long double complex z) +{ + return ctan(z); +} +#else +long double complex ctanl(long double complex z) +{ + z = ctanhl(CMPLXL(-cimagl(z), creall(z))); + return CMPLXL(cimagl(z), -creall(z)); +} +#endif diff --git a/src/ctype/isalpha.c b/src/ctype/isalpha.c deleted file mode 100644 index 14f5d86..0000000 --- a/src/ctype/isalpha.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isalpha.c - */ - -#include - -int isalpha(int c) -{ - return (((unsigned)c | 32) - 'a') < 26; -} diff --git a/src/ctype/isdigit.c b/src/ctype/isdigit.c deleted file mode 100644 index 81245d7..0000000 --- a/src/ctype/isdigit.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isdigit.c - */ - -#include - -int isdigit(int c) -{ - return ((unsigned)c - '0') < 10; -} diff --git a/src/ctype/isgraph.c b/src/ctype/isgraph.c deleted file mode 100644 index cb81299..0000000 --- a/src/ctype/isgraph.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isgraph.c - */ - -#include - -int isgraph(int c) -{ - return ((unsigned)c - 0x21) < 0x5e; -} diff --git a/src/ctype/islower.c b/src/ctype/islower.c deleted file mode 100644 index 7895f33..0000000 --- a/src/ctype/islower.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/islower.c - */ - -#include - -int islower(int c) -{ - return ((unsigned)c - 'a') < 26; -} diff --git a/src/ctype/isprint.c b/src/ctype/isprint.c deleted file mode 100644 index 845a259..0000000 --- a/src/ctype/isprint.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isprint.c - */ - -#include - -int isprint(int c) -{ - return ((unsigned)c - 0x20) < 0x5f; -} diff --git a/src/ctype/isspace.c b/src/ctype/isspace.c deleted file mode 100644 index 7e3f94a..0000000 --- a/src/ctype/isspace.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isspace.c - */ - -#include - -int isspace(int c) -{ - return (c == ' ') || ((unsigned)c - '\t' < 5); -} diff --git a/src/ctype/isupper.c b/src/ctype/isupper.c deleted file mode 100644 index 8cce11a..0000000 --- a/src/ctype/isupper.c +++ /dev/null @@ -1,10 +0,0 @@ -/* - * libc/ctype/isupper.c - */ - -#include - -int isupper(int c) -{ - return ((unsigned)c - 'A') < 26; -} diff --git a/src/fenv/__flt_rounds.c b/src/fenv/__flt_rounds.c new file mode 100644 index 0000000..ec0b368 --- /dev/null +++ b/src/fenv/__flt_rounds.c @@ -0,0 +1,19 @@ +#include +#include + +int __flt_rounds() +{ + switch (fegetround()) { +#ifdef FE_TOWARDZERO + case FE_TOWARDZERO: return 0; +#endif + case FE_TONEAREST: return 1; +#ifdef FE_UPWARD + case FE_UPWARD: return 2; +#endif +#ifdef FE_DOWNWARD + case FE_DOWNWARD: return 3; +#endif + } + return -1; +} diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s new file mode 100644 index 0000000..8f3ec96 --- /dev/null +++ b/src/fenv/aarch64/fenv.s @@ -0,0 +1,68 @@ +.global fegetround +.type fegetround,%function +fegetround: + mrs x0, fpcr + and w0, w0, #0xc00000 + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,%function +__fesetround: + mrs x1, fpcr + bic w1, w1, #0xc00000 + orr w1, w1, w0 + msr fpcr, x1 + mov w0, #0 + ret + +.global fetestexcept +.type fetestexcept,%function +fetestexcept: + and w0, w0, #0x1f + mrs x1, fpsr + and w0, w0, w1 + ret + +.global feclearexcept +.type feclearexcept,%function +feclearexcept: + and w0, w0, #0x1f + mrs x1, fpsr + bic w1, w1, w0 + msr fpsr, x1 + mov w0, #0 + ret + +.global feraiseexcept +.type feraiseexcept,%function +feraiseexcept: + and w0, w0, #0x1f + mrs x1, fpsr + orr w1, w1, w0 + msr fpsr, x1 + mov w0, #0 + ret + +.global fegetenv +.type fegetenv,%function +fegetenv: + mrs x1, fpcr + mrs x2, fpsr + stp w1, w2, [x0] + mov w0, #0 + ret + +// TODO preserve some bits +.global fesetenv +.type fesetenv,%function +fesetenv: + mov x1, #0 + mov x2, #0 + cmn x0, #1 + b.eq 1f + ldp w1, w2, [x0] +1: msr fpcr, x1 + msr fpsr, x2 + mov w0, #0 + ret diff --git a/src/fenv/arm/fenv-hf.S b/src/fenv/arm/fenv-hf.S new file mode 100644 index 0000000..2a1de0d --- /dev/null +++ b/src/fenv/arm/fenv-hf.S @@ -0,0 +1,70 @@ +#if __ARM_PCS_VFP + +.syntax unified +.fpu vfp + +.global fegetround +.type fegetround,%function +fegetround: + fmrx r0, fpscr + and r0, r0, #0xc00000 + bx lr + +.global __fesetround +.hidden __fesetround +.type __fesetround,%function +__fesetround: + fmrx r3, fpscr + bic r3, r3, #0xc00000 + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fetestexcept +.type fetestexcept,%function +fetestexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + and r0, r0, r3 + bx lr + +.global feclearexcept +.type feclearexcept,%function +feclearexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + bic r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global feraiseexcept +.type feraiseexcept,%function +feraiseexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fegetenv +.type fegetenv,%function +fegetenv: + fmrx r3, fpscr + str r3, [r0] + mov r0, #0 + bx lr + +.global fesetenv +.type fesetenv,%function +fesetenv: + cmn r0, #1 + moveq r3, #0 + ldrne r3, [r0] + fmxr fpscr, r3 + mov r0, #0 + bx lr + +#endif diff --git a/src/fenv/arm/fenv.c b/src/fenv/arm/fenv.c new file mode 100644 index 0000000..ad295f5 --- /dev/null +++ b/src/fenv/arm/fenv.c @@ -0,0 +1,3 @@ +#if !__ARM_PCS_VFP +#include "../fenv.c" +#endif diff --git a/src/fenv/fegetexceptflag.c b/src/fenv/fegetexceptflag.c new file mode 100644 index 0000000..bab0b44 --- /dev/null +++ b/src/fenv/fegetexceptflag.c @@ -0,0 +1,7 @@ +#include + +int fegetexceptflag(fexcept_t *fp, int mask) +{ + *fp = fetestexcept(mask); + return 0; +} diff --git a/src/fenv/feholdexcept.c b/src/fenv/feholdexcept.c new file mode 100644 index 0000000..73ff1fa --- /dev/null +++ b/src/fenv/feholdexcept.c @@ -0,0 +1,8 @@ +#include + +int feholdexcept(fenv_t *envp) +{ + fegetenv(envp); + feclearexcept(FE_ALL_EXCEPT); + return 0; +} diff --git a/src/fenv/fenv.c b/src/fenv/fenv.c new file mode 100644 index 0000000..5588dad --- /dev/null +++ b/src/fenv/fenv.c @@ -0,0 +1,38 @@ +#include + +/* Dummy functions for archs lacking fenv implementation */ + +int feclearexcept(int mask) +{ + return 0; +} + +int feraiseexcept(int mask) +{ + return 0; +} + +int fetestexcept(int mask) +{ + return 0; +} + +int fegetround(void) +{ + return FE_TONEAREST; +} + +int __fesetround(int r) +{ + return 0; +} + +int fegetenv(fenv_t *envp) +{ + return 0; +} + +int fesetenv(const fenv_t *envp) +{ + return 0; +} diff --git a/src/fenv/fesetexceptflag.c b/src/fenv/fesetexceptflag.c new file mode 100644 index 0000000..af5f102 --- /dev/null +++ b/src/fenv/fesetexceptflag.c @@ -0,0 +1,8 @@ +#include + +int fesetexceptflag(const fexcept_t *fp, int mask) +{ + feclearexcept(~*fp & mask); + feraiseexcept(*fp & mask); + return 0; +} diff --git a/src/fenv/fesetround.c b/src/fenv/fesetround.c new file mode 100644 index 0000000..47a0418 --- /dev/null +++ b/src/fenv/fesetround.c @@ -0,0 +1,23 @@ +#include +#include + +/* __fesetround wrapper for arch independent argument check */ + +_hidden int __fesetround(int); + +int fesetround(int r) +{ + if (r != FE_TONEAREST +#ifdef FE_DOWNWARD + && r != FE_DOWNWARD +#endif +#ifdef FE_UPWARD + && r != FE_UPWARD +#endif +#ifdef FE_TOWARDZERO + && r != FE_TOWARDZERO +#endif + ) + return -1; + return __fesetround(r); +} diff --git a/src/fenv/feupdateenv.c b/src/fenv/feupdateenv.c new file mode 100644 index 0000000..50cef8e --- /dev/null +++ b/src/fenv/feupdateenv.c @@ -0,0 +1,9 @@ +#include + +int feupdateenv(const fenv_t *envp) +{ + int ex = fetestexcept(FE_ALL_EXCEPT); + fesetenv(envp); + feraiseexcept(ex); + return 0; +} diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s new file mode 100644 index 0000000..e7f7932 --- /dev/null +++ b/src/fenv/i386/fenv.s @@ -0,0 +1,164 @@ +.hidden __hwcap + +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + mov 4(%esp),%ecx + and $0x3f,%ecx + fnstsw %ax + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 2f + # maintain exceptions in the sse mxcsr, clear x87 exceptions + test %eax,%ecx + jz 1f + fnclex +1: push %edx + stmxcsr (%esp) + pop %edx + and $0x3f,%eax + or %eax,%edx + test %edx,%ecx + jz 1f + not %ecx + and %ecx,%edx + push %edx + ldmxcsr (%esp) + pop %edx +1: xor %eax,%eax + ret + # only do the expensive x87 fenv load/store when needed +2: test %eax,%ecx + jz 1b + not %ecx + and %ecx,%eax + test $0x3f,%eax + jz 1f + fnclex + jmp 1b +1: sub $32,%esp + fnstenv (%esp) + mov %al,4(%esp) + fldenv (%esp) + add $32,%esp + xor %eax,%eax + ret + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + mov 4(%esp),%eax + and $0x3f,%eax + sub $32,%esp + fnstenv (%esp) + or %al,4(%esp) + fldenv (%esp) + add $32,%esp + xor %eax,%eax + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + mov 4(%esp),%ecx + push %eax + xor %eax,%eax + fnstcw (%esp) + andb $0xf3,1(%esp) + or %ch,1(%esp) + fldcw (%esp) + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + stmxcsr (%esp) + shl $3,%ch + andb $0x9f,1(%esp) + or %ch,1(%esp) + ldmxcsr (%esp) +1: pop %ecx + ret + +.global fegetround +.type fegetround,@function +fegetround: + push %eax + fnstcw (%esp) + pop %eax + and $0xc00,%eax + ret + +.global fegetenv +.type fegetenv,@function +fegetenv: + mov 4(%esp),%ecx + xor %eax,%eax + fnstenv (%ecx) + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + push %eax + stmxcsr (%esp) + pop %edx + and $0x3f,%edx + or %edx,4(%ecx) +1: ret + +.global fesetenv +.type fesetenv,@function +fesetenv: + mov 4(%esp),%ecx + xor %eax,%eax + inc %ecx + jz 1f + fldenv -1(%ecx) + movl -1(%ecx),%ecx + jmp 2f +1: push %eax + push %eax + push %eax + push %eax + pushl $0xffff + push %eax + pushl $0x37f + fldenv (%esp) + add $28,%esp + # consider sse fenv as well if the cpu has XMM capability +2: call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + # mxcsr := same rounding mode, cleared exceptions, default mask + and $0xc00,%ecx + shl $3,%ecx + or $0x1f80,%ecx + mov %ecx,4(%esp) + ldmxcsr 4(%esp) +1: ret + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + mov 4(%esp),%ecx + and $0x3f,%ecx + fnstsw %ax + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + stmxcsr 4(%esp) + or 4(%esp),%eax +1: and %ecx,%eax + ret diff --git a/src/fenv/riscv64/fenv-sf.c b/src/fenv/riscv64/fenv-sf.c new file mode 100644 index 0000000..ecd3cb5 --- /dev/null +++ b/src/fenv/riscv64/fenv-sf.c @@ -0,0 +1,3 @@ +#ifndef __riscv_flen +#include "../fenv.c" +#endif diff --git a/src/fenv/riscv64/fenv.S b/src/fenv/riscv64/fenv.S new file mode 100644 index 0000000..0ea78bf --- /dev/null +++ b/src/fenv/riscv64/fenv.S @@ -0,0 +1,56 @@ +#ifdef __riscv_flen + +.global feclearexcept +.type feclearexcept, %function +feclearexcept: + csrc fflags, a0 + li a0, 0 + ret + +.global feraiseexcept +.type feraiseexcept, %function +feraiseexcept: + csrs fflags, a0 + li a0, 0 + ret + +.global fetestexcept +.type fetestexcept, %function +fetestexcept: + frflags t0 + and a0, t0, a0 + ret + +.global fegetround +.type fegetround, %function +fegetround: + frrm a0 + ret + +.global __fesetround +.type __fesetround, %function +__fesetround: + fsrm t0, a0 + li a0, 0 + ret + +.global fegetenv +.type fegetenv, %function +fegetenv: + frcsr t0 + sw t0, 0(a0) + li a0, 0 + ret + +.global fesetenv +.type fesetenv, %function +fesetenv: + li t2, -1 + li t1, 0 + beq a0, t2, 1f + lw t1, 0(a0) +1: fscsr t1 + li a0, 0 + ret + +#endif diff --git a/src/fenv/x86-64/fenv.s b/src/fenv/x86-64/fenv.s new file mode 100644 index 0000000..98d876d --- /dev/null +++ b/src/fenv/x86-64/fenv.s @@ -0,0 +1,98 @@ +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + # maintain exceptions in the sse mxcsr, clear x87 exceptions + mov %edi,%ecx + and $0x3f,%ecx + fnstsw %ax + test %eax,%ecx + jz 1f + fnclex +1: stmxcsr -8(%rsp) + and $0x3f,%eax + or %eax,-8(%rsp) + test %ecx,-8(%rsp) + jz 1f + not %ecx + and %ecx,-8(%rsp) + ldmxcsr -8(%rsp) +1: xor %eax,%eax + ret + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $0x3f,%edi + stmxcsr -8(%rsp) + or %edi,-8(%rsp) + ldmxcsr -8(%rsp) + xor %eax,%eax + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + push %rax + xor %eax,%eax + mov %edi,%ecx + fnstcw (%rsp) + andb $0xf3,1(%rsp) + or %ch,1(%rsp) + fldcw (%rsp) + stmxcsr (%rsp) + shl $3,%ch + andb $0x9f,1(%rsp) + or %ch,1(%rsp) + ldmxcsr (%rsp) + pop %rcx + ret + +.global fegetround +.type fegetround,@function +fegetround: + push %rax + stmxcsr (%rsp) + pop %rax + shr $3,%eax + and $0xc00,%eax + ret + +.global fegetenv +.type fegetenv,@function +fegetenv: + xor %eax,%eax + fnstenv (%rdi) + stmxcsr 28(%rdi) + ret + +.global fesetenv +.type fesetenv,@function +fesetenv: + xor %eax,%eax + inc %rdi + jz 1f + fldenv -1(%rdi) + ldmxcsr 27(%rdi) + ret +1: push %rax + push %rax + pushq $0xffff + pushq $0x37f + fldenv (%rsp) + pushq $0x1f80 + ldmxcsr (%rsp) + add $40,%rsp + ret + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $0x3f,%edi + push %rax + stmxcsr (%rsp) + pop %rsi + fnstsw %ax + or %esi,%eax + and %edi,%eax + ret diff --git a/src/include/bits/alltypes.h.in b/src/include/bits/alltypes.h.in new file mode 100644 index 0000000..d47aeea --- /dev/null +++ b/src/include/bits/alltypes.h.in @@ -0,0 +1,95 @@ +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __USE_TIME_BITS64 1 + +TYPEDEF unsigned _Addr size_t; +TYPEDEF unsigned _Addr uintptr_t; +TYPEDEF _Addr ptrdiff_t; +TYPEDEF _Addr ssize_t; +TYPEDEF _Addr intptr_t; +TYPEDEF _Addr regoff_t; +TYPEDEF _Reg register_t; +TYPEDEF _Int64 time_t; +TYPEDEF _Int64 suseconds_t; + +TYPEDEF signed char int8_t; +TYPEDEF signed short int16_t; +TYPEDEF signed int int32_t; +TYPEDEF signed _Int64 int64_t; +TYPEDEF signed _Int64 intmax_t; +TYPEDEF unsigned char uint8_t; +TYPEDEF unsigned short uint16_t; +TYPEDEF unsigned int uint32_t; +TYPEDEF unsigned _Int64 uint64_t; +TYPEDEF unsigned _Int64 u_int64_t; +TYPEDEF unsigned _Int64 uintmax_t; + +TYPEDEF unsigned mode_t; +TYPEDEF unsigned _Reg nlink_t; +TYPEDEF _Int64 off_t; +TYPEDEF unsigned _Int64 ino_t; +TYPEDEF unsigned _Int64 dev_t; +TYPEDEF long blksize_t; +TYPEDEF _Int64 blkcnt_t; +TYPEDEF unsigned _Int64 fsblkcnt_t; +TYPEDEF unsigned _Int64 fsfilcnt_t; + +TYPEDEF unsigned wint_t; +TYPEDEF unsigned long wctype_t; + +TYPEDEF void * timer_t; +TYPEDEF int clockid_t; +TYPEDEF long clock_t; +STRUCT timeval { time_t tv_sec; suseconds_t tv_usec; }; +STRUCT timespec { time_t tv_sec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER==4321); long tv_nsec; int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321); }; + +TYPEDEF int pid_t; +TYPEDEF unsigned id_t; +TYPEDEF unsigned uid_t; +TYPEDEF unsigned gid_t; +TYPEDEF int key_t; +TYPEDEF unsigned useconds_t; + +#ifdef __cplusplus +TYPEDEF unsigned long pthread_t; +#else +TYPEDEF struct __pthread * pthread_t; +#endif +TYPEDEF int pthread_once_t; +TYPEDEF unsigned pthread_key_t; +TYPEDEF int pthread_spinlock_t; +TYPEDEF struct { unsigned __attr; } pthread_mutexattr_t; +TYPEDEF struct { unsigned __attr; } pthread_condattr_t; +TYPEDEF struct { unsigned __attr; } pthread_barrierattr_t; +TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t; + +STRUCT _IO_FILE { char __x; }; +TYPEDEF struct _IO_FILE FILE; + +TYPEDEF __builtin_va_list va_list; +TYPEDEF __builtin_va_list __isoc_va_list; + +TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t; + +TYPEDEF struct __locale_struct * locale_t; + +TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t; + +STRUCT iovec { void *iov_base; size_t iov_len; }; + +STRUCT winsize { unsigned short ws_row, ws_col, ws_xpixel, ws_ypixel; }; + +TYPEDEF unsigned socklen_t; +TYPEDEF unsigned short sa_family_t; + +TYPEDEF struct { union { int __i[sizeof(long)==8?14:9]; volatile int __vi[sizeof(long)==8?14:9]; unsigned long __s[sizeof(long)==8?7:9]; } __u; } pthread_attr_t; +TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } pthread_mutex_t; +TYPEDEF struct { union { int __i[sizeof(long)==8?10:6]; volatile int __vi[sizeof(long)==8?10:6]; volatile void *volatile __p[sizeof(long)==8?5:6]; } __u; } mtx_t; +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } pthread_cond_t; +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12*sizeof(int)/sizeof(void*)]; } __u; } cnd_t; +TYPEDEF struct { union { int __i[sizeof(long)==8?14:8]; volatile int __vi[sizeof(long)==8?14:8]; void *__p[sizeof(long)==8?7:8]; } __u; } pthread_rwlock_t; +TYPEDEF struct { union { int __i[sizeof(long)==8?8:5]; volatile int __vi[sizeof(long)==8?8:5]; void *__p[sizeof(long)==8?4:5]; } __u; } pthread_barrier_t; + +#undef _Addr +#undef _Int64 +#undef _Reg diff --git a/src/include/complex.h b/src/include/complex.h new file mode 100644 index 0000000..a21b497 --- /dev/null +++ b/src/include/complex.h @@ -0,0 +1,132 @@ +#ifndef _COMPLEX_H +#define _COMPLEX_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define complex _Complex +#ifdef __GNUC__ +#define _Complex_I (__extension__ (0.0f+1.0fi)) +#else +#define _Complex_I (0.0f+1.0fi) +#endif +#define I _Complex_I + +double complex cacos(double complex); +float complex cacosf(float complex); +long double complex cacosl(long double complex); + +double complex casin(double complex); +float complex casinf(float complex); +long double complex casinl(long double complex); + +double complex catan(double complex); +float complex catanf(float complex); +long double complex catanl(long double complex); + +double complex ccos(double complex); +float complex ccosf(float complex); +long double complex ccosl(long double complex); + +double complex csin(double complex); +float complex csinf(float complex); +long double complex csinl(long double complex); + +double complex ctan(double complex); +float complex ctanf(float complex); +long double complex ctanl(long double complex); + +double complex cacosh(double complex); +float complex cacoshf(float complex); +long double complex cacoshl(long double complex); + +double complex casinh(double complex); +float complex casinhf(float complex); +long double complex casinhl(long double complex); + +double complex catanh(double complex); +float complex catanhf(float complex); +long double complex catanhl(long double complex); + +double complex ccosh(double complex); +float complex ccoshf(float complex); +long double complex ccoshl(long double complex); + +double complex csinh(double complex); +float complex csinhf(float complex); +long double complex csinhl(long double complex); + +double complex ctanh(double complex); +float complex ctanhf(float complex); +long double complex ctanhl(long double complex); + +double complex cexp(double complex); +float complex cexpf(float complex); +long double complex cexpl(long double complex); + +double complex clog(double complex); +float complex clogf(float complex); +long double complex clogl(long double complex); + +double cabs(double complex); +float cabsf(float complex); +long double cabsl(long double complex); + +double complex cpow(double complex, double complex); +float complex cpowf(float complex, float complex); +long double complex cpowl(long double complex, long double complex); + +double complex csqrt(double complex); +float complex csqrtf(float complex); +long double complex csqrtl(long double complex); + +double carg(double complex); +float cargf(float complex); +long double cargl(long double complex); + +double cimag(double complex); +float cimagf(float complex); +long double cimagl(long double complex); + +double complex conj(double complex); +float complex conjf(float complex); +long double complex conjl(long double complex); + +double complex cproj(double complex); +float complex cprojf(float complex); +long double complex cprojl(long double complex); + +double creal(double complex); +float crealf(float complex); +long double creall(long double complex); + +#ifndef __cplusplus +#define __CIMAG(x, t) (+(union { _Complex t __z; t __xy[2]; }){(_Complex t)(x)}.__xy[1]) + +#define creal(x) ((double)(x)) +#define crealf(x) ((float)(x)) +#define creall(x) ((long double)(x)) + +#define cimag(x) __CIMAG(x, double) +#define cimagf(x) __CIMAG(x, float) +#define cimagl(x) __CIMAG(x, long double) +#endif + +#if __STDC_VERSION__ >= 201112L +#if defined(_Imaginary_I) +#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)) +#elif defined(__clang__) +#define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) }) +#else +#define __CMPLX(x, y, t) (__builtin_complex((t)(x), (t)(y))) +#endif +#define CMPLX(x, y) __CMPLX(x, y, double) +#define CMPLXF(x, y) __CMPLX(x, y, float) +#define CMPLXL(x, y) __CMPLX(x, y, long double) +#endif + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/include/ctype.h b/src/include/ctype.h index 51631c9..32bcef4 100644 --- a/src/include/ctype.h +++ b/src/include/ctype.h @@ -22,6 +22,54 @@ int isxdigit(int); int tolower(int); int toupper(int); +#ifndef __cplusplus +static __inline int __isspace(int _c) +{ + return _c == ' ' || (unsigned)_c-'\t' < 5; +} + +#define isalpha(a) (0 ? isalpha(a) : (((unsigned)(a)|32)-'a') < 26) +#define isdigit(a) (0 ? isdigit(a) : ((unsigned)(a)-'0') < 10) +#define islower(a) (0 ? islower(a) : ((unsigned)(a)-'a') < 26) +#define isupper(a) (0 ? isupper(a) : ((unsigned)(a)-'A') < 26) +#define isprint(a) (0 ? isprint(a) : ((unsigned)(a)-0x20) < 0x5f) +#define isgraph(a) (0 ? isgraph(a) : ((unsigned)(a)-0x21) < 0x5e) +#define isspace(a) __isspace(a) +#endif + + +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \ + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \ + || defined(_BSD_SOURCE) + +#define __NEED_locale_t +#include + +int isalnum_l(int, locale_t); +int isalpha_l(int, locale_t); +int isblank_l(int, locale_t); +int iscntrl_l(int, locale_t); +int isdigit_l(int, locale_t); +int isgraph_l(int, locale_t); +int islower_l(int, locale_t); +int isprint_l(int, locale_t); +int ispunct_l(int, locale_t); +int isspace_l(int, locale_t); +int isupper_l(int, locale_t); +int isxdigit_l(int, locale_t); +int tolower_l(int, locale_t); +int toupper_l(int, locale_t); + +int isascii(int); +int toascii(int); +#define _tolower(a) ((a)|0x20) +#define _toupper(a) ((a)&0x5f) +#ifndef __cplusplus +#define isascii(a) (0 ? isascii(a) : (unsigned)(a) < 128) +#endif + +#endif + #ifdef __cplusplus } #endif diff --git a/src/include/features.h b/src/include/features.h index c108bb9..33ea87c 100644 --- a/src/include/features.h +++ b/src/include/features.h @@ -22,4 +22,8 @@ #define __REDIR(x,y) __typeof__(x) x __asm__(#y) +#define _weak __attribute__((__weak__)) +#define _hidden __attribute__((__visibility__("hidden"))) +#define _weak_alias(old, new) extern __typeof(old) new __attribute__((__weak__, __alias__(#old))) + #endif diff --git a/src/include/fenv.h b/src/include/fenv.h new file mode 100644 index 0000000..05de990 --- /dev/null +++ b/src/include/fenv.h @@ -0,0 +1,28 @@ +#ifndef _FENV_H +#define _FENV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +int feclearexcept(int); +int fegetexceptflag(fexcept_t *, int); +int feraiseexcept(int); +int fesetexceptflag(const fexcept_t *, int); +int fetestexcept(int); + +int fegetround(void); +int fesetround(int); + +int fegetenv(fenv_t *); +int feholdexcept(fenv_t *); +int fesetenv(const fenv_t *); +int feupdateenv(const fenv_t *); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/src/include/float.h b/src/include/float.h new file mode 100644 index 0000000..713aadb --- /dev/null +++ b/src/include/float.h @@ -0,0 +1,52 @@ +#ifndef _FLOAT_H +#define _FLOAT_H + +#ifdef __cplusplus +extern "C" { +#endif + +int __flt_rounds(void); +#define FLT_ROUNDS (__flt_rounds()) + +#define FLT_RADIX 2 + +#define FLT_TRUE_MIN 1.40129846432481707092e-45F +#define FLT_MIN 1.17549435082228750797e-38F +#define FLT_MAX 3.40282346638528859812e+38F +#define FLT_EPSILON 1.1920928955078125e-07F + +#define FLT_MANT_DIG 24 +#define FLT_MIN_EXP (-125) +#define FLT_MAX_EXP 128 +#define FLT_HAS_SUBNORM 1 + +#define FLT_DIG 6 +#define FLT_DECIMAL_DIG 9 +#define FLT_MIN_10_EXP (-37) +#define FLT_MAX_10_EXP 38 + +#define DBL_TRUE_MIN 4.94065645841246544177e-324 +#define DBL_MIN 2.22507385850720138309e-308 +#define DBL_MAX 1.79769313486231570815e+308 +#define DBL_EPSILON 2.22044604925031308085e-16 + +#define DBL_MANT_DIG 53 +#define DBL_MIN_EXP (-1021) +#define DBL_MAX_EXP 1024 +#define DBL_HAS_SUBNORM 1 + +#define DBL_DIG 15 +#define DBL_DECIMAL_DIG 17 +#define DBL_MIN_10_EXP (-307) +#define DBL_MAX_10_EXP 308 + +#define LDBL_HAS_SUBNORM 1 +#define LDBL_DECIMAL_DIG DECIMAL_DIG + +#include + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/fnmatch.h b/src/include/fnmatch.h new file mode 100644 index 0000000..f959321 --- /dev/null +++ b/src/include/fnmatch.h @@ -0,0 +1,24 @@ +#ifndef _FNMATCH_H +#define _FNMATCH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define FNM_PATHNAME 0x1 +#define FNM_NOESCAPE 0x2 +#define FNM_PERIOD 0x4 +#define FNM_LEADING_DIR 0x8 +#define FNM_CASEFOLD 0x10 +#define FNM_FILE_NAME FNM_PATHNAME + +#define FNM_NOMATCH 1 +#define FNM_NOSYS (-1) + +int fnmatch(const char *, const char *, int); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/iso646.h b/src/include/iso646.h new file mode 100644 index 0000000..88ff53d --- /dev/null +++ b/src/include/iso646.h @@ -0,0 +1,20 @@ +#ifndef _ISO646_H +#define _ISO646_H + +#ifndef __cplusplus + +#define and && +#define and_eq &= +#define bitand & +#define bitor | +#define compl ~ +#define not ! +#define not_eq != +#define or || +#define or_eq |= +#define xor ^ +#define xor_eq ^= + +#endif + +#endif diff --git a/src/include/limits.h b/src/include/limits.h new file mode 100644 index 0000000..53a27b9 --- /dev/null +++ b/src/include/limits.h @@ -0,0 +1,166 @@ +#ifndef _LIMITS_H +#define _LIMITS_H + +#include + +#include /* __LONG_MAX */ + +/* Support signed or unsigned plain-char */ + +#if '\xff' > 0 +#define CHAR_MIN 0 +#define CHAR_MAX 255 +#else +#define CHAR_MIN (-128) +#define CHAR_MAX 127 +#endif + +#define CHAR_BIT 8 +#define SCHAR_MIN (-128) +#define SCHAR_MAX 127 +#define UCHAR_MAX 255 +#define SHRT_MIN (-1-0x7fff) +#define SHRT_MAX 0x7fff +#define USHRT_MAX 0xffff +#define INT_MIN (-1-0x7fffffff) +#define INT_MAX 0x7fffffff +#define UINT_MAX 0xffffffffU +#define LONG_MIN (-LONG_MAX-1) +#define LONG_MAX __LONG_MAX +#define ULONG_MAX (2UL*LONG_MAX+1) +#define LLONG_MIN (-LLONG_MAX-1) +#define LLONG_MAX 0x7fffffffffffffffLL +#define ULLONG_MAX (2ULL*LLONG_MAX+1) + +#define MB_LEN_MAX 4 + +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \ + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) + +#include + +#define PIPE_BUF 4096 +#define FILESIZEBITS 64 +#ifndef NAME_MAX +#define NAME_MAX 255 +#endif +#define PATH_MAX 4096 +#define NGROUPS_MAX 32 +#define ARG_MAX 131072 +#define IOV_MAX 1024 +#define SYMLOOP_MAX 40 +#define WORD_BIT 32 +#define SSIZE_MAX LONG_MAX +#define TZNAME_MAX 6 +#define TTY_NAME_MAX 32 +#define HOST_NAME_MAX 255 + +#if LONG_MAX == 0x7fffffffL +#define LONG_BIT 32 +#else +#define LONG_BIT 64 +#endif + +/* Implementation choices... */ + +#define PTHREAD_KEYS_MAX 128 +#define PTHREAD_STACK_MIN 2048 +#define PTHREAD_DESTRUCTOR_ITERATIONS 4 +#define SEM_VALUE_MAX 0x7fffffff +#define SEM_NSEMS_MAX 256 +#define DELAYTIMER_MAX 0x7fffffff +#define MQ_PRIO_MAX 32768 +#define LOGIN_NAME_MAX 256 + +/* Arbitrary numbers... */ + +#define BC_BASE_MAX 99 +#define BC_DIM_MAX 2048 +#define BC_SCALE_MAX 99 +#define BC_STRING_MAX 1000 +#define CHARCLASS_NAME_MAX 14 +#define COLL_WEIGHTS_MAX 2 +#define EXPR_NEST_MAX 32 +#define LINE_MAX 4096 +#define RE_DUP_MAX 255 + +#define NL_ARGMAX 9 +#define NL_MSGMAX 32767 +#define NL_SETMAX 255 +#define NL_TEXTMAX 2048 + +#endif + +#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_XOPEN_SOURCE) + +#ifdef PAGESIZE +#define PAGE_SIZE PAGESIZE +#endif +#define NZERO 20 +#define NL_LANGMAX 32 + +#endif + +#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) \ + || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE+0 < 700) + +#define NL_NMAX 16 + +#endif + +/* POSIX/SUS requirements follow. These numbers come directly + * from SUS and have nothing to do with the host system. */ + +#define _POSIX_AIO_LISTIO_MAX 2 +#define _POSIX_AIO_MAX 1 +#define _POSIX_ARG_MAX 4096 +#define _POSIX_CHILD_MAX 25 +#define _POSIX_CLOCKRES_MIN 20000000 +#define _POSIX_DELAYTIMER_MAX 32 +#define _POSIX_HOST_NAME_MAX 255 +#define _POSIX_LINK_MAX 8 +#define _POSIX_LOGIN_NAME_MAX 9 +#define _POSIX_MAX_CANON 255 +#define _POSIX_MAX_INPUT 255 +#define _POSIX_MQ_OPEN_MAX 8 +#define _POSIX_MQ_PRIO_MAX 32 +#define _POSIX_NAME_MAX 14 +#define _POSIX_NGROUPS_MAX 8 +#define _POSIX_OPEN_MAX 20 +#define _POSIX_PATH_MAX 256 +#define _POSIX_PIPE_BUF 512 +#define _POSIX_RE_DUP_MAX 255 +#define _POSIX_RTSIG_MAX 8 +#define _POSIX_SEM_NSEMS_MAX 256 +#define _POSIX_SEM_VALUE_MAX 32767 +#define _POSIX_SIGQUEUE_MAX 32 +#define _POSIX_SSIZE_MAX 32767 +#define _POSIX_STREAM_MAX 8 +#define _POSIX_SS_REPL_MAX 4 +#define _POSIX_SYMLINK_MAX 255 +#define _POSIX_SYMLOOP_MAX 8 +#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4 +#define _POSIX_THREAD_KEYS_MAX 128 +#define _POSIX_THREAD_THREADS_MAX 64 +#define _POSIX_TIMER_MAX 32 +#define _POSIX_TRACE_EVENT_NAME_MAX 30 +#define _POSIX_TRACE_NAME_MAX 8 +#define _POSIX_TRACE_SYS_MAX 8 +#define _POSIX_TRACE_USER_EVENT_MAX 32 +#define _POSIX_TTY_NAME_MAX 9 +#define _POSIX_TZNAME_MAX 6 +#define _POSIX2_BC_BASE_MAX 99 +#define _POSIX2_BC_DIM_MAX 2048 +#define _POSIX2_BC_SCALE_MAX 99 +#define _POSIX2_BC_STRING_MAX 1000 +#define _POSIX2_CHARCLASS_NAME_MAX 14 +#define _POSIX2_COLL_WEIGHTS_MAX 2 +#define _POSIX2_EXPR_NEST_MAX 32 +#define _POSIX2_LINE_MAX 2048 +#define _POSIX2_RE_DUP_MAX 255 + +#define _XOPEN_IOV_MAX 16 +#define _XOPEN_NAME_MAX 255 +#define _XOPEN_PATH_MAX 1024 + +#endif diff --git a/src/include/math.h b/src/include/math.h index 443bf99..a890f26 100644 --- a/src/include/math.h +++ b/src/include/math.h @@ -166,6 +166,15 @@ static __inline unsigned long long __DOUBLE_BITS(double __f) #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ +int __signbit(double); +int __signbitf(float); +int __signbitl(long double); + +#define signbit(x) ( \ + sizeof(x) == sizeof(float) ? (int)(__FLOAT_BITS(x)>>31) : \ + sizeof(x) == sizeof(double) ? (int)(__DOUBLE_BITS(x)>>63) : \ + __signbitl(x) ) + double acos(double); float acosf(float); double acosh(double); @@ -176,8 +185,10 @@ double asinh(double); float asinhf(float); double atan(double); float atanf(float); +long double atanl(long double); double atan2(double, double); float atan2f(float, float); +long double atan2l(long double, long double); double atanh(double); float atanhf(float); double cbrt(double); @@ -196,6 +207,7 @@ double expm1(double); float expm1f(float); double fabs(double); float fabsf(float); +long double fabsl(long double); double fdim(double, double); float fdimf(float, float); double floor(double); @@ -204,11 +216,14 @@ double fmod(double, double); float fmodf(float, float); double frexp(double, int *); float frexpf(float, int *); +long double frexpl(long double, int *); double hypot(double, double); float hypotf(float, float); +long double hypotl(long double, long double); double ldexp(double, int); float ldexpf(float, int); double log(double); +long double logl(long double); float logf(float); double log10(double); float log10f(float); @@ -238,6 +253,7 @@ double sinh(double); float sinhf(float); double sqrt(double); float sqrtf(float); +long double sqrtl(long double); double tan(double); float tanf(float); double tanh(double); @@ -250,6 +266,7 @@ double fmin(double, double); float fminf(float, float); float copysignf(float x, float y); double copysign(double x, double y); +long double copysignl(long double, long double); /* * libm kernel functions diff --git a/src/internal/complex_impl.h b/src/internal/complex_impl.h new file mode 100644 index 0000000..1de5965 --- /dev/null +++ b/src/internal/complex_impl.h @@ -0,0 +1,21 @@ +#ifndef _COMPLEX_IMPL_H +#define _COMPLEX_IMPL_H + +#include +#include "libm.h" + +#undef __CMPLX +#undef CMPLX +#undef CMPLXF +#undef CMPLXL + +#define __CMPLX(x, y, t) ((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z) + +#define CMPLX(x, y) __CMPLX(x, y, double) +#define CMPLXF(x, y) __CMPLX(x, y, float) +#define CMPLXL(x, y) __CMPLX(x, y, long double) + +_hidden double complex __ldexp_cexp(double complex,int); +_hidden float complex __ldexp_cexpf(float complex,int); + +#endif diff --git a/src/internal/libm.h b/src/internal/libm.h index 1c44964..554c58f 100644 --- a/src/internal/libm.h +++ b/src/internal/libm.h @@ -58,4 +58,19 @@ union ldshape { #error Unsupported long double representation #endif +#ifdef __GNUC__ +#define predict_true(x) __builtin_expect(!!(x), 1) +#define predict_false(x) __builtin_expect(x, 0) +#else +#define predict_true(x) (x) +#define predict_false(x) (x) +#endif + +_hidden long double __polevll(long double, const long double *, int); +_hidden long double __p1evll(long double, const long double *, int); + +#if LDBL_MANT_DIG != DBL_MANT_DIG +_hidden long double __math_invalidl(long double); +#endif + #endif diff --git a/src/math/__invtrigl.c b/src/math/__invtrigl.c new file mode 100644 index 0000000..971e727 --- /dev/null +++ b/src/math/__invtrigl.c @@ -0,0 +1,63 @@ +#include +#include "__invtrigl.h" + +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 +static const long double +pS0 = 1.66666666666666666631e-01L, +pS1 = -4.16313987993683104320e-01L, +pS2 = 3.69068046323246813704e-01L, +pS3 = -1.36213932016738603108e-01L, +pS4 = 1.78324189708471965733e-02L, +pS5 = -2.19216428382605211588e-04L, +pS6 = -7.10526623669075243183e-06L, +qS1 = -2.94788392796209867269e+00L, +qS2 = 3.27309890266528636716e+00L, +qS3 = -1.68285799854822427013e+00L, +qS4 = 3.90699412641738801874e-01L, +qS5 = -3.14365703596053263322e-02L; + +const long double pio2_hi = 1.57079632679489661926L; +const long double pio2_lo = -2.50827880633416601173e-20L; + +/* used in asinl() and acosl() */ +/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */ +long double __invtrigl_R(long double z) +{ + long double p, q; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * (pS5 + z * pS6)))))); + q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * (qS4 + z * qS5)))); + return p / q; +} +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +static const long double +pS0 = 1.66666666666666666666666666666700314e-01L, +pS1 = -7.32816946414566252574527475428622708e-01L, +pS2 = 1.34215708714992334609030036562143589e+00L, +pS3 = -1.32483151677116409805070261790752040e+00L, +pS4 = 7.61206183613632558824485341162121989e-01L, +pS5 = -2.56165783329023486777386833928147375e-01L, +pS6 = 4.80718586374448793411019434585413855e-02L, +pS7 = -4.42523267167024279410230886239774718e-03L, +pS8 = 1.44551535183911458253205638280410064e-04L, +pS9 = -2.10558957916600254061591040482706179e-07L, +qS1 = -4.84690167848739751544716485245697428e+00L, +qS2 = 9.96619113536172610135016921140206980e+00L, +qS3 = -1.13177895428973036660836798461641458e+01L, +qS4 = 7.74004374389488266169304117714658761e+00L, +qS5 = -3.25871986053534084709023539900339905e+00L, +qS6 = 8.27830318881232209752469022352928864e-01L, +qS7 = -1.18768052702942805423330715206348004e-01L, +qS8 = 8.32600764660522313269101537926539470e-03L, +qS9 = -1.99407384882605586705979504567947007e-04L; + +const long double pio2_hi = 1.57079632679489661923132169163975140L; +const long double pio2_lo = 4.33590506506189051239852201302167613e-35L; + +long double __invtrigl_R(long double z) +{ + long double p, q; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * (pS5 + z * (pS6 + z * (pS7 + z * (pS8 + z * pS9))))))))); + q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * (qS4 + z * (qS5 + z * (qS6 + z * (qS7 + z * (qS8 + z * qS9)))))))); + return p / q; +} +#endif diff --git a/src/math/__invtrigl.h b/src/math/__invtrigl.h new file mode 100644 index 0000000..734679b --- /dev/null +++ b/src/math/__invtrigl.h @@ -0,0 +1,8 @@ +#include + +/* shared by acosl, asinl and atan2l */ +#define pio2_hi __pio2_hi +#define pio2_lo __pio2_lo +_hidden extern const long double pio2_hi, pio2_lo; + +_hidden long double __invtrigl_R(long double z); diff --git a/src/math/__math_invalidl.c b/src/math/__math_invalidl.c new file mode 100644 index 0000000..d25933f --- /dev/null +++ b/src/math/__math_invalidl.c @@ -0,0 +1,9 @@ +#include +#include "libm.h" + +#if LDBL_MANT_DIG != DBL_MANT_DIG +long double __math_invalidl(long double x) +{ + return (x - x) / (x - x); +} +#endif diff --git a/src/math/__polevll.c b/src/math/__polevll.c new file mode 100644 index 0000000..faf49fc --- /dev/null +++ b/src/math/__polevll.c @@ -0,0 +1,81 @@ +/* + * Evaluate polynomial + * + * + * SYNOPSIS: + * + * int N; + * long double x, y, coef[N+1], polevl[]; + * + * y = polevll( x, coef, N ); + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evll() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevll(). + * + * + * SPEED: + * + * In the interest of speed, there are no checks for out + * of bounds arithmetic. This routine is used by most of + * the functions in the library. Depending on available + * equipment features, the user may wish to rewrite the + * program in microcode or assembly language. + * + */ + +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#else +/* + * Polynomial evaluator: + * P[0] x^n + P[1] x^(n-1) + ... + P[n] + */ +long double __polevll(long double x, const long double *P, int n) +{ + long double y; + + y = *P++; + do + { + y = y * x + *P++; + } + while (--n); + + return y; +} + +/* + * Polynomial evaluator: + * x^n + P[0] x^(n-1) + P[1] x^(n-2) + ... + P[n] + */ +long double __p1evll(long double x, const long double *P, int n) +{ + long double y; + + n -= 1; + y = x + *P++; + do + { + y = y * x + *P++; + } + while (--n); + + return y; +} +#endif diff --git a/src/math/__signbit.c b/src/math/__signbit.c new file mode 100644 index 0000000..e700b6b --- /dev/null +++ b/src/math/__signbit.c @@ -0,0 +1,13 @@ +#include "libm.h" + +// FIXME: macro in math.h +int __signbit(double x) +{ + union { + double d; + uint64_t i; + } y = { x }; + return y.i>>63; +} + + diff --git a/src/math/__signbitf.c b/src/math/__signbitf.c new file mode 100644 index 0000000..40ad3cf --- /dev/null +++ b/src/math/__signbitf.c @@ -0,0 +1,11 @@ +#include "libm.h" + +// FIXME: macro in math.h +int __signbitf(float x) +{ + union { + float f; + uint32_t i; + } y = { x }; + return y.i>>31; +} diff --git a/src/math/__signbitl.c b/src/math/__signbitl.c new file mode 100644 index 0000000..63b3dc5 --- /dev/null +++ b/src/math/__signbitl.c @@ -0,0 +1,14 @@ +#include "libm.h" + +#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +int __signbitl(long double x) +{ + union ldshape u = {x}; + return u.i.se >> 15; +} +#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +int __signbitl(long double x) +{ + return __signbit(x); +} +#endif diff --git a/src/math/arm/fabs.c b/src/math/arm/fabs.c new file mode 100644 index 0000000..6e1d367 --- /dev/null +++ b/src/math/arm/fabs.c @@ -0,0 +1,15 @@ +#include + +#if __ARM_PCS_VFP && __ARM_FP&8 + +double fabs(double x) +{ + __asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x)); + return x; +} + +#else + +#include "../fabs.c" + +#endif diff --git a/src/math/arm/fabsf.c b/src/math/arm/fabsf.c new file mode 100644 index 0000000..4a217c9 --- /dev/null +++ b/src/math/arm/fabsf.c @@ -0,0 +1,15 @@ +#include + +#if __ARM_PCS_VFP && !BROKEN_VFP_ASM + +float fabsf(float x) +{ + __asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x)); + return x; +} + +#else + +#include "../fabsf.c" + +#endif diff --git a/src/math/arm/fma.c b/src/math/arm/fma.c new file mode 100644 index 0000000..2a9b8ef --- /dev/null +++ b/src/math/arm/fma.c @@ -0,0 +1,15 @@ +#include + +#if __ARM_FEATURE_FMA && __ARM_FP&8 && !__SOFTFP__ + +double fma(double x, double y, double z) +{ + __asm__ ("vfma.f64 %P0, %P1, %P2" : "+w"(z) : "w"(x), "w"(y)); + return z; +} + +#else + +#include "../fma.c" + +#endif diff --git a/src/math/arm/fmaf.c b/src/math/arm/fmaf.c new file mode 100644 index 0000000..a1793d2 --- /dev/null +++ b/src/math/arm/fmaf.c @@ -0,0 +1,15 @@ +#include + +#if __ARM_FEATURE_FMA && __ARM_FP&4 && !__SOFTFP__ && !BROKEN_VFP_ASM + +float fmaf(float x, float y, float z) +{ + __asm__ ("vfma.f32 %0, %1, %2" : "+t"(z) : "t"(x), "t"(y)); + return z; +} + +#else + +#include "../fmaf.c" + +#endif diff --git a/src/math/arm/sqrt.c b/src/math/arm/sqrt.c new file mode 100644 index 0000000..567e2e9 --- /dev/null +++ b/src/math/arm/sqrt.c @@ -0,0 +1,15 @@ +#include + +#if (__ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__)) && (__ARM_FP&8) + +double sqrt(double x) +{ + __asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x)); + return x; +} + +#else + +#include "../sqrt.c" + +#endif diff --git a/src/math/arm/sqrtf.c b/src/math/arm/sqrtf.c new file mode 100644 index 0000000..3269329 --- /dev/null +++ b/src/math/arm/sqrtf.c @@ -0,0 +1,15 @@ +#include + +#if (__ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__)) && !BROKEN_VFP_ASM + +float sqrtf(float x) +{ + __asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x)); + return x; +} + +#else + +#include "../sqrtf.c" + +#endif diff --git a/src/math/arm64/ceil.c b/src/math/arm64/ceil.c new file mode 100644 index 0000000..ac80c1d --- /dev/null +++ b/src/math/arm64/ceil.c @@ -0,0 +1,7 @@ +#include + +double ceil(double x) +{ + __asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/ceilf.c b/src/math/arm64/ceilf.c new file mode 100644 index 0000000..1ef1e9c --- /dev/null +++ b/src/math/arm64/ceilf.c @@ -0,0 +1,7 @@ +#include + +float ceilf(float x) +{ + __asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/fabs.c b/src/math/arm64/fabs.c new file mode 100644 index 0000000..5c3ecaf --- /dev/null +++ b/src/math/arm64/fabs.c @@ -0,0 +1,7 @@ +#include + +double fabs(double x) +{ + __asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/fabsf.c b/src/math/arm64/fabsf.c new file mode 100644 index 0000000..7fde981 --- /dev/null +++ b/src/math/arm64/fabsf.c @@ -0,0 +1,7 @@ +#include + +float fabsf(float x) +{ + __asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/floor.c b/src/math/arm64/floor.c new file mode 100644 index 0000000..50ffdb2 --- /dev/null +++ b/src/math/arm64/floor.c @@ -0,0 +1,7 @@ +#include + +double floor(double x) +{ + __asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/floorf.c b/src/math/arm64/floorf.c new file mode 100644 index 0000000..8d007e9 --- /dev/null +++ b/src/math/arm64/floorf.c @@ -0,0 +1,7 @@ +#include + +float floorf(float x) +{ + __asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/fma.c b/src/math/arm64/fma.c new file mode 100644 index 0000000..2450ea7 --- /dev/null +++ b/src/math/arm64/fma.c @@ -0,0 +1,7 @@ +#include + +double fma(double x, double y, double z) +{ + __asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/arm64/fmaf.c b/src/math/arm64/fmaf.c new file mode 100644 index 0000000..9a14721 --- /dev/null +++ b/src/math/arm64/fmaf.c @@ -0,0 +1,7 @@ +#include + +float fmaf(float x, float y, float z) +{ + __asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/arm64/fmax.c b/src/math/arm64/fmax.c new file mode 100644 index 0000000..86dcb3b --- /dev/null +++ b/src/math/arm64/fmax.c @@ -0,0 +1,7 @@ +#include + +double fmax(double x, double y) +{ + __asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/arm64/fmaxf.c b/src/math/arm64/fmaxf.c new file mode 100644 index 0000000..ee5eac2 --- /dev/null +++ b/src/math/arm64/fmaxf.c @@ -0,0 +1,7 @@ +#include + +float fmaxf(float x, float y) +{ + __asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/arm64/fmin.c b/src/math/arm64/fmin.c new file mode 100644 index 0000000..f1e9980 --- /dev/null +++ b/src/math/arm64/fmin.c @@ -0,0 +1,7 @@ +#include + +double fmin(double x, double y) +{ + __asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/arm64/fminf.c b/src/math/arm64/fminf.c new file mode 100644 index 0000000..80468f6 --- /dev/null +++ b/src/math/arm64/fminf.c @@ -0,0 +1,7 @@ +#include + +float fminf(float x, float y) +{ + __asm__ ("fminnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/arm64/llrint.c b/src/math/arm64/llrint.c new file mode 100644 index 0000000..a9e07a9 --- /dev/null +++ b/src/math/arm64/llrint.c @@ -0,0 +1,10 @@ +#include + +long long llrint(double x) +{ + long long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/arm64/llrintf.c b/src/math/arm64/llrintf.c new file mode 100644 index 0000000..12b6804 --- /dev/null +++ b/src/math/arm64/llrintf.c @@ -0,0 +1,10 @@ +#include + +long long llrintf(float x) +{ + long long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/arm64/llround.c b/src/math/arm64/llround.c new file mode 100644 index 0000000..e09ddd4 --- /dev/null +++ b/src/math/arm64/llround.c @@ -0,0 +1,8 @@ +#include + +long long llround(double x) +{ + long long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/arm64/llroundf.c b/src/math/arm64/llroundf.c new file mode 100644 index 0000000..1669959 --- /dev/null +++ b/src/math/arm64/llroundf.c @@ -0,0 +1,8 @@ +#include + +long long llroundf(float x) +{ + long long n; + __asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/arm64/lrint.c b/src/math/arm64/lrint.c new file mode 100644 index 0000000..cb7785a --- /dev/null +++ b/src/math/arm64/lrint.c @@ -0,0 +1,10 @@ +#include + +long lrint(double x) +{ + long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/arm64/lrintf.c b/src/math/arm64/lrintf.c new file mode 100644 index 0000000..4d750d6 --- /dev/null +++ b/src/math/arm64/lrintf.c @@ -0,0 +1,10 @@ +#include + +long lrintf(float x) +{ + long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/arm64/lround.c b/src/math/arm64/lround.c new file mode 100644 index 0000000..85656c7 --- /dev/null +++ b/src/math/arm64/lround.c @@ -0,0 +1,8 @@ +#include + +long lround(double x) +{ + long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/arm64/lroundf.c b/src/math/arm64/lroundf.c new file mode 100644 index 0000000..32e51f3 --- /dev/null +++ b/src/math/arm64/lroundf.c @@ -0,0 +1,8 @@ +#include + +long lroundf(float x) +{ + long n; + __asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/arm64/nearbyint.c b/src/math/arm64/nearbyint.c new file mode 100644 index 0000000..9c3fdb4 --- /dev/null +++ b/src/math/arm64/nearbyint.c @@ -0,0 +1,7 @@ +#include + +double nearbyint(double x) +{ + __asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/nearbyintf.c b/src/math/arm64/nearbyintf.c new file mode 100644 index 0000000..8e7f61d --- /dev/null +++ b/src/math/arm64/nearbyintf.c @@ -0,0 +1,7 @@ +#include + +float nearbyintf(float x) +{ + __asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/rint.c b/src/math/arm64/rint.c new file mode 100644 index 0000000..45b194b --- /dev/null +++ b/src/math/arm64/rint.c @@ -0,0 +1,7 @@ +#include + +double rint(double x) +{ + __asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/rintf.c b/src/math/arm64/rintf.c new file mode 100644 index 0000000..1ae7dd2 --- /dev/null +++ b/src/math/arm64/rintf.c @@ -0,0 +1,7 @@ +#include + +float rintf(float x) +{ + __asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/round.c b/src/math/arm64/round.c new file mode 100644 index 0000000..897a84c --- /dev/null +++ b/src/math/arm64/round.c @@ -0,0 +1,7 @@ +#include + +double round(double x) +{ + __asm__ ("frinta %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/roundf.c b/src/math/arm64/roundf.c new file mode 100644 index 0000000..91637ea --- /dev/null +++ b/src/math/arm64/roundf.c @@ -0,0 +1,7 @@ +#include + +float roundf(float x) +{ + __asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/sqrt.c b/src/math/arm64/sqrt.c new file mode 100644 index 0000000..fe93c3e --- /dev/null +++ b/src/math/arm64/sqrt.c @@ -0,0 +1,7 @@ +#include + +double sqrt(double x) +{ + __asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/sqrtf.c b/src/math/arm64/sqrtf.c new file mode 100644 index 0000000..275c7f3 --- /dev/null +++ b/src/math/arm64/sqrtf.c @@ -0,0 +1,7 @@ +#include + +float sqrtf(float x) +{ + __asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/trunc.c b/src/math/arm64/trunc.c new file mode 100644 index 0000000..e592147 --- /dev/null +++ b/src/math/arm64/trunc.c @@ -0,0 +1,7 @@ +#include + +double trunc(double x) +{ + __asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/arm64/truncf.c b/src/math/arm64/truncf.c new file mode 100644 index 0000000..20ef30f --- /dev/null +++ b/src/math/arm64/truncf.c @@ -0,0 +1,7 @@ +#include + +float truncf(float x) +{ + __asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/atan2l.c b/src/math/atan2l.c new file mode 100644 index 0000000..72460a4 --- /dev/null +++ b/src/math/atan2l.c @@ -0,0 +1,91 @@ +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double atan2l(long double y, long double x) +{ + return atan2(y, x); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +#include "__invtrigl.h" + +long double atan2l(long double y, long double x) +{ + union ldshape ux, uy; + long double z; + int m, ex, ey; + + if (isnan(x) || isnan(y)) + return x + y; + if (x == 1) + return atanl(y); + ux.f = x; + uy.f = y; + ex = ux.i.se & 0x7fff; + ey = uy.i.se & 0x7fff; + m = 2 * (ux.i.se >> 15) | uy.i.se >> 15; + if (y == 0) + { + switch (m) + { + case 0: + case 1: + return y; /* atan(+-0,+anything)=+-0 */ + case 2: + return 2 * pio2_hi; /* atan(+0,-anything) = pi */ + case 3: + return -2 * pio2_hi; /* atan(-0,-anything) =-pi */ + } + } + if (x == 0) + return m & 1 ? -pio2_hi : pio2_hi; + if (ex == 0x7fff) + { + if (ey == 0x7fff) + { + switch (m) + { + case 0: + return pio2_hi / 2; /* atan(+INF,+INF) */ + case 1: + return -pio2_hi / 2; /* atan(-INF,+INF) */ + case 2: + return 1.5 * pio2_hi; /* atan(+INF,-INF) */ + case 3: + return -1.5 * pio2_hi; /* atan(-INF,-INF) */ + } + } + else + { + switch (m) + { + case 0: + return 0.0; /* atan(+...,+INF) */ + case 1: + return -0.0; /* atan(-...,+INF) */ + case 2: + return 2 * pio2_hi; /* atan(+...,-INF) */ + case 3: + return -2 * pio2_hi; /* atan(-...,-INF) */ + } + } + } + if (ex + 120 < ey || ey == 0x7fff) + return m & 1 ? -pio2_hi : pio2_hi; + /* z = atan(|y/x|) without spurious underflow */ + if ((m & 2) && ey + 120 < ex) /* |y/x| < 0x1p-120, x<0 */ + z = 0.0; + else + z = atanl(fabsl(y / x)); + switch (m) + { + case 0: + return z; /* atan(+,+) */ + case 1: + return -z; /* atan(-,+) */ + case 2: + return 2 * pio2_hi - (z - 2 * pio2_lo); /* atan(+,-) */ + default: /* case 3 */ + return (z - 2 * pio2_lo) - 2 * pio2_hi; /* atan(-,-) */ + } +} +#endif diff --git a/src/math/atanl.c b/src/math/atanl.c new file mode 100644 index 0000000..3712511 --- /dev/null +++ b/src/math/atanl.c @@ -0,0 +1,188 @@ +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double atanl(long double x) +{ + return atan(x); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 + +#if LDBL_MANT_DIG == 64 +#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | (u.i.m>>55 & 0xff)) + +static const long double atanhi[] = +{ + 4.63647609000806116202e-01L, + 7.85398163397448309628e-01L, + 9.82793723247329067960e-01L, + 1.57079632679489661926e+00L, +}; + +static const long double atanlo[] = +{ + 1.18469937025062860669e-20L, + -1.25413940316708300586e-20L, + 2.55232234165405176172e-20L, + -2.50827880633416601173e-20L, +}; + +static const long double aT[] = +{ + 3.33333333333333333017e-01L, + -1.99999999999999632011e-01L, + 1.42857142857046531280e-01L, + -1.11111111100562372733e-01L, + 9.09090902935647302252e-02L, + -7.69230552476207730353e-02L, + 6.66661718042406260546e-02L, + -5.88158892835030888692e-02L, + 5.25499891539726639379e-02L, + -4.70119845393155721494e-02L, + 4.03539201366454414072e-02L, + -2.91303858419364158725e-02L, + 1.24822046299269234080e-02L, +}; + +static long double T_even(long double x) +{ + return aT[0] + x * (aT[2] + x * (aT[4] + x * (aT[6] + + x * (aT[8] + x * (aT[10] + x * aT[12]))))); +} + +static long double T_odd(long double x) +{ + return aT[1] + x * (aT[3] + x * (aT[5] + x * (aT[7] + + x * (aT[9] + x * aT[11])))); +} +#elif LDBL_MANT_DIG == 113 +#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | u.i.top>>8) + +static const long double atanhi[] = +{ + 4.63647609000806116214256231461214397e-01L, + 7.85398163397448309615660845819875699e-01L, + 9.82793723247329067985710611014666038e-01L, + 1.57079632679489661923132169163975140e+00L, +}; + +static const long double atanlo[] = +{ + 4.89509642257333492668618435220297706e-36L, + 2.16795253253094525619926100651083806e-35L, + -2.31288434538183565909319952098066272e-35L, + 4.33590506506189051239852201302167613e-35L, +}; + +static const long double aT[] = +{ + 3.33333333333333333333333333333333125e-01L, + -1.99999999999999999999999999999180430e-01L, + 1.42857142857142857142857142125269827e-01L, + -1.11111111111111111111110834490810169e-01L, + 9.09090909090909090908522355708623681e-02L, + -7.69230769230769230696553844935357021e-02L, + 6.66666666666666660390096773046256096e-02L, + -5.88235294117646671706582985209643694e-02L, + 5.26315789473666478515847092020327506e-02L, + -4.76190476189855517021024424991436144e-02L, + 4.34782608678695085948531993458097026e-02L, + -3.99999999632663469330634215991142368e-02L, + 3.70370363987423702891250829918659723e-02L, + -3.44827496515048090726669907612335954e-02L, + 3.22579620681420149871973710852268528e-02L, + -3.03020767654269261041647570626778067e-02L, + 2.85641979882534783223403715930946138e-02L, + -2.69824879726738568189929461383741323e-02L, + 2.54194698498808542954187110873675769e-02L, + -2.35083879708189059926183138130183215e-02L, + 2.04832358998165364349957325067131428e-02L, + -1.54489555488544397858507248612362957e-02L, + 8.64492360989278761493037861575248038e-03L, + -2.58521121597609872727919154569765469e-03L, +}; + +static long double T_even(long double x) +{ + return (aT[0] + x * (aT[2] + x * (aT[4] + x * (aT[6] + x * (aT[8] + + x * (aT[10] + x * (aT[12] + x * (aT[14] + x * (aT[16] + + x * (aT[18] + x * (aT[20] + x * aT[22]))))))))))); +} + +static long double T_odd(long double x) +{ + return (aT[1] + x * (aT[3] + x * (aT[5] + x * (aT[7] + x * (aT[9] + + x * (aT[11] + x * (aT[13] + x * (aT[15] + x * (aT[17] + + x * (aT[19] + x * (aT[21] + x * aT[23]))))))))))); +} +#endif + +long double atanl(long double x) +{ + union ldshape u = {x}; + long double w, s1, s2, z; + int id; + unsigned e = u.i.se & 0x7fff; + unsigned sign = u.i.se >> 15; + unsigned expman; + + if (e >= 0x3fff + LDBL_MANT_DIG + 1) /* if |x| is large, atan(x)~=pi/2 */ + { + if (isnan(x)) + return x; + return sign ? -atanhi[3] : atanhi[3]; + } + /* Extract the exponent and the first few bits of the mantissa. */ + expman = EXPMAN(u); + if (expman < ((0x3fff - 2) << 8) + 0xc0) /* |x| < 0.4375 */ + { + if (e < 0x3fff - (LDBL_MANT_DIG + 1) / 2) /* if |x| is small, atanl(x)~=x */ + { + /* raise underflow if subnormal */ + if (e == 0) + FORCE_EVAL((float)x); + return x; + } + id = -1; + } + else + { + x = fabsl(x); + if (expman < (0x3fff << 8) + 0x30) /* |x| < 1.1875 */ + { + if (expman < ((0x3fff - 1) << 8) + 0x60) /* 7/16 <= |x| < 11/16 */ + { + id = 0; + x = (2.0 * x - 1.0) / (2.0 + x); + } + else /* 11/16 <= |x| < 19/16 */ + { + id = 1; + x = (x - 1.0) / (x + 1.0); + } + } + else + { + if (expman < ((0x3fff + 1) << 8) + 0x38) /* |x| < 2.4375 */ + { + id = 2; + x = (x - 1.5) / (1.0 + 1.5 * x); + } + else /* 2.4375 <= |x| */ + { + id = 3; + x = -1.0 / x; + } + } + } + /* end of argument reduction */ + z = x * x; + w = z * z; + /* break sum aT[i]z**(i+1) into odd and even poly */ + s1 = z * T_even(w); + s2 = w * T_odd(w); + if (id < 0) + return x - x * (s1 + s2); + z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x); + return sign ? -z : z; +} +#endif diff --git a/src/math/copysignl.c b/src/math/copysignl.c new file mode 100644 index 0000000..9dd933c --- /dev/null +++ b/src/math/copysignl.c @@ -0,0 +1,16 @@ +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double copysignl(long double x, long double y) +{ + return copysign(x, y); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +long double copysignl(long double x, long double y) +{ + union ldshape ux = {x}, uy = {y}; + ux.i.se &= 0x7fff; + ux.i.se |= uy.i.se & 0x8000; + return ux.f; +} +#endif diff --git a/src/math/fabsl.c b/src/math/fabsl.c new file mode 100644 index 0000000..c4f36ec --- /dev/null +++ b/src/math/fabsl.c @@ -0,0 +1,15 @@ +#include "libm.h" +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double fabsl(long double x) +{ + return fabs(x); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +long double fabsl(long double x) +{ + union ldshape u = {x}; + + u.i.se &= 0x7fff; + return u.f; +} +#endif diff --git a/src/math/frexpl.c b/src/math/frexpl.c new file mode 100644 index 0000000..504931a --- /dev/null +++ b/src/math/frexpl.c @@ -0,0 +1,34 @@ +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double frexpl(long double x, int *e) +{ + return frexp(x, e); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +long double frexpl(long double x, int *e) +{ + union ldshape u = {x}; + int ee = u.i.se & 0x7fff; + + if (!ee) + { + if (x) + { + x = frexpl(x * 0x1p120, e); + *e -= 120; + } + else *e = 0; + return x; + } + else if (ee == 0x7fff) + { + return x; + } + + *e = ee - 0x3ffe; + u.i.se &= 0x8000; + u.i.se |= 0x3ffe; + return u.f; +} +#endif diff --git a/src/math/hypotl.c b/src/math/hypotl.c new file mode 100644 index 0000000..479aa92 --- /dev/null +++ b/src/math/hypotl.c @@ -0,0 +1,66 @@ +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double hypotl(long double x, long double y) +{ + return hypot(x, y); +} +#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +#if LDBL_MANT_DIG == 64 +#define SPLIT (0x1p32L+1) +#elif LDBL_MANT_DIG == 113 +#define SPLIT (0x1p57L+1) +#endif + +static void sq(long double *hi, long double *lo, long double x) +{ + long double xh, xl, xc; + xc = x*SPLIT; + xh = x - xc + xc; + xl = x - xh; + *hi = x*x; + *lo = xh*xh - *hi + 2*xh*xl + xl*xl; +} + +long double hypotl(long double x, long double y) +{ + union ldshape ux = {x}, uy = {y}; + int ex, ey; + long double hx, lx, hy, ly, z; + + ux.i.se &= 0x7fff; + uy.i.se &= 0x7fff; + if (ux.i.se < uy.i.se) { + ex = uy.i.se; + ey = ux.i.se; + x = uy.f; + y = ux.f; + } else { + ex = ux.i.se; + ey = uy.i.se; + x = ux.f; + y = uy.f; + } + + if (ex == 0x7fff && isinf(y)) + return y; + if (ex == 0x7fff || y == 0) + return x; + if (ex - ey > LDBL_MANT_DIG) + return x + y; + + z = 1; + if (ex > 0x3fff+8000) { + z = 0x1p10000L; + x *= 0x1p-10000L; + y *= 0x1p-10000L; + } else if (ey < 0x3fff-8000) { + z = 0x1p-10000L; + x *= 0x1p10000L; + y *= 0x1p10000L; + } + sq(&hx, &lx, x); + sq(&hy, &ly, y); + return z*sqrtl(ly+lx+hy+hx); +} +#endif diff --git a/src/math/logl.c b/src/math/logl.c new file mode 100644 index 0000000..824be45 --- /dev/null +++ b/src/math/logl.c @@ -0,0 +1,171 @@ +/* + * Natural logarithm, long double precision + * + * + * SYNOPSIS: + * + * long double x, y, logl(); + * + * y = logl( x ); + * + * + * DESCRIPTION: + * + * Returns the base e (2.718...) logarithm of x. + * + * The argument is separated into its exponent and fractional + * parts. If the exponent is between -1 and +1, the logarithm + * of the fraction is approximated by + * + * log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x). + * + * Otherwise, setting z = 2(x-1)/(x+1), + * + * log(x) = log(1+z/2) - log(1-z/2) = z + z**3 P(z)/Q(z). + * + * + * ACCURACY: + * + * Relative error: + * arithmetic domain # trials peak rms + * IEEE 0.5, 2.0 150000 8.71e-20 2.75e-20 + * IEEE exp(+-10000) 100000 5.39e-20 2.34e-20 + * + * In the tests over the interval exp(+-10000), the logarithms + * of the random arguments were uniformly distributed over + * [-10000, +10000]. + */ + +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double logl(long double x) +{ + return log(x); +} +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 +/* Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x) + * 1/sqrt(2) <= x < sqrt(2) + * Theoretical peak relative error = 2.32e-20 + */ +static const long double P[] = +{ + 4.5270000862445199635215E-5L, + 4.9854102823193375972212E-1L, + 6.5787325942061044846969E0L, + 2.9911919328553073277375E1L, + 6.0949667980987787057556E1L, + 5.7112963590585538103336E1L, + 2.0039553499201281259648E1L, +}; +static const long double Q[] = +{ + /* 1.0000000000000000000000E0,*/ + 1.5062909083469192043167E1L, + 8.3047565967967209469434E1L, + 2.2176239823732856465394E2L, + 3.0909872225312059774938E2L, + 2.1642788614495947685003E2L, + 6.0118660497603843919306E1L, +}; + +/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2), + * where z = 2(x-1)/(x+1) + * 1/sqrt(2) <= x < sqrt(2) + * Theoretical peak relative error = 6.16e-22 + */ +static const long double R[4] = +{ + 1.9757429581415468984296E-3L, + -7.1990767473014147232598E-1L, + 1.0777257190312272158094E1L, + -3.5717684488096787370998E1L, +}; +static const long double S[4] = +{ + /* 1.00000000000000000000E0L,*/ + -2.6201045551331104417768E1L, + 1.9361891836232102174846E2L, + -4.2861221385716144629696E2L, + }; +static const long double C1 = 6.9314575195312500000000E-1L; +static const long double C2 = 1.4286068203094172321215E-6L; + +#define SQRTH 0.70710678118654752440L + +long double logl(long double x) +{ + long double y, z; + int e; + + if (isnan(x)) + return x; + if (x == INFINITY) + return x; + if (x <= 0.0) + { + if (x == 0.0) + return -1 / (x * x); /* -inf with divbyzero */ + return 0 / 0.0f; /* nan with invalid */ + } + + /* separate mantissa from exponent */ + /* Note, frexp is used so that denormal numbers + * will be handled properly. + */ + x = frexpl(x, &e); + + /* logarithm using log(x) = z + z**3 P(z)/Q(z), + * where z = 2(x-1)/(x+1) + */ + if (e > 2 || e < -2) + { + if (x < SQRTH) /* 2(2x-1)/(2x+1) */ + { + e -= 1; + z = x - 0.5; + y = 0.5 * z + 0.5; + } + else /* 2 (x-1)/(x+1) */ + { + z = x - 0.5; + z -= 0.5; + y = 0.5 * x + 0.5; + } + x = z / y; + z = x * x; + z = x * (z * __polevll(z, R, 3) / __p1evll(z, S, 3)); + z = z + e * C2; + z = z + x; + z = z + e * C1; + return z; + } + + /* logarithm using log(1+x) = x - .5x**2 + x**3 P(x)/Q(x) */ + if (x < SQRTH) + { + e -= 1; + x = 2.0 * x - 1.0; + } + else + { + x = x - 1.0; + } + z = x * x; + y = x * (z * __polevll(x, P, 6) / __p1evll(x, Q, 6)); + y = y + e * C2; + z = y - 0.5 * z; + /* Note, the sum of above terms does not exceed x/4, + * so it contributes at most about 1/4 lsb to the error. + */ + z = z + x; + z = z + e * C1; /* This sum has an error of 1/2 lsb. */ + return z; +} +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +// TODO: broken implementation to make things compile +long double logl(long double x) +{ + return log(x); +} +#endif diff --git a/src/math/riscv64/copysign.c b/src/math/riscv64/copysign.c new file mode 100644 index 0000000..c785417 --- /dev/null +++ b/src/math/riscv64/copysign.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double copysign(double x, double y) +{ + __asm__ ("fsgnj.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../copysign.c" + +#endif diff --git a/src/math/riscv64/copysignf.c b/src/math/riscv64/copysignf.c new file mode 100644 index 0000000..a125611 --- /dev/null +++ b/src/math/riscv64/copysignf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float copysignf(float x, float y) +{ + __asm__ ("fsgnj.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../copysignf.c" + +#endif diff --git a/src/math/riscv64/fabs.c b/src/math/riscv64/fabs.c new file mode 100644 index 0000000..5290b6f --- /dev/null +++ b/src/math/riscv64/fabs.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double fabs(double x) +{ + __asm__ ("fabs.d %0, %1" : "=f"(x) : "f"(x)); + return x; +} + +#else + +#include "../fabs.c" + +#endif diff --git a/src/math/riscv64/fabsf.c b/src/math/riscv64/fabsf.c new file mode 100644 index 0000000..f5032e3 --- /dev/null +++ b/src/math/riscv64/fabsf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float fabsf(float x) +{ + __asm__ ("fabs.s %0, %1" : "=f"(x) : "f"(x)); + return x; +} + +#else + +#include "../fabsf.c" + +#endif diff --git a/src/math/riscv64/fma.c b/src/math/riscv64/fma.c new file mode 100644 index 0000000..99b0571 --- /dev/null +++ b/src/math/riscv64/fma.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double fma(double x, double y, double z) +{ + __asm__ ("fmadd.d %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z)); + return x; +} + +#else + +#include "../fma.c" + +#endif diff --git a/src/math/riscv64/fmaf.c b/src/math/riscv64/fmaf.c new file mode 100644 index 0000000..f9dc47e --- /dev/null +++ b/src/math/riscv64/fmaf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float fmaf(float x, float y, float z) +{ + __asm__ ("fmadd.s %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z)); + return x; +} + +#else + +#include "../fmaf.c" + +#endif diff --git a/src/math/riscv64/fmax.c b/src/math/riscv64/fmax.c new file mode 100644 index 0000000..023709c --- /dev/null +++ b/src/math/riscv64/fmax.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double fmax(double x, double y) +{ + __asm__ ("fmax.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../fmax.c" + +#endif diff --git a/src/math/riscv64/fmaxf.c b/src/math/riscv64/fmaxf.c new file mode 100644 index 0000000..863d2bd --- /dev/null +++ b/src/math/riscv64/fmaxf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float fmaxf(float x, float y) +{ + __asm__ ("fmax.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../fmaxf.c" + +#endif diff --git a/src/math/riscv64/fmin.c b/src/math/riscv64/fmin.c new file mode 100644 index 0000000..a4e3b06 --- /dev/null +++ b/src/math/riscv64/fmin.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double fmin(double x, double y) +{ + __asm__ ("fmin.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../fmin.c" + +#endif diff --git a/src/math/riscv64/fminf.c b/src/math/riscv64/fminf.c new file mode 100644 index 0000000..32156e8 --- /dev/null +++ b/src/math/riscv64/fminf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float fminf(float x, float y) +{ + __asm__ ("fmin.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); + return x; +} + +#else + +#include "../fminf.c" + +#endif diff --git a/src/math/riscv64/sqrt.c b/src/math/riscv64/sqrt.c new file mode 100644 index 0000000..867a504 --- /dev/null +++ b/src/math/riscv64/sqrt.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 64 + +double sqrt(double x) +{ + __asm__ ("fsqrt.d %0, %1" : "=f"(x) : "f"(x)); + return x; +} + +#else + +#include "../sqrt.c" + +#endif diff --git a/src/math/riscv64/sqrtf.c b/src/math/riscv64/sqrtf.c new file mode 100644 index 0000000..610c2cf --- /dev/null +++ b/src/math/riscv64/sqrtf.c @@ -0,0 +1,15 @@ +#include + +#if __riscv_flen >= 32 + +float sqrtf(float x) +{ + __asm__ ("fsqrt.s %0, %1" : "=f"(x) : "f"(x)); + return x; +} + +#else + +#include "../sqrtf.c" + +#endif diff --git a/src/math/sqrt_data.h b/src/math/sqrt_data.h new file mode 100644 index 0000000..f9d4d35 --- /dev/null +++ b/src/math/sqrt_data.h @@ -0,0 +1,13 @@ +#ifndef _SQRT_DATA_H +#define _SQRT_DATA_H + +#include +#include + +/* if x in [1,2): i = (int)(64*x); + if x in [2,4): i = (int)(32*x-64); + __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: + |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ +extern _hidden const uint16_t __rsqrt_tab[128]; + +#endif diff --git a/src/math/sqrtl.c b/src/math/sqrtl.c new file mode 100644 index 0000000..1b657a4 --- /dev/null +++ b/src/math/sqrtl.c @@ -0,0 +1,271 @@ +#include +#include +#include +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double sqrtl(long double x) +{ + return sqrt(x); +} +#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384 +#include "sqrt_data.h" + +#define FENV_SUPPORT 1 + +typedef struct +{ + uint64_t hi; + uint64_t lo; +} u128; + +/* top: 16 bit sign+exponent, x: significand. */ +static inline long double mkldbl(uint64_t top, u128 x) +{ + union ldshape u; +#if LDBL_MANT_DIG == 113 + u.i2.hi = x.hi; + u.i2.lo = x.lo; + u.i2.hi &= 0x0000ffffffffffff; + u.i2.hi |= top << 48; +#elif LDBL_MANT_DIG == 64 + u.i.se = top; + u.i.m = x.lo; + /* force the top bit on non-zero (and non-subnormal) results. */ + if (top & 0x7fff) + u.i.m |= 0x8000000000000000; +#endif + return u.f; +} + +/* return: top 16 bit is sign+exp and following bits are the significand. */ +static inline u128 asu128(long double x) +{ + union ldshape u = {.f = x}; + u128 r; +#if LDBL_MANT_DIG == 113 + r.hi = u.i2.hi; + r.lo = u.i2.lo; +#elif LDBL_MANT_DIG == 64 + r.lo = u.i.m << 49; + /* ignore the top bit: pseudo numbers are not handled. */ + r.hi = u.i.m >> 15; + r.hi &= 0x0000ffffffffffff; + r.hi |= (uint64_t)u.i.se << 48; +#endif + return r; +} + +/* returns a*b*2^-32 - e, with error 0 <= e < 1. */ +static inline uint32_t mul32(uint32_t a, uint32_t b) +{ + return (uint64_t)a * b >> 32; +} + +/* returns a*b*2^-64 - e, with error 0 <= e < 3. */ +static inline uint64_t mul64(uint64_t a, uint64_t b) +{ + uint64_t ahi = a >> 32; + uint64_t alo = a & 0xffffffff; + uint64_t bhi = b >> 32; + uint64_t blo = b & 0xffffffff; + return ahi * bhi + (ahi * blo >> 32) + (alo * bhi >> 32); +} + +static inline u128 add64(u128 a, uint64_t b) +{ + u128 r; + r.lo = a.lo + b; + r.hi = a.hi; + if (r.lo < a.lo) + r.hi++; + return r; +} + +static inline u128 add128(u128 a, u128 b) +{ + u128 r; + r.lo = a.lo + b.lo; + r.hi = a.hi + b.hi; + if (r.lo < a.lo) + r.hi++; + return r; +} + +static inline u128 sub64(u128 a, uint64_t b) +{ + u128 r; + r.lo = a.lo - b; + r.hi = a.hi; + if (a.lo < b) + r.hi--; + return r; +} + +static inline u128 sub128(u128 a, u128 b) +{ + u128 r; + r.lo = a.lo - b.lo; + r.hi = a.hi - b.hi; + if (a.lo < b.lo) + r.hi--; + return r; +} + +/* a<= 64) + { + a.hi = a.lo << (n - 64); + a.lo = 0; + } + else + { + a.hi = (a.hi << n) | (a.lo >> (64 - n)); + a.lo = a.lo << n; + } + return a; +} + +/* a>>n, 0 <= n <= 127 */ +static inline u128 rsh(u128 a, int n) +{ + if (n == 0) + return a; + if (n >= 64) + { + a.lo = a.hi >> (n - 64); + a.hi = 0; + } + else + { + a.lo = (a.lo >> n) | (a.hi << (64 - n)); + a.hi = a.hi >> n; + } + return a; +} + +/* returns a*b exactly. */ +static inline u128 mul64_128(uint64_t a, uint64_t b) +{ + u128 r; + uint64_t ahi = a >> 32; + uint64_t alo = a & 0xffffffff; + uint64_t bhi = b >> 32; + uint64_t blo = b & 0xffffffff; + uint64_t lo1 = ((ahi * blo) & 0xffffffff) + ((alo * bhi) & 0xffffffff) + (alo * blo >> 32); + uint64_t lo2 = (alo * blo) & 0xffffffff; + r.hi = ahi * bhi + (ahi * blo >> 32) + (alo * bhi >> 32) + (lo1 >> 32); + r.lo = (lo1 << 32) + lo2; + return r; +} + +/* returns a*b*2^-128 - e, with error 0 <= e < 7. */ +static inline u128 mul128(u128 a, u128 b) +{ + u128 hi = mul64_128(a.hi, b.hi); + uint64_t m1 = mul64(a.hi, b.lo); + uint64_t m2 = mul64(a.lo, b.hi); + return add64(add64(hi, m1), m2); +} + +/* returns a*b % 2^128. */ +static inline u128 mul128_tail(u128 a, u128 b) +{ + u128 lo = mul64_128(a.lo, b.lo); + lo.hi += a.hi * b.lo + a.lo * b.hi; + return lo; +} + + +/* see sqrt.c for detailed comments. */ + +long double sqrtl(long double x) +{ + u128 ix, ml; + uint64_t top; + + ix = asu128(x); + top = ix.hi >> 48; + if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) + { + /* x < 0x1p-16382 or inf or nan. */ + if (2 * ix.hi == 0 && ix.lo == 0) + return x; + if (ix.hi == 0x7fff000000000000 && ix.lo == 0) + return x; + if (top >= 0x7fff) + return __math_invalidl(x); + /* x is subnormal, normalize it. */ + ix = asu128(x * 0x1p112); + top = ix.hi >> 48; + top -= 112; + } + + /* x = 4^e m; with int e and m in [1, 4) */ + int even = top & 1; + ml = lsh(ix, 15); + ml.hi |= 0x8000000000000000; + if (even) ml = rsh(ml, 1); + top = (top + 0x3fff) >> 1; + + /* r ~ 1/sqrt(m) */ + static const uint64_t three = 0xc0000000; + uint64_t r, s, d, u, i; + i = (ix.hi >> 42) % 128; + r = (uint32_t)__rsqrt_tab[i] << 16; + /* |r sqrt(m) - 1| < 0x1p-8 */ + s = mul32(ml.hi >> 32, r); + d = mul32(s, r); + u = three - d; + r = mul32(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */ + r = r << 32; + s = mul64(ml.hi, r); + d = mul64(s, r); + u = (three << 32) - d; + r = mul64(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.a5p-31 */ + s = mul64(u, s) << 1; + d = mul64(s, r); + u = (three << 32) - d; + r = mul64(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */ + + static const u128 threel = {.hi = three << 32, .lo = 0}; + u128 rl, sl, dl, ul; + rl.hi = r; + rl.lo = 0; + sl = mul128(ml, rl); + dl = mul128(sl, rl); + ul = sub128(threel, dl); + sl = mul128(ul, sl); /* repr: 3.125 */ + /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ + sl = rsh(sub64(sl, 4), 125 - (LDBL_MANT_DIG - 1)); + /* s < sqrt(m) < s + 1 ULP + tiny */ + + long double y; + u128 d2, d1, d0; + d0 = sub128(lsh(ml, 2 * (LDBL_MANT_DIG - 1) - 126), mul128_tail(sl, sl)); + d1 = sub128(sl, d0); + d2 = add128(add64(sl, 1), d1); + sl = add64(sl, d1.hi >> 63); + y = mkldbl(top, sl); + if (FENV_SUPPORT) + { + /* handle rounding modes and inexact exception. */ + top = predict_false((d2.hi | d2.lo) == 0) ? 0 : 1; + top |= ((d1.hi ^ d2.hi) & 0x8000000000000000) >> 48; + y += mkldbl(top, (u128) + { + 0 + }); + } + return y; +} +#else +#error unsupported long double format +#endif diff --git a/src/math/x86-64/__invtrigl.s b/src/math/x86-64/__invtrigl.s new file mode 100644 index 0000000..e69de29 diff --git a/src/math/x86-64/acosl.s b/src/math/x86-64/acosl.s new file mode 100644 index 0000000..88e01b4 --- /dev/null +++ b/src/math/x86-64/acosl.s @@ -0,0 +1,16 @@ +# see ../i386/acos.s + +.global acosl +.type acosl,@function +acosl: + fldt 8(%rsp) +1: fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fabs + fxch %st(1) + fpatan + ret diff --git a/src/math/x86-64/asinl.s b/src/math/x86-64/asinl.s new file mode 100644 index 0000000..ed212d9 --- /dev/null +++ b/src/math/x86-64/asinl.s @@ -0,0 +1,12 @@ +.global asinl +.type asinl,@function +asinl: + fldt 8(%rsp) +1: fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fpatan + ret diff --git a/src/math/x86-64/atan2l.s b/src/math/x86-64/atan2l.s new file mode 100644 index 0000000..e5f0a3d --- /dev/null +++ b/src/math/x86-64/atan2l.s @@ -0,0 +1,7 @@ +.global atan2l +.type atan2l,@function +atan2l: + fldt 8(%rsp) + fldt 24(%rsp) + fpatan + ret diff --git a/src/math/x86-64/atanl.s b/src/math/x86-64/atanl.s new file mode 100644 index 0000000..df76de5 --- /dev/null +++ b/src/math/x86-64/atanl.s @@ -0,0 +1,7 @@ +.global atanl +.type atanl,@function +atanl: + fldt 8(%rsp) + fld1 + fpatan + ret diff --git a/src/math/x86-64/ceill.s b/src/math/x86-64/ceill.s new file mode 100644 index 0000000..f5cfa3b --- /dev/null +++ b/src/math/x86-64/ceill.s @@ -0,0 +1 @@ +# see floorl.s diff --git a/src/math/x86-64/exp2l.s b/src/math/x86-64/exp2l.s new file mode 100644 index 0000000..effab2b --- /dev/null +++ b/src/math/x86-64/exp2l.s @@ -0,0 +1,83 @@ +.global expm1l +.type expm1l,@function +expm1l: + fldt 8(%rsp) + fldl2e + fmulp + movl $0xc2820000,-4(%rsp) + flds -4(%rsp) + fucomip %st(1),%st + fld1 + jb 1f + # x*log2e <= -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) + fabs + fucomip %st(1),%st + fstp %st(0) + ja 1f + f2xm1 + ret +1: push %rax + call 1f + pop %rax + fld1 + fsubrp + ret + +.global exp2l +.type exp2l,@function +exp2l: + fldt 8(%rsp) +1: fld %st(0) + sub $16,%rsp + fstpt (%rsp) + mov 8(%rsp),%ax + and $0x7fff,%ax + cmp $0x3fff+13,%ax + jb 4f # |x| < 8192 + cmp $0x3fff+15,%ax + jae 3f # |x| >= 32768 + fsts (%rsp) + cmpl $0xc67ff800,(%rsp) + jb 2f # x > -16382 + movl $0x5f000000,(%rsp) + flds (%rsp) # 0x1p63 + fld %st(1) + fsub %st(1) + faddp + fucomip %st(1),%st + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%rsp) + flds (%rsp) # 0x1p-149 + fdiv %st(1) + fstps (%rsp) # raise underflow +2: fld1 + fld %st(1) + frndint + fxch %st(2) + fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x) + f2xm1 + faddp # 2^(x-rint(x)) +1: fscale + fstp %st(1) + add $16,%rsp + ret +3: xor %eax,%eax +4: cmp $0x3fff-64,%ax + fld1 + jb 1b # |x| < 0x1p-64 + fstpt (%rsp) + fistl 8(%rsp) + fildl 8(%rsp) + fsubrp %st(1) + addl $0x3fff,8(%rsp) + f2xm1 + fld1 + faddp # 2^(x-rint(x)) + fldt (%rsp) # 2^rint(x) + fmulp + add $16,%rsp + ret diff --git a/src/math/x86-64/expl.s b/src/math/x86-64/expl.s new file mode 100644 index 0000000..798261d --- /dev/null +++ b/src/math/x86-64/expl.s @@ -0,0 +1,101 @@ +# exp(x) = 2^hi + 2^hi (2^lo - 1) +# where hi+lo = log2e*x with 128bit precision +# exact log2e*x calculation depends on nearest rounding mode +# using the exact multiplication method of Dekker and Veltkamp + +.global expl +.type expl,@function +expl: + fldt 8(%rsp) + + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 16(%rsp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax + fld1 + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) + ret + # if |x|<0x1p-32 return 1+x +1: faddp + ret + + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc + # it will be wrong on non-nearest rounding mode +2: fldl2e + subq $48, %rsp + # hi = log2e_hi*x + # 2^hi = exp2l(hi) + fmul %st(1),%st + fld %st(0) + fstpt (%rsp) + fstpt 16(%rsp) + fstpt 32(%rsp) + call exp2l@PLT + # if 2^hi == inf return 2^hi + fld %st(0) + fstpt (%rsp) + cmpw $0x7fff, 8(%rsp) + je 1f + fldt 32(%rsp) + fldt 16(%rsp) + # fpu stack: 2^hi x hi + # exact mult: x*log2e + fld %st(1) + # c = 0x1p32+1 + movq $0x41f0000000100000,%rax + pushq %rax + fldl (%rsp) + # xh = x - c*x + c*x + # xl = x - xh + fmulp + fld %st(2) + fsub %st(1), %st + faddp + fld %st(2) + fsub %st(1), %st + # yh = log2e_hi - c*log2e_hi + c*log2e_hi + movq $0x3ff7154765200000,%rax + pushq %rax + fldl (%rsp) + # fpu stack: 2^hi x hi xh xl yh + # lo = hi - xh*yh + xl*yh + fld %st(2) + fmul %st(1), %st + fsubp %st, %st(4) + fmul %st(1), %st + faddp %st, %st(3) + # yl = log2e_hi - yh + movq $0x3de705fc2f000000,%rax + pushq %rax + fldl (%rsp) + # fpu stack: 2^hi x lo xh xl yl + # lo += xh*yl + xl*yl + fmul %st, %st(2) + fmulp %st, %st(1) + fxch %st(2) + faddp + faddp + # log2e_lo + movq $0xbfbe,%rax + pushq %rax + movq $0x82f0025f2dc582ee,%rax + pushq %rax + fldt (%rsp) + addq $40,%rsp + # fpu stack: 2^hi x lo log2e_lo + # lo += log2e_lo*x + # return 2^hi + 2^hi (2^lo - 1) + fmulp %st, %st(2) + faddp + f2xm1 + fmul %st(1), %st + faddp +1: addq $48, %rsp + ret diff --git a/src/math/x86-64/expm1l.s b/src/math/x86-64/expm1l.s new file mode 100644 index 0000000..e773f08 --- /dev/null +++ b/src/math/x86-64/expm1l.s @@ -0,0 +1 @@ +# see exp2l.s diff --git a/src/math/x86-64/fabs.c b/src/math/x86-64/fabs.c new file mode 100644 index 0000000..1656247 --- /dev/null +++ b/src/math/x86-64/fabs.c @@ -0,0 +1,10 @@ +#include + +double fabs(double x) +{ + double t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrlq $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86-64/fabsf.c b/src/math/x86-64/fabsf.c new file mode 100644 index 0000000..36ea748 --- /dev/null +++ b/src/math/x86-64/fabsf.c @@ -0,0 +1,10 @@ +#include + +float fabsf(float x) +{ + float t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrld $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86-64/fabsl.c b/src/math/x86-64/fabsl.c new file mode 100644 index 0000000..cc1c9ed --- /dev/null +++ b/src/math/x86-64/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86-64/floorl.s b/src/math/x86-64/floorl.s new file mode 100644 index 0000000..80da466 --- /dev/null +++ b/src/math/x86-64/floorl.s @@ -0,0 +1,27 @@ +.global floorl +.type floorl,@function +floorl: + fldt 8(%rsp) +1: mov $0x7,%al +1: fstcw 8(%rsp) + mov 9(%rsp),%ah + mov %al,9(%rsp) + fldcw 8(%rsp) + frndint + mov %ah,9(%rsp) + fldcw 8(%rsp) + ret + +.global ceill +.type ceill,@function +ceill: + fldt 8(%rsp) + mov $0xb,%al + jmp 1b + +.global truncl +.type truncl,@function +truncl: + fldt 8(%rsp) + mov $0xf,%al + jmp 1b diff --git a/src/math/x86-64/fma.c b/src/math/x86-64/fma.c new file mode 100644 index 0000000..4dd53f2 --- /dev/null +++ b/src/math/x86-64/fma.c @@ -0,0 +1,23 @@ +#include + +#if __FMA__ + +double fma(double x, double y, double z) +{ + __asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); + return x; +} + +#elif __FMA4__ + +double fma(double x, double y, double z) +{ + __asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); + return x; +} + +#else + +#include "../fma.c" + +#endif diff --git a/src/math/x86-64/fmaf.c b/src/math/x86-64/fmaf.c new file mode 100644 index 0000000..30b971f --- /dev/null +++ b/src/math/x86-64/fmaf.c @@ -0,0 +1,23 @@ +#include + +#if __FMA__ + +float fmaf(float x, float y, float z) +{ + __asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z)); + return x; +} + +#elif __FMA4__ + +float fmaf(float x, float y, float z) +{ + __asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); + return x; +} + +#else + +#include "../fmaf.c" + +#endif diff --git a/src/math/x86-64/fmodl.c b/src/math/x86-64/fmodl.c new file mode 100644 index 0000000..3daeab0 --- /dev/null +++ b/src/math/x86-64/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86-64/llrint.c b/src/math/x86-64/llrint.c new file mode 100644 index 0000000..dd38a72 --- /dev/null +++ b/src/math/x86-64/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86-64/llrintf.c b/src/math/x86-64/llrintf.c new file mode 100644 index 0000000..fc8625e --- /dev/null +++ b/src/math/x86-64/llrintf.c @@ -0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86-64/llrintl.c b/src/math/x86-64/llrintl.c new file mode 100644 index 0000000..c439ef2 --- /dev/null +++ b/src/math/x86-64/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86-64/log10l.s b/src/math/x86-64/log10l.s new file mode 100644 index 0000000..48ea4af --- /dev/null +++ b/src/math/x86-64/log10l.s @@ -0,0 +1,7 @@ +.global log10l +.type log10l,@function +log10l: + fldlg2 + fldt 8(%rsp) + fyl2x + ret diff --git a/src/math/x86-64/log1pl.s b/src/math/x86-64/log1pl.s new file mode 100644 index 0000000..955c9db --- /dev/null +++ b/src/math/x86-64/log1pl.s @@ -0,0 +1,15 @@ +.global log1pl +.type log1pl,@function +log1pl: + mov 14(%rsp),%eax + fldln2 + and $0x7fffffff,%eax + fldt 8(%rsp) + cmp $0x3ffd9400,%eax + ja 1f + fyl2xp1 + ret +1: fld1 + faddp + fyl2x + ret diff --git a/src/math/x86-64/log2l.s b/src/math/x86-64/log2l.s new file mode 100644 index 0000000..ba08b9f --- /dev/null +++ b/src/math/x86-64/log2l.s @@ -0,0 +1,7 @@ +.global log2l +.type log2l,@function +log2l: + fld1 + fldt 8(%rsp) + fyl2x + ret diff --git a/src/math/x86-64/logl.s b/src/math/x86-64/logl.s new file mode 100644 index 0000000..20dd1f8 --- /dev/null +++ b/src/math/x86-64/logl.s @@ -0,0 +1,7 @@ +.global logl +.type logl,@function +logl: + fldln2 + fldt 8(%rsp) + fyl2x + ret diff --git a/src/math/x86-64/lrint.c b/src/math/x86-64/lrint.c new file mode 100644 index 0000000..a742fec --- /dev/null +++ b/src/math/x86-64/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86-64/lrintf.c b/src/math/x86-64/lrintf.c new file mode 100644 index 0000000..2ba5639 --- /dev/null +++ b/src/math/x86-64/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86-64/lrintl.c b/src/math/x86-64/lrintl.c new file mode 100644 index 0000000..068e2e4 --- /dev/null +++ b/src/math/x86-64/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86-64/remainderl.c b/src/math/x86-64/remainderl.c new file mode 100644 index 0000000..8cf7507 --- /dev/null +++ b/src/math/x86-64/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86-64/remquol.c b/src/math/x86-64/remquol.c new file mode 100644 index 0000000..60eef08 --- /dev/null +++ b/src/math/x86-64/remquol.c @@ -0,0 +1,32 @@ +#include + +long double remquol(long double x, long double y, int *quo) +{ + signed char *cx = (void *)&x, *cy = (void *)&y; + /* By ensuring that addresses of x and y cannot be discarded, + * this empty asm guides GCC into representing extraction of + * their sign bits as memory loads rather than making x and y + * not-address-taken internally and using bitfield operations, + * which in the end wouldn't work out, as extraction from FPU + * registers needs to go through memory anyway. This way GCC + * should manage to use incoming stack slots without spills. */ + __asm__ ("" :: "X"(cx), "X"(cy)); + + long double t = x; + unsigned fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + /* C0, C1, C3 flags in x87 status word carry low bits of quotient: + * 15 14 13 12 11 10 9 8 + * . C3 . . . C2 C1 C0 + * . b1 . . . 0 b0 b2 */ + unsigned char i = fpsr >> 8; + i = i>>4 | i<<4; + /* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via + * in-register table lookup. */ + unsigned qbits = 0x7575313164642020 >> (i & 60); + qbits &= 7; + + *quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits; + return t; +} diff --git a/src/math/x86-64/rintl.c b/src/math/x86-64/rintl.c new file mode 100644 index 0000000..e1a9207 --- /dev/null +++ b/src/math/x86-64/rintl.c @@ -0,0 +1,7 @@ +#include + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86-64/sqrt.c b/src/math/x86-64/sqrt.c new file mode 100644 index 0000000..657e09e --- /dev/null +++ b/src/math/x86-64/sqrt.c @@ -0,0 +1,7 @@ +#include + +double sqrt(double x) +{ + __asm__ ("sqrtsd %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86-64/sqrtf.c b/src/math/x86-64/sqrtf.c new file mode 100644 index 0000000..720baec --- /dev/null +++ b/src/math/x86-64/sqrtf.c @@ -0,0 +1,7 @@ +#include + +float sqrtf(float x) +{ + __asm__ ("sqrtss %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86-64/sqrtl.c b/src/math/x86-64/sqrtl.c new file mode 100644 index 0000000..864cfcc --- /dev/null +++ b/src/math/x86-64/sqrtl.c @@ -0,0 +1,7 @@ +#include + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/src/math/x86-64/truncl.s b/src/math/x86-64/truncl.s new file mode 100644 index 0000000..f5cfa3b --- /dev/null +++ b/src/math/x86-64/truncl.s @@ -0,0 +1 @@ +# see floorl.s diff --git a/src/math/x86/__invtrigl.s b/src/math/x86/__invtrigl.s new file mode 100644 index 0000000..e69de29 diff --git a/src/math/x86/acos.s b/src/math/x86/acos.s new file mode 100644 index 0000000..af423a2 --- /dev/null +++ b/src/math/x86/acos.s @@ -0,0 +1,18 @@ +# use acos(x) = atan2(fabs(sqrt((1-x)*(1+x))), x) + +.global acos +.type acos,@function +acos: + fldl 4(%esp) + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fabs # fix sign of zero (matters in downward rounding mode) + fxch %st(1) + fpatan + fstpl 4(%esp) + fldl 4(%esp) + ret diff --git a/src/math/x86/acosf.s b/src/math/x86/acosf.s new file mode 100644 index 0000000..d2cdfdb --- /dev/null +++ b/src/math/x86/acosf.s @@ -0,0 +1,16 @@ +.global acosf +.type acosf,@function +acosf: + flds 4(%esp) + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fabs # fix sign of zero (matters in downward rounding mode) + fxch %st(1) + fpatan + fstps 4(%esp) + flds 4(%esp) + ret diff --git a/src/math/x86/acosl.s b/src/math/x86/acosl.s new file mode 100644 index 0000000..599c823 --- /dev/null +++ b/src/math/x86/acosl.s @@ -0,0 +1,14 @@ +.global acosl +.type acosl,@function +acosl: + fldt 4(%esp) + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fabs # fix sign of zero (matters in downward rounding mode) + fxch %st(1) + fpatan + ret diff --git a/src/math/x86/asin.s b/src/math/x86/asin.s new file mode 100644 index 0000000..2bc8356 --- /dev/null +++ b/src/math/x86/asin.s @@ -0,0 +1,21 @@ +.global asin +.type asin,@function +asin: + fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jb 1f + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fpatan + fstpl 4(%esp) + fldl 4(%esp) + ret + # subnormal x, return x with underflow +1: fsts 4(%esp) + ret diff --git a/src/math/x86/asinf.s b/src/math/x86/asinf.s new file mode 100644 index 0000000..0590975 --- /dev/null +++ b/src/math/x86/asinf.s @@ -0,0 +1,23 @@ +.global asinf +.type asinf,@function +asinf: + flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jb 1f + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fpatan + fstps 4(%esp) + flds 4(%esp) + ret + # subnormal x, return x with underflow +1: fld %st(0) + fmul %st(1) + fstps 4(%esp) + ret diff --git a/src/math/x86/asinl.s b/src/math/x86/asinl.s new file mode 100644 index 0000000..e973fc8 --- /dev/null +++ b/src/math/x86/asinl.s @@ -0,0 +1,12 @@ +.global asinl +.type asinl,@function +asinl: + fldt 4(%esp) + fld %st(0) + fld1 + fsub %st(0),%st(1) + fadd %st(2) + fmulp + fsqrt + fpatan + ret diff --git a/src/math/x86/atan.s b/src/math/x86/atan.s new file mode 100644 index 0000000..2c57f6b --- /dev/null +++ b/src/math/x86/atan.s @@ -0,0 +1,16 @@ +.global atan +.type atan,@function +atan: + fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jb 1f + fld1 + fpatan + fstpl 4(%esp) + fldl 4(%esp) + ret + # subnormal x, return x with underflow +1: fsts 4(%esp) + ret diff --git a/src/math/x86/atan2.s b/src/math/x86/atan2.s new file mode 100644 index 0000000..8bc441b --- /dev/null +++ b/src/math/x86/atan2.s @@ -0,0 +1,15 @@ +.global atan2 +.type atan2,@function +atan2: + fldl 4(%esp) + fldl 12(%esp) + fpatan + fstpl 4(%esp) + fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fsts 4(%esp) +1: ret diff --git a/src/math/x86/atan2f.s b/src/math/x86/atan2f.s new file mode 100644 index 0000000..3908c86 --- /dev/null +++ b/src/math/x86/atan2f.s @@ -0,0 +1,17 @@ +.global atan2f +.type atan2f,@function +atan2f: + flds 4(%esp) + flds 8(%esp) + fpatan + fstps 4(%esp) + flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fld %st(0) + fmul %st(1) + fstps 4(%esp) +1: ret diff --git a/src/math/x86/atan2l.s b/src/math/x86/atan2l.s new file mode 100644 index 0000000..adf6e10 --- /dev/null +++ b/src/math/x86/atan2l.s @@ -0,0 +1,7 @@ +.global atan2l +.type atan2l,@function +atan2l: + fldt 4(%esp) + fldt 16(%esp) + fpatan + ret diff --git a/src/math/x86/atanf.s b/src/math/x86/atanf.s new file mode 100644 index 0000000..c2cbe2e --- /dev/null +++ b/src/math/x86/atanf.s @@ -0,0 +1,18 @@ +.global atanf +.type atanf,@function +atanf: + flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jb 1f + fld1 + fpatan + fstps 4(%esp) + flds 4(%esp) + ret + # subnormal x, return x with underflow +1: fld %st(0) + fmul %st(1) + fstps 4(%esp) + ret diff --git a/src/math/x86/atanl.s b/src/math/x86/atanl.s new file mode 100644 index 0000000..c508bc4 --- /dev/null +++ b/src/math/x86/atanl.s @@ -0,0 +1,7 @@ +.global atanl +.type atanl,@function +atanl: + fldt 4(%esp) + fld1 + fpatan + ret diff --git a/src/math/x86/ceil.s b/src/math/x86/ceil.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/ceil.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/ceilf.s b/src/math/x86/ceilf.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/ceilf.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/ceill.s b/src/math/x86/ceill.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/ceill.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/exp2l.s b/src/math/x86/exp2l.s new file mode 100644 index 0000000..8125761 --- /dev/null +++ b/src/math/x86/exp2l.s @@ -0,0 +1 @@ +# see exp_ld.s diff --git a/src/math/x86/exp_ld.s b/src/math/x86/exp_ld.s new file mode 100644 index 0000000..99cba01 --- /dev/null +++ b/src/math/x86/exp_ld.s @@ -0,0 +1,93 @@ +.global expm1l +.type expm1l,@function +expm1l: + fldt 4(%esp) + fldl2e + fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf + fld1 + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) + fabs + fucom %st(1) + fnstsw %ax + fstp %st(0) + fstp %st(0) + sahf + ja 1f + f2xm1 + ret +1: call 1f + fld1 + fsubrp + ret + +.global exp2l +.global __exp2l +.hidden __exp2l +.type exp2l,@function +exp2l: +__exp2l: + fldt 4(%esp) +1: sub $12,%esp + fld %st(0) + fstpt (%esp) + mov 8(%esp),%ax + and $0x7fff,%ax + cmp $0x3fff+13,%ax + jb 4f # |x| < 8192 + cmp $0x3fff+15,%ax + jae 3f # |x| >= 32768 + fsts (%esp) + cmpl $0xc67ff800,(%esp) + jb 2f # x > -16382 + movl $0x5f000000,(%esp) + flds (%esp) # 0x1p63 + fld %st(1) + fsub %st(1) + faddp + fucomp %st(1) + fnstsw + sahf + je 2f # x - 0x1p63 + 0x1p63 == x + movl $1,(%esp) + flds (%esp) # 0x1p-149 + fdiv %st(1) + fstps (%esp) # raise underflow +2: fld1 + fld %st(1) + frndint + fxch %st(2) + fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x) + f2xm1 + faddp # 2^(x-rint(x)) +1: fscale + fstp %st(1) + add $12,%esp + ret +3: xor %eax,%eax +4: cmp $0x3fff-64,%ax + fld1 + jb 1b # |x| < 0x1p-64 + fstpt (%esp) + fistl 8(%esp) + fildl 8(%esp) + fsubrp %st(1) + addl $0x3fff,8(%esp) + f2xm1 + fld1 + faddp # 2^(x-rint(x)) + fldt (%esp) # 2^rint(x) + fmulp + add $12,%esp + ret diff --git a/src/math/x86/expl.s b/src/math/x86/expl.s new file mode 100644 index 0000000..b5124e8 --- /dev/null +++ b/src/math/x86/expl.s @@ -0,0 +1,101 @@ +# exp(x) = 2^hi + 2^hi (2^lo - 1) +# where hi+lo = log2e*x with 128bit precision +# exact log2e*x calculation depends on nearest rounding mode +# using the exact multiplication method of Dekker and Veltkamp + +.global expl +.type expl,@function +expl: + fldt 4(%esp) + + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 12(%esp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax + fld1 + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) + ret + # if |x|<0x1p-32 return 1+x +1: faddp + ret + + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc + # it will be wrong on non-nearest rounding mode +2: fldl2e + subl $44, %esp + # hi = log2e_hi*x + # 2^hi = exp2l(hi) + fmul %st(1),%st + fld %st(0) + fstpt (%esp) + fstpt 16(%esp) + fstpt 32(%esp) +.hidden __exp2l + call __exp2l + # if 2^hi == inf return 2^hi + fld %st(0) + fstpt (%esp) + cmpw $0x7fff, 8(%esp) + je 1f + fldt 32(%esp) + fldt 16(%esp) + # fpu stack: 2^hi x hi + # exact mult: x*log2e + fld %st(1) + # c = 0x1p32+1 + pushl $0x41f00000 + pushl $0x00100000 + fldl (%esp) + # xh = x - c*x + c*x + # xl = x - xh + fmulp + fld %st(2) + fsub %st(1), %st + faddp + fld %st(2) + fsub %st(1), %st + # yh = log2e_hi - c*log2e_hi + c*log2e_hi + pushl $0x3ff71547 + pushl $0x65200000 + fldl (%esp) + # fpu stack: 2^hi x hi xh xl yh + # lo = hi - xh*yh + xl*yh + fld %st(2) + fmul %st(1), %st + fsubp %st, %st(4) + fmul %st(1), %st + faddp %st, %st(3) + # yl = log2e_hi - yh + pushl $0x3de705fc + pushl $0x2f000000 + fldl (%esp) + # fpu stack: 2^hi x lo xh xl yl + # lo += xh*yl + xl*yl + fmul %st, %st(2) + fmulp %st, %st(1) + fxch %st(2) + faddp + faddp + # log2e_lo + pushl $0xbfbe + pushl $0x82f0025f + pushl $0x2dc582ee + fldt (%esp) + addl $36,%esp + # fpu stack: 2^hi x lo log2e_lo + # lo += log2e_lo*x + # return 2^hi + 2^hi (2^lo - 1) + fmulp %st, %st(2) + faddp + f2xm1 + fmul %st(1), %st + faddp +1: addl $44, %esp + ret diff --git a/src/math/x86/expm1l.s b/src/math/x86/expm1l.s new file mode 100644 index 0000000..8125761 --- /dev/null +++ b/src/math/x86/expm1l.s @@ -0,0 +1 @@ +# see exp_ld.s diff --git a/src/math/x86/fabs.c b/src/math/x86/fabs.c new file mode 100644 index 0000000..3967278 --- /dev/null +++ b/src/math/x86/fabs.c @@ -0,0 +1,7 @@ +#include + +double fabs(double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86/fabsf.c b/src/math/x86/fabsf.c new file mode 100644 index 0000000..d882eee --- /dev/null +++ b/src/math/x86/fabsf.c @@ -0,0 +1,7 @@ +#include + +float fabsf(float x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86/fabsl.c b/src/math/x86/fabsl.c new file mode 100644 index 0000000..cc1c9ed --- /dev/null +++ b/src/math/x86/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86/floor.s b/src/math/x86/floor.s new file mode 100644 index 0000000..46ba88d --- /dev/null +++ b/src/math/x86/floor.s @@ -0,0 +1,67 @@ +.global floorf +.type floorf,@function +floorf: + flds 4(%esp) + jmp 1f + +.global floorl +.type floorl,@function +floorl: + fldt 4(%esp) + jmp 1f + +.global floor +.type floor,@function +floor: + fldl 4(%esp) +1: mov $0x7,%al +1: fstcw 4(%esp) + mov 5(%esp),%ah + mov %al,5(%esp) + fldcw 4(%esp) + frndint + mov %ah,5(%esp) + fldcw 4(%esp) + ret + +.global ceil +.type ceil,@function +ceil: + fldl 4(%esp) + mov $0xb,%al + jmp 1b + +.global ceilf +.type ceilf,@function +ceilf: + flds 4(%esp) + mov $0xb,%al + jmp 1b + +.global ceill +.type ceill,@function +ceill: + fldt 4(%esp) + mov $0xb,%al + jmp 1b + +.global trunc +.type trunc,@function +trunc: + fldl 4(%esp) + mov $0xf,%al + jmp 1b + +.global truncf +.type truncf,@function +truncf: + flds 4(%esp) + mov $0xf,%al + jmp 1b + +.global truncl +.type truncl,@function +truncl: + fldt 4(%esp) + mov $0xf,%al + jmp 1b diff --git a/src/math/x86/floorf.s b/src/math/x86/floorf.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/floorf.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/floorl.s b/src/math/x86/floorl.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/floorl.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/fmod.c b/src/math/x86/fmod.c new file mode 100644 index 0000000..ea0c58d --- /dev/null +++ b/src/math/x86/fmod.c @@ -0,0 +1,10 @@ +#include + +double fmod(double x, double y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86/fmodf.c b/src/math/x86/fmodf.c new file mode 100644 index 0000000..90b56ab --- /dev/null +++ b/src/math/x86/fmodf.c @@ -0,0 +1,10 @@ +#include + +float fmodf(float x, float y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86/fmodl.c b/src/math/x86/fmodl.c new file mode 100644 index 0000000..3daeab0 --- /dev/null +++ b/src/math/x86/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86/hypot.s b/src/math/x86/hypot.s new file mode 100644 index 0000000..299c2e1 --- /dev/null +++ b/src/math/x86/hypot.s @@ -0,0 +1,45 @@ +.global hypot +.type hypot,@function +hypot: + mov 8(%esp),%eax + mov 16(%esp),%ecx + add %eax,%eax + add %ecx,%ecx + and %eax,%ecx + cmp $0xffe00000,%ecx + jae 2f + or 4(%esp),%eax + jnz 1f + fldl 12(%esp) + fabs + ret +1: mov 16(%esp),%eax + add %eax,%eax + or 12(%esp),%eax + jnz 1f + fldl 4(%esp) + fabs + ret +1: fldl 4(%esp) + fld %st(0) + fmulp + fldl 12(%esp) + fld %st(0) + fmulp + faddp + fsqrt + ret +2: sub $0xffe00000,%eax + or 4(%esp),%eax + jnz 1f + fldl 4(%esp) + fabs + ret +1: mov 16(%esp),%eax + add %eax,%eax + sub $0xffe00000,%eax + or 12(%esp),%eax + fldl 12(%esp) + jnz 1f + fabs +1: ret diff --git a/src/math/x86/hypotf.s b/src/math/x86/hypotf.s new file mode 100644 index 0000000..068935e --- /dev/null +++ b/src/math/x86/hypotf.s @@ -0,0 +1,42 @@ +.global hypotf +.type hypotf,@function +hypotf: + mov 4(%esp),%eax + mov 8(%esp),%ecx + add %eax,%eax + add %ecx,%ecx + and %eax,%ecx + cmp $0xff000000,%ecx + jae 2f + test %eax,%eax + jnz 1f + flds 8(%esp) + fabs + ret +1: mov 8(%esp),%eax + add %eax,%eax + jnz 1f + flds 4(%esp) + fabs + ret +1: flds 4(%esp) + fld %st(0) + fmulp + flds 8(%esp) + fld %st(0) + fmulp + faddp + fsqrt + ret +2: cmp $0xff000000,%eax + jnz 1f + flds 4(%esp) + fabs + ret +1: mov 8(%esp),%eax + add %eax,%eax + cmp $0xff000000,%eax + flds 8(%esp) + jnz 1f + fabs +1: ret diff --git a/src/math/x86/ldexp.s b/src/math/x86/ldexp.s new file mode 100644 index 0000000..c430f78 --- /dev/null +++ b/src/math/x86/ldexp.s @@ -0,0 +1 @@ +# see scalbn.s diff --git a/src/math/x86/ldexpf.s b/src/math/x86/ldexpf.s new file mode 100644 index 0000000..3f8e4b9 --- /dev/null +++ b/src/math/x86/ldexpf.s @@ -0,0 +1 @@ +# see scalbnf.s diff --git a/src/math/x86/ldexpl.s b/src/math/x86/ldexpl.s new file mode 100644 index 0000000..86fe562 --- /dev/null +++ b/src/math/x86/ldexpl.s @@ -0,0 +1 @@ +# see scalbnl.s diff --git a/src/math/x86/llrint.c b/src/math/x86/llrint.c new file mode 100644 index 0000000..aa40081 --- /dev/null +++ b/src/math/x86/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/llrintf.c b/src/math/x86/llrintf.c new file mode 100644 index 0000000..c41a317 --- /dev/null +++ b/src/math/x86/llrintf.c @@ -0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/llrintl.c b/src/math/x86/llrintl.c new file mode 100644 index 0000000..c439ef2 --- /dev/null +++ b/src/math/x86/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/log.s b/src/math/x86/log.s new file mode 100644 index 0000000..08c5992 --- /dev/null +++ b/src/math/x86/log.s @@ -0,0 +1,9 @@ +.global log +.type log,@function +log: + fldln2 + fldl 4(%esp) + fyl2x + fstpl 4(%esp) + fldl 4(%esp) + ret diff --git a/src/math/x86/log10.s b/src/math/x86/log10.s new file mode 100644 index 0000000..120e91e --- /dev/null +++ b/src/math/x86/log10.s @@ -0,0 +1,9 @@ +.global log10 +.type log10,@function +log10: + fldlg2 + fldl 4(%esp) + fyl2x + fstpl 4(%esp) + fldl 4(%esp) + ret diff --git a/src/math/x86/log10f.s b/src/math/x86/log10f.s new file mode 100644 index 0000000..b055493 --- /dev/null +++ b/src/math/x86/log10f.s @@ -0,0 +1,9 @@ +.global log10f +.type log10f,@function +log10f: + fldlg2 + flds 4(%esp) + fyl2x + fstps 4(%esp) + flds 4(%esp) + ret diff --git a/src/math/x86/log10l.s b/src/math/x86/log10l.s new file mode 100644 index 0000000..aaa44f2 --- /dev/null +++ b/src/math/x86/log10l.s @@ -0,0 +1,7 @@ +.global log10l +.type log10l,@function +log10l: + fldlg2 + fldt 4(%esp) + fyl2x + ret diff --git a/src/math/x86/log1p.s b/src/math/x86/log1p.s new file mode 100644 index 0000000..f3c95f8 --- /dev/null +++ b/src/math/x86/log1p.s @@ -0,0 +1,25 @@ +.global log1p +.type log1p,@function +log1p: + mov 8(%esp),%eax + fldln2 + and $0x7fffffff,%eax + fldl 4(%esp) + cmp $0x3fd28f00,%eax + ja 1f + cmp $0x00100000,%eax + jb 2f + fyl2xp1 + fstpl 4(%esp) + fldl 4(%esp) + ret +1: fld1 + faddp + fyl2x + fstpl 4(%esp) + fldl 4(%esp) + ret + # subnormal x, return x with underflow +2: fsts 4(%esp) + fstp %st(1) + ret diff --git a/src/math/x86/log1pf.s b/src/math/x86/log1pf.s new file mode 100644 index 0000000..9f13d95 --- /dev/null +++ b/src/math/x86/log1pf.s @@ -0,0 +1,26 @@ +.global log1pf +.type log1pf,@function +log1pf: + mov 4(%esp),%eax + fldln2 + and $0x7fffffff,%eax + flds 4(%esp) + cmp $0x3e940000,%eax + ja 1f + cmp $0x00800000,%eax + jb 2f + fyl2xp1 + fstps 4(%esp) + flds 4(%esp) + ret +1: fld1 + faddp + fyl2x + fstps 4(%esp) + flds 4(%esp) + ret + # subnormal x, return x with underflow +2: fxch + fmul %st(1) + fstps 4(%esp) + ret diff --git a/src/math/x86/log1pl.s b/src/math/x86/log1pl.s new file mode 100644 index 0000000..a048ab6 --- /dev/null +++ b/src/math/x86/log1pl.s @@ -0,0 +1,15 @@ +.global log1pl +.type log1pl,@function +log1pl: + mov 10(%esp),%eax + fldln2 + and $0x7fffffff,%eax + fldt 4(%esp) + cmp $0x3ffd9400,%eax + ja 1f + fyl2xp1 + ret +1: fld1 + faddp + fyl2x + ret diff --git a/src/math/x86/log2.s b/src/math/x86/log2.s new file mode 100644 index 0000000..7eff0b6 --- /dev/null +++ b/src/math/x86/log2.s @@ -0,0 +1,9 @@ +.global log2 +.type log2,@function +log2: + fld1 + fldl 4(%esp) + fyl2x + fstpl 4(%esp) + fldl 4(%esp) + ret diff --git a/src/math/x86/log2f.s b/src/math/x86/log2f.s new file mode 100644 index 0000000..b32fa2f --- /dev/null +++ b/src/math/x86/log2f.s @@ -0,0 +1,9 @@ +.global log2f +.type log2f,@function +log2f: + fld1 + flds 4(%esp) + fyl2x + fstps 4(%esp) + flds 4(%esp) + ret diff --git a/src/math/x86/log2l.s b/src/math/x86/log2l.s new file mode 100644 index 0000000..c58f56f --- /dev/null +++ b/src/math/x86/log2l.s @@ -0,0 +1,7 @@ +.global log2l +.type log2l,@function +log2l: + fld1 + fldt 4(%esp) + fyl2x + ret diff --git a/src/math/x86/logf.s b/src/math/x86/logf.s new file mode 100644 index 0000000..4d0346a --- /dev/null +++ b/src/math/x86/logf.s @@ -0,0 +1,9 @@ +.global logf +.type logf,@function +logf: + fldln2 + flds 4(%esp) + fyl2x + fstps 4(%esp) + flds 4(%esp) + ret diff --git a/src/math/x86/logl.s b/src/math/x86/logl.s new file mode 100644 index 0000000..d4e3339 --- /dev/null +++ b/src/math/x86/logl.s @@ -0,0 +1,7 @@ +.global logl +.type logl,@function +logl: + fldln2 + fldt 4(%esp) + fyl2x + ret diff --git a/src/math/x86/lrint.c b/src/math/x86/lrint.c new file mode 100644 index 0000000..89563ab --- /dev/null +++ b/src/math/x86/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/lrintf.c b/src/math/x86/lrintf.c new file mode 100644 index 0000000..0bbf29d --- /dev/null +++ b/src/math/x86/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/lrintl.c b/src/math/x86/lrintl.c new file mode 100644 index 0000000..eb8c090 --- /dev/null +++ b/src/math/x86/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86/remainder.c b/src/math/x86/remainder.c new file mode 100644 index 0000000..5b41ed9 --- /dev/null +++ b/src/math/x86/remainder.c @@ -0,0 +1,12 @@ +#include + +double remainder(double x, double y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} + +_weak_alias(remainder, drem); diff --git a/src/math/x86/remainderf.c b/src/math/x86/remainderf.c new file mode 100644 index 0000000..e86fb62 --- /dev/null +++ b/src/math/x86/remainderf.c @@ -0,0 +1,12 @@ +#include + +float remainderf(float x, float y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} + +_weak_alias(remainderf, dremf); diff --git a/src/math/x86/remainderl.c b/src/math/x86/remainderl.c new file mode 100644 index 0000000..8cf7507 --- /dev/null +++ b/src/math/x86/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86/remquo.s b/src/math/x86/remquo.s new file mode 100644 index 0000000..598e754 --- /dev/null +++ b/src/math/x86/remquo.s @@ -0,0 +1,50 @@ +.global remquof +.type remquof,@function +remquof: + mov 12(%esp),%ecx + flds 8(%esp) + flds 4(%esp) + mov 11(%esp),%dh + xor 7(%esp),%dh + jmp 1f + +.global remquol +.type remquol,@function +remquol: + mov 28(%esp),%ecx + fldt 16(%esp) + fldt 4(%esp) + mov 25(%esp),%dh + xor 13(%esp),%dh + jmp 1f + +.global remquo +.type remquo,@function +remquo: + mov 20(%esp),%ecx + fldl 12(%esp) + fldl 4(%esp) + mov 19(%esp),%dh + xor 11(%esp),%dh +1: fprem1 + fnstsw %ax + sahf + jp 1b + fstp %st(1) + mov %ah,%dl + shr %dl + and $1,%dl + mov %ah,%al + shr $5,%al + and $2,%al + or %al,%dl + mov %ah,%al + shl $2,%al + and $4,%al + or %al,%dl + test %dh,%dh + jns 1f + neg %dl +1: movsbl %dl,%edx + mov %edx,(%ecx) + ret diff --git a/src/math/x86/remquof.s b/src/math/x86/remquof.s new file mode 100644 index 0000000..511a6bc --- /dev/null +++ b/src/math/x86/remquof.s @@ -0,0 +1 @@ +# see remquo.s diff --git a/src/math/x86/remquol.s b/src/math/x86/remquol.s new file mode 100644 index 0000000..511a6bc --- /dev/null +++ b/src/math/x86/remquol.s @@ -0,0 +1 @@ +# see remquo.s diff --git a/src/math/x86/rint.c b/src/math/x86/rint.c new file mode 100644 index 0000000..a5276a6 --- /dev/null +++ b/src/math/x86/rint.c @@ -0,0 +1,7 @@ +#include + +double rint(double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86/rintf.c b/src/math/x86/rintf.c new file mode 100644 index 0000000..bb4121a --- /dev/null +++ b/src/math/x86/rintf.c @@ -0,0 +1,7 @@ +#include + +float rintf(float x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86/rintl.c b/src/math/x86/rintl.c new file mode 100644 index 0000000..e1a9207 --- /dev/null +++ b/src/math/x86/rintl.c @@ -0,0 +1,7 @@ +#include + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86/scalbln.s b/src/math/x86/scalbln.s new file mode 100644 index 0000000..c430f78 --- /dev/null +++ b/src/math/x86/scalbln.s @@ -0,0 +1 @@ +# see scalbn.s diff --git a/src/math/x86/scalblnf.s b/src/math/x86/scalblnf.s new file mode 100644 index 0000000..3f8e4b9 --- /dev/null +++ b/src/math/x86/scalblnf.s @@ -0,0 +1 @@ +# see scalbnf.s diff --git a/src/math/x86/scalblnl.s b/src/math/x86/scalblnl.s new file mode 100644 index 0000000..86fe562 --- /dev/null +++ b/src/math/x86/scalblnl.s @@ -0,0 +1 @@ +# see scalbnl.s diff --git a/src/math/x86/scalbn.s b/src/math/x86/scalbn.s new file mode 100644 index 0000000..8bf302f --- /dev/null +++ b/src/math/x86/scalbn.s @@ -0,0 +1,33 @@ +.global ldexp +.type ldexp,@function +ldexp: + nop + +.global scalbln +.type scalbln,@function +scalbln: + nop + +.global scalbn +.type scalbn,@function +scalbn: + mov 12(%esp),%eax + add $0x3ffe,%eax + cmp $0x7ffd,%eax + jb 1f + sub $0x3ffe,%eax + sar $31,%eax + xor $0xfff,%eax + add $0x3ffe,%eax +1: inc %eax + fldl 4(%esp) + mov %eax,12(%esp) + mov $0x80000000,%eax + mov %eax,8(%esp) + xor %eax,%eax + mov %eax,4(%esp) + fldt 4(%esp) + fmulp + fstpl 4(%esp) + fldl 4(%esp) + ret diff --git a/src/math/x86/scalbnf.s b/src/math/x86/scalbnf.s new file mode 100644 index 0000000..9cb9ef5 --- /dev/null +++ b/src/math/x86/scalbnf.s @@ -0,0 +1,32 @@ +.global ldexpf +.type ldexpf,@function +ldexpf: + nop + +.global scalblnf +.type scalblnf,@function +scalblnf: + nop + +.global scalbnf +.type scalbnf,@function +scalbnf: + mov 8(%esp),%eax + add $0x3fe,%eax + cmp $0x7fd,%eax + jb 1f + sub $0x3fe,%eax + sar $31,%eax + xor $0x1ff,%eax + add $0x3fe,%eax +1: inc %eax + shl $20,%eax + flds 4(%esp) + mov %eax,8(%esp) + xor %eax,%eax + mov %eax,4(%esp) + fldl 4(%esp) + fmulp + fstps 4(%esp) + flds 4(%esp) + ret diff --git a/src/math/x86/scalbnl.s b/src/math/x86/scalbnl.s new file mode 100644 index 0000000..54414c2 --- /dev/null +++ b/src/math/x86/scalbnl.s @@ -0,0 +1,32 @@ +.global ldexpl +.type ldexpl,@function +ldexpl: + nop + +.global scalblnl +.type scalblnl,@function +scalblnl: + nop + +.global scalbnl +.type scalbnl,@function +scalbnl: + mov 16(%esp),%eax + add $0x3ffe,%eax + cmp $0x7ffd,%eax + jae 1f + inc %eax + fldt 4(%esp) + mov %eax,12(%esp) + mov $0x80000000,%eax + mov %eax,8(%esp) + xor %eax,%eax + mov %eax,4(%esp) + fldt 4(%esp) + fmulp + ret +1: fildl 16(%esp) + fldt 4(%esp) + fscale + fstp %st(1) + ret diff --git a/src/math/x86/sqrt.c b/src/math/x86/sqrt.c new file mode 100644 index 0000000..934fbcc --- /dev/null +++ b/src/math/x86/sqrt.c @@ -0,0 +1,15 @@ +#include "libm.h" + +double sqrt(double x) +{ + union ldshape ux; + unsigned fpsr; + __asm__ ("fsqrt; fnstsw %%ax": "=t"(ux.f), "=a"(fpsr) : "0"(x)); + if ((ux.i.m & 0x7ff) != 0x400) + return (double)ux.f; + /* Rounding to double would have encountered an exact halfway case. + Adjust mantissa downwards if fsqrt rounded up, else upwards. + (result of fsqrt could not have been exact) */ + ux.i.m ^= (fpsr & 0x200) + 0x300; + return (double)ux.f; +} diff --git a/src/math/x86/sqrtf.c b/src/math/x86/sqrtf.c new file mode 100644 index 0000000..41c65c2 --- /dev/null +++ b/src/math/x86/sqrtf.c @@ -0,0 +1,12 @@ +#include + +float sqrtf(float x) +{ + long double t; + /* The long double result has sufficient precision so that + * second rounding to float still keeps the returned value + * correctly rounded, see Pierre Roux, "Innocuous Double + * Rounding of Basic Arithmetic Operations". */ + __asm__ ("fsqrt" : "=t"(t) : "0"(x)); + return (float)t; +} diff --git a/src/math/x86/sqrtl.c b/src/math/x86/sqrtl.c new file mode 100644 index 0000000..864cfcc --- /dev/null +++ b/src/math/x86/sqrtl.c @@ -0,0 +1,7 @@ +#include + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/src/math/x86/trunc.s b/src/math/x86/trunc.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/trunc.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/truncf.s b/src/math/x86/truncf.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/truncf.s @@ -0,0 +1 @@ +# see floor.s diff --git a/src/math/x86/truncl.s b/src/math/x86/truncl.s new file mode 100644 index 0000000..bc29f15 --- /dev/null +++ b/src/math/x86/truncl.s @@ -0,0 +1 @@ +# see floor.s -- Gitee