From 516aa02104c3344903bdda078b7c87f71f94938d Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 26 Mar 2025 11:07:52 +1100 Subject: [PATCH] py/objint_longlong: Add arithmetic overflow checks. Long long big integer support now raises an exception on overflow rather than returning an undefined result. Also adds an error when shifting by a negative value. The new arithmetic checks are added in the misc.h header. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- py/misc.h | 105 ++++++++++++++++++++++++++++-- py/objint_longlong.c | 51 +++++++++++---- tests/basics/int_64_basics.py | 13 +++- tests/extmod/uctypes_addressof.py | 7 +- 4 files changed, 154 insertions(+), 22 deletions(-) diff --git a/py/misc.h b/py/misc.h index 5d0893bbdd..e034485838 100644 --- a/py/misc.h +++ b/py/misc.h @@ -33,10 +33,15 @@ #include #include #include +#include typedef unsigned char byte; typedef unsigned int uint; +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /** generic ops *************************************************/ #ifndef MIN @@ -374,26 +379,23 @@ static inline bool mp_check(bool value) { static inline uint32_t mp_popcount(uint32_t x) { return __popcnt(x); } -#else +#else // _MSC_VER #define mp_clz(x) __builtin_clz(x) #define mp_clzl(x) __builtin_clzl(x) #define mp_clzll(x) __builtin_clzll(x) #define mp_ctz(x) __builtin_ctz(x) #define mp_check(x) (x) -#if defined __has_builtin #if __has_builtin(__builtin_popcount) #define mp_popcount(x) __builtin_popcount(x) -#endif -#endif -#if !defined(mp_popcount) +#else static inline uint32_t mp_popcount(uint32_t x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; return (x * 0x01010101) >> 24; } -#endif -#endif +#endif // __has_builtin(__builtin_popcount) +#endif // _MSC_VER #define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0) #define MP_FIT_SIGNED(bits, value) \ @@ -426,4 +428,93 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { #endif } +// Overflow-checked operations for long long + +// Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10 +// +// Note that the builtins has a defined result when overflow occurs, whereas the custom +// functions below don't update the result if an overflow would occur (to avoid UB). +#define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5) + +#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_mul_ull_overflow __builtin_umulll_overflow +#else +inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) { + if (y > 0 && x > (ULLONG_MAX / y)) { + return true; // overflow + } + *res = x * y; + return false; +} +#endif + +#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_mul_ll_overflow __builtin_smulll_overflow +#else +inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) { + bool overflow; + + // Check for multiply overflow; see CERT INT32-C + if (x > 0) { // x is positive + if (y > 0) { // x and y are positive + overflow = (x > (LLONG_MAX / y)); + } else { // x positive, y nonpositive + overflow = (y < (LLONG_MIN / x)); + } // x positive, y nonpositive + } else { // x is nonpositive + if (y > 0) { // x is nonpositive, y is positive + overflow = (x < (LLONG_MIN / y)); + } else { // x and y are nonpositive + overflow = (x != 0 && y < (LLONG_MAX / x)); + } // End if x and y are nonpositive + } // End if x is nonpositive + + if (!overflow) { + *res = x * y; + } + + return overflow; +} +#endif + +#if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_add_ll_overflow __builtin_saddll_overflow +#else +inline static bool mp_add_ll_overflow(long long int lhs, long long int rhs, long long int *res) { + bool overflow; + + if (rhs > 0) { + overflow = (lhs > LLONG_MAX - rhs); + } else { + overflow = (lhs < LLONG_MIN - rhs); + } + + if (!overflow) { + *res = lhs + rhs; + } + + return overflow; +} +#endif + +#if __has_builtin(__builtin_ssubll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_sub_ll_overflow __builtin_ssubll_overflow +#else +inline static bool mp_sub_ll_overflow(long long int lhs, long long int rhs, long long int *res) { + bool overflow; + + if (rhs > 0) { + overflow = (lhs < LLONG_MIN + rhs); + } else { + overflow = (lhs > LLONG_MAX + rhs); + } + + if (!overflow) { + *res = lhs - rhs; + } + + return overflow; +} +#endif + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 5b60eb65ad..db09503215 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -31,6 +31,7 @@ #include "py/smallint.h" #include "py/objint.h" #include "py/runtime.h" +#include "py/misc.h" #if MICROPY_PY_BUILTINS_FLOAT #include @@ -43,6 +44,10 @@ const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX}; #endif +static void raise_long_long_overflow(void) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage")); +} + mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) { int delta = 1; if (!big_endian) { @@ -120,7 +125,6 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) { // small int if the value fits without truncation case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val); - case MP_UNARY_OP_POSITIVE: return o_in; case MP_UNARY_OP_NEGATIVE: @@ -147,6 +151,8 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) { mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { long long lhs_val; long long rhs_val; + bool overflow = false; + long long result; if (mp_obj_is_small_int(lhs_in)) { lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in); @@ -167,13 +173,16 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i switch (op) { case MP_BINARY_OP_ADD: case MP_BINARY_OP_INPLACE_ADD: - return mp_obj_new_int_from_ll(lhs_val + rhs_val); + overflow = mp_add_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_SUBTRACT: case MP_BINARY_OP_INPLACE_SUBTRACT: - return mp_obj_new_int_from_ll(lhs_val - rhs_val); + overflow = mp_sub_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_MULTIPLY: case MP_BINARY_OP_INPLACE_MULTIPLY: - return mp_obj_new_int_from_ll(lhs_val * rhs_val); + overflow = mp_mul_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_FLOOR_DIVIDE: case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE: if (rhs_val == 0) { @@ -199,9 +208,21 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i case MP_BINARY_OP_LSHIFT: case MP_BINARY_OP_INPLACE_LSHIFT: - return mp_obj_new_int_from_ll(lhs_val << (int)rhs_val); + if ((int)rhs_val < 0) { + // negative shift not allowed + mp_raise_ValueError(MP_ERROR_TEXT("negative shift count")); + } + result = lhs_val << (int)rhs_val; + // Left-shifting of negative values is implementation defined in C, but assume compiler + // will give us typical 2s complement behaviour unless the value overflows + overflow = rhs_val > 0 && ((lhs_val >= 0 && result < lhs_val) || (lhs_val < 0 && result > lhs_val)); + break; case MP_BINARY_OP_RSHIFT: case MP_BINARY_OP_INPLACE_RSHIFT: + if ((int)rhs_val < 0) { + // negative shift not allowed + mp_raise_ValueError(MP_ERROR_TEXT("negative shift count")); + } return mp_obj_new_int_from_ll(lhs_val >> (int)rhs_val); case MP_BINARY_OP_POWER: @@ -213,18 +234,18 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i mp_raise_ValueError(MP_ERROR_TEXT("negative power with no float support")); #endif } - long long ans = 1; - while (rhs_val > 0) { + result = 1; + while (rhs_val > 0 && !overflow) { if (rhs_val & 1) { - ans *= lhs_val; + overflow = mp_mul_ll_overflow(result, lhs_val, &result); } - if (rhs_val == 1) { + if (rhs_val == 1 || overflow) { break; } rhs_val /= 2; - lhs_val *= lhs_val; + overflow = mp_mul_ll_overflow(lhs_val, lhs_val, &lhs_val); } - return mp_obj_new_int_from_ll(ans); + break; } case MP_BINARY_OP_LESS: @@ -242,6 +263,12 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i return MP_OBJ_NULL; // op not supported } + if (overflow) { + raise_long_long_overflow(); + } + + return mp_obj_new_int_from_ll(result); + zero_division: mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("divide by zero")); } @@ -267,7 +294,7 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) { mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) { // TODO raise an exception if the unsigned long long won't fit if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) { - mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("ulonglong too large")); + raise_long_long_overflow(); } return mp_obj_new_int_from_ll(val); } diff --git a/tests/basics/int_64_basics.py b/tests/basics/int_64_basics.py index 73a06b64b1..289ea49b65 100644 --- a/tests/basics/int_64_basics.py +++ b/tests/basics/int_64_basics.py @@ -117,10 +117,21 @@ x = -4611686018427387904 # big # sys.maxsize is a constant bigint, so test it's compatible with dynamic ones import sys if hasattr(sys, "maxsize"): - print(sys.maxsize + 1 - 1 == sys.maxsize) + print(sys.maxsize - 1 + 1 == sys.maxsize) else: print(True) # No maxsize property in this config # test extraction of big int value via mp_obj_get_int_maybe x = 1 << 62 print('a' * (x + 4 - x)) + +# negative shifts are invalid +try: + print((1 << 48) >> -4) +except ValueError as e: + print(e) + +try: + print((1 << 48) << -6) +except ValueError as e: + print(e) diff --git a/tests/extmod/uctypes_addressof.py b/tests/extmod/uctypes_addressof.py index c83089d0f7..213fcc05ee 100644 --- a/tests/extmod/uctypes_addressof.py +++ b/tests/extmod/uctypes_addressof.py @@ -12,5 +12,8 @@ for i in range(8): print(uctypes.addressof(uctypes.bytearray_at(1 << i, 8))) # Test address that is bigger than the greatest small-int but still within the address range. -large_addr = maxsize + 1 -print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr) +try: + large_addr = maxsize + 1 + print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr) +except OverflowError: + print(True) # systems with 64-bit bigints will overflow on the above operation