diff -uNrp gcc.old/gcc/config/arm/elf.h gcc-3.3.2/gcc/config/arm/elf.h --- gcc.old/gcc/config/arm/elf.h 2002-11-21 16:29:24.000000000 -0500 +++ gcc-3.3.2/gcc/config/arm/elf.h 2004-02-03 14:21:54.000000000 -0500 @@ -46,7 +46,7 @@ Boston, MA 02111-1307, USA. */ #ifndef SUBTARGET_ASM_FLOAT_SPEC #define SUBTARGET_ASM_FLOAT_SPEC "\ -%{mapcs-float:-mfloat} %{msoft-float:-mno-fpu}" +%{mapcs-float:-mfloat} %{mhard-float:-mfpu=fpa} %{!mhard-float: %{msoft-float:-mfpu=softfpa} %{!msoft-float:-mfpu=softvfp}}" #endif #ifndef ASM_SPEC diff -uNrp gcc.old/gcc/config/arm/ieee754-df.S gcc-3.3.2/gcc/config/arm/ieee754-df.S --- gcc.old/gcc/config/arm/ieee754-df.S 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/config/arm/ieee754-df.S 2004-02-03 14:21:54.000000000 -0500 @@ -0,0 +1,1224 @@ +/* ieee754-df.S double-precision floating point support for ARM + + Copyright (C) 2003 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * For slightly simpler code please see the single precision version + * of this file. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + + +@ For FPA, float words are always big-endian. +@ For VFP, float words follow the memory system mode. +#if defined(__VFP_FP__) && !defined(__ARMEB__) +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#else +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#endif + + +#ifdef L_negdf2 + +ARM_FUNC_START negdf2 + @ flip sign bit + eor xh, xh, #0x80000000 + RET + + FUNC_END negdf2 + +#endif + +#ifdef L_addsubdf3 + +ARM_FUNC_START subdf3 + @ flip sign bit of second arg + eor yh, yh, #0x80000000 +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START adddf3 + +1: @ Compare both args, return zero if they differ only by sign. + teq xl, yl + eoreq ip, xh, yh + teqeq ip, #0x80000000 + beq LSYM(Lad_z) + + @ If first arg is 0 or -0, return second arg. + @ If second arg is 0 or -0, return first arg. + orrs ip, xl, xh, lsl #1 + moveq xl, yl + moveq xh, yh + orrnes ip, yl, yh, lsl #1 + RETc(eq) + + stmfd sp!, {r4, r5, lr} + + @ Mask out exponents. + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 @ ip = 0x7ff00000 (exponent field mask) + and r4, xh, ip + and r5, yh, ip + + @ If either of them is 0x7ff, result will be INF or NAN + teq r4, ip + teqne r5, ip + beq LSYM(Lad_i) + + @ Compute exponent difference. 
Make largest exponent in r4, + @ corresponding arg in xh-xl, and positive exponent difference in r5. + subs r5, r5, r4 + rsblt r5, r5, #0 + ble 1f + add r4, r4, r5 @ r4 = larger exponent (the one from y) + eor yl, xl, yl @ XOR-swap x and y (next six insns) + eor yh, xh, yh + eor xl, yl, xl + eor xh, yh, xh + eor yl, xl, yl + eor yh, xh, yh +1: + + @ If exponent difference is too large, return largest argument + @ already in xh-xl. We need up to 54 bits to handle proper rounding + @ of 0x1p54 - 1.1. + cmp r5, #(54 << 20) + RETLDM "r4, r5" hi + + @ Convert mantissa to signed integer. + tst xh, #0x80000000 + bic xh, xh, ip, lsl #1 + orr xh, xh, #0x00100000 + beq 1f + rsbs xl, xl, #0 + rsc xh, xh, #0 +1: + tst yh, #0x80000000 + bic yh, yh, ip, lsl #1 + orr yh, yh, #0x00100000 + beq 1f + rsbs yl, yl, #0 + rsc yh, yh, #0 +1: + @ If exponent == difference, one or both args were denormalized. + @ Since this is not the common case, rescale them off line. + teq r4, r5 + beq LSYM(Lad_d) +LSYM(Lad_x): + @ Scale down second arg with exponent difference. + @ Apply shift one bit left to first arg and the rest to second arg + @ to simplify things later, but only if exponent does not become 0. + mov ip, #0 + movs r5, r5, lsr #20 + beq 3f + teq r4, #(1 << 20) + beq 1f + movs xl, xl, lsl #1 + adc xh, ip, xh, lsl #1 + sub r4, r4, #(1 << 20) + subs r5, r5, #1 + beq 3f + + @ Shift yh-yl right per r5, keep leftover bits in ip. +1: rsbs lr, r5, #32 + blt 2f + mov ip, yl, lsl lr + mov yl, yl, lsr r5 + orr yl, yl, yh, lsl lr + mov yh, yh, asr r5 + b 3f +2: sub r5, r5, #32 + add lr, lr, #32 + cmp yl, #1 + adc ip, ip, yh, lsl lr + mov yl, yh, asr r5 + mov yh, yh, asr #32 +3: + @ the actual addition + adds xl, xl, yl + adc xh, xh, yh + + @ We now have a result in xh-xl-ip. + @ Keep absolute value in xh-xl-ip, sign in r5. + ands r5, xh, #0x80000000 + bpl LSYM(Lad_p) + rsbs ip, ip, #0 + rscs xl, xl, #0 + rsc xh, xh, #0 + + @ Determine how to normalize the result. 
+LSYM(Lad_p): + cmp xh, #0x00100000 + bcc LSYM(Lad_l) + cmp xh, #0x00200000 + bcc LSYM(Lad_r0) + cmp xh, #0x00400000 + bcc LSYM(Lad_r1) + + @ Result needs to be shifted right. + movs xh, xh, lsr #1 + movs xl, xl, rrx + movs ip, ip, rrx + orrcs ip, ip, #1 + add r4, r4, #(1 << 20) +LSYM(Lad_r1): + movs xh, xh, lsr #1 + movs xl, xl, rrx + movs ip, ip, rrx + orrcs ip, ip, #1 + add r4, r4, #(1 << 20) + + @ Our result is now properly aligned into xh-xl, remaining bits in ip. + @ Round with MSB of ip. If halfway between two numbers, round towards + @ LSB of xl = 0. +LSYM(Lad_r0): + adds xl, xl, ip, lsr #31 + adc xh, xh, #0 + teq ip, #0x80000000 + biceq xl, xl, #1 + + @ One extreme rounding case may add a new MSB. Adjust exponent. + @ That MSB will be cleared when exponent is merged below. + tst xh, #0x00200000 + addne r4, r4, #(1 << 20) + + @ Make sure we did not bust our exponent. + adds ip, r4, #(1 << 20) + bmi LSYM(Lad_o) + + @ Pack final result together. +LSYM(Lad_e): + bic xh, xh, #0x00300000 + orr xh, xh, r4 + orr xh, xh, r5 + RETLDM "r4, r5" + +LSYM(Lad_l): + @ Result must be shifted left and exponent adjusted. + @ No rounding necessary since ip will always be 0. +#if __ARM_ARCH__ < 5 + + teq xh, #0 + movne r3, #-11 + moveq r3, #21 + moveq xh, xl + moveq xl, #0 + mov r2, xh + movs ip, xh, lsr #16 + moveq r2, r2, lsl #16 + addeq r3, r3, #16 + tst r2, #0xff000000 + moveq r2, r2, lsl #8 + addeq r3, r3, #8 + tst r2, #0xf0000000 + moveq r2, r2, lsl #4 + addeq r3, r3, #4 + tst r2, #0xc0000000 + moveq r2, r2, lsl #2 + addeq r3, r3, #2 + tst r2, #0x80000000 + addeq r3, r3, #1 + +#else + + teq xh, #0 + moveq xh, xl + moveq xl, #0 + clz r3, xh + addeq r3, r3, #32 + sub r3, r3, #11 + +#endif + + @ determine how to shift the value. + subs r2, r3, #32 + bge 2f + adds r2, r2, #12 + ble 1f + + @ shift value left 21 to 31 bits, or actually right 11 to 1 bits + @ since a register switch happened above. 
+ add ip, r2, #20 + rsb r2, r2, #12 + mov xl, xh, lsl ip + mov xh, xh, lsr r2 + b 3f + + @ actually shift value left 1 to 20 bits, which might also represent + @ 32 to 52 bits if counting the register switch that happened earlier. +1: add r2, r2, #20 +2: rsble ip, r2, #32 + mov xh, xh, lsl r2 + orrle xh, xh, xl, lsr ip + movle xl, xl, lsl r2 + + @ adjust exponent accordingly. +3: subs r4, r4, r3, lsl #20 + bgt LSYM(Lad_e) + + @ Exponent too small, denormalize result. + @ Find out proper shift value. + mvn r4, r4, asr #20 + subs r4, r4, #30 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, sign is in r5. + add r4, r4, #20 + rsb r2, r4, #32 + mov xl, xl, lsr r4 + orr xl, xl, xh, lsl r2 + orr xh, r5, xh, lsr r4 + RETLDM "r4, r5" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. +1: rsb r4, r4, #12 + rsb r2, r4, #32 + mov xl, xl, lsr r2 + orr xl, xl, xh, lsl r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. +2: mov xl, xh, lsr r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Adjust exponents for denormalized arguments. +LSYM(Lad_d): + teq r4, #0 + eoreq xh, xh, #0x00100000 + addeq r4, r4, #(1 << 20) + eor yh, yh, #0x00100000 + subne r5, r5, #(1 << 20) + b LSYM(Lad_x) + + @ Result is x - x = 0, unless x = INF or NAN. +LSYM(Lad_z): + sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000 + and r2, xh, ip + teq r2, ip + orreq xh, ip, #0x00080000 + movne xh, #0 + mov xl, #0 + RET + + @ Overflow: return INF. +LSYM(Lad_o): + orr xh, r5, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5" + + @ At least one of x or y is INF/NAN. 
+ @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) + @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) + @ if either is NAN: return NAN + @ if opposite sign: return NAN + @ return xh-xl (which is INF or -INF) +LSYM(Lad_i): + teq r4, ip + movne xh, yh + movne xl, yl + teqeq r5, ip + RETLDM "r4, r5" ne + + orrs r4, xl, xh, lsl #12 + orreqs r4, yl, yh, lsl #12 + teqeq xh, yh + orrne xh, r5, #0x00080000 + movne xl, #0 + RETLDM "r4, r5" + + FUNC_END subdf3 + FUNC_END adddf3 + +ARM_FUNC_START floatunsidf + teq r0, #0 + moveq r1, #0 + RETc(eq) + stmfd sp!, {r4, r5, lr} + mov r4, #(0x400 << 20) @ initial exponent + add r4, r4, #((52-1) << 20) + mov r5, #0 @ sign bit is 0 + mov xl, r0 + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END floatunsidf + +ARM_FUNC_START floatsidf + teq r0, #0 + moveq r1, #0 + RETc(eq) + stmfd sp!, {r4, r5, lr} + mov r4, #(0x400 << 20) @ initial exponent + add r4, r4, #((52-1) << 20) + ands r5, r0, #0x80000000 @ sign bit in r5 + rsbmi r0, r0, #0 @ absolute value + mov xl, r0 + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END floatsidf + +ARM_FUNC_START extendsfdf2 + movs r2, r0, lsl #1 + beq 1f @ value is 0.0 or -0.0 + mov xh, r2, asr #3 @ stretch exponent + mov xh, xh, rrx @ retrieve sign bit + mov xl, r2, lsl #28 @ retrieve remaining bits + ands r2, r2, #0xff000000 @ isolate exponent + beq 2f @ exponent was 0 but not mantissa + teq r2, #0xff000000 @ check if INF or NAN + eorne xh, xh, #0x38000000 @ fixup exponent otherwise. + RET + +1: mov xh, r0 + mov xl, #0 + RET + +2: @ value was denormalized. We can normalize it now. + stmfd sp!, {r4, r5, lr} + mov r4, #(0x380 << 20) @ setup corresponding exponent + add r4, r4, #(1 << 20) + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 + b LSYM(Lad_l) + + FUNC_END extendsfdf2 + +#endif /* L_addsubdf3 */ + +#ifdef L_muldivdf3 + +ARM_FUNC_START muldf3 + + stmfd sp!, {r4, r5, r6, lr} + + @ Mask out exponents. 
+ mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and r4, xh, ip + and r5, yh, ip + + @ Trap any INF/NAN. + teq r4, ip + teqne r5, ip + beq LSYM(Lml_s) + + @ Trap any multiplication by 0. + orrs r6, xl, xh, lsl #1 + orrnes r6, yl, yh, lsl #1 + beq LSYM(Lml_z) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then add both exponents together. + movs r4, r4, lsr #1 + teqne r5, #0 + beq LSYM(Lml_d) +LSYM(Lml_x): + add r4, r4, r5, asr #1 + + @ Preserve final sign in r4 along with exponent for now. + teq xh, yh + orrmi r4, r4, #0x8000 + + @ Convert mantissa to unsigned integer. + bic xh, xh, ip, lsl #1 + bic yh, yh, ip, lsl #1 + orr xh, xh, #0x00100000 + orr yh, yh, #0x00100000 + +#if __ARM_ARCH__ < 4 + + @ Well, no way to make it shorter without the umull instruction. + @ We must perform that 53 x 53 bit multiplication by hand. + stmfd sp!, {r7, r8, r9, sl, fp} + mov r7, xl, lsr #16 + mov r8, yl, lsr #16 + mov r9, xh, lsr #16 + mov sl, yh, lsr #16 + bic xl, xl, r7, lsl #16 + bic yl, yl, r8, lsl #16 + bic xh, xh, r9, lsl #16 + bic yh, yh, sl, lsl #16 + mul ip, xl, yl + mul fp, xl, r8 + mov lr, #0 + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, r7, yl + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, xl, sl + mov r5, #0 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r7, yh + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, r8 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r9, yl + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, sl + mul r6, r9, sl + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, r9, yh + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, xl, yh + adds lr, lr, fp + mul fp, r7, sl + adcs r5, r5, fp + mul fp, xh, yl + adc r6, r6, #0 + adds lr, lr, fp + mul fp, r9, r8 + adcs r5, r5, fp + mul fp, r7, r8 + adc r6, r6, #0 + adds lr, lr, fp + mul fp, xh, yh + 
adcs r5, r5, fp + adc r6, r6, #0 + ldmfd sp!, {r7, r8, r9, sl, fp} + +#else + + @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits. + umull ip, lr, xl, yl + mov r5, #0 + umlal lr, r5, xl, yh + umlal lr, r5, xh, yl + mov r6, #0 + umlal r5, r6, xh, yh + +#endif + + @ The LSBs in ip are only significant for the final rounding. + @ Fold them into one bit of lr. + teq ip, #0 + orrne lr, lr, #1 + + @ Put final sign in xh. + mov xh, r4, lsl #16 + bic r4, r4, #0x8000 + + @ Adjust result if one extra MSB appeared (one of four times). + tst r6, #(1 << 9) + beq 1f + add r4, r4, #(1 << 19) + movs r6, r6, lsr #1 + movs r5, r5, rrx + movs lr, lr, rrx + orrcs lr, lr, #1 +1: + @ Scale back to 53 bits. + @ xh contains sign bit already. + orr xh, xh, r6, lsl #12 + orr xh, xh, r5, lsr #20 + mov xl, r5, lsl #12 + orr xl, xl, lr, lsr #20 + + @ Apply exponent bias, check range for underflow. + sub r4, r4, #0x00f80000 + subs r4, r4, #0x1f000000 + ble LSYM(Lml_u) + + @ Round the result. + movs lr, lr, lsl #12 + bpl 1f + adds xl, xl, #1 + adc xh, xh, #0 + teq lr, #0x80000000 + biceq xl, xl, #1 + + @ Rounding may have produced an extra MSB here. + @ The extra bit is cleared before merging the exponent below. + tst xh, #0x00200000 + addne r4, r4, #(1 << 19) +1: + @ Check exponent for overflow. + adds ip, r4, #(1 << 19) + tst ip, #(1 << 30) + bne LSYM(Lml_o) + + @ Add final exponent. + bic xh, xh, #0x00300000 + orr xh, xh, r4, lsl #1 + RETLDM "r4, r5, r6" + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor xh, xh, yh +LSYM(Ldv_z): + bic xh, xh, #0x7fffffff + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Check if denormalized result is possible, otherwise return signed 0. +LSYM(Lml_u): + cmn r4, #(53 << 19) + movle xl, #0 + bicle xh, xh, #0x7fffffff + RETLDM "r4, r5, r6" le + + @ Find out proper shift value. +LSYM(Lml_r): + mvn r4, r4, asr #19 + subs r4, r4, #30 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. 
+ add r4, r4, #20 + rsb r5, r4, #32 + mov r3, xl, lsl r5 + mov xl, xl, lsr r4 + orr xl, xl, xh, lsl r5 + movs xh, xh, lsl #1 + mov xh, xh, lsr r4 + mov xh, xh, rrx + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + teq lr, #0 + teqeq r3, #0x80000000 + biceq xl, xl, #1 + RETLDM "r4, r5, r6" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. Then round. +1: rsb r4, r4, #12 + rsb r5, r4, #32 + mov r3, xl, lsl r4 + mov xl, xl, lsr r5 + orr xl, xl, xh, lsl r4 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + teq lr, #0 + teqeq r3, #0x80000000 + biceq xl, xl, #1 + RETLDM "r4, r5, r6" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. +2: rsb r5, r4, #32 + mov r6, xl, lsl r5 + mov r3, xl, lsr r4 + orr r3, r3, xh, lsl r5 + mov xl, xh, lsr r4 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + orrs r6, r6, lr + teqeq r3, #0x80000000 + biceq xl, xl, #1 + RETLDM "r4, r5, r6" + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + mov lr, #0 + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, lr, xh, lsl #1 + tst xh, #0x00100000 + subeq r4, r4, #(1 << 19) + beq 1b + orr xh, xh, r6 + teq r5, #0 + bne LSYM(Lml_x) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, lr, yh, lsl #1 + tst yh, #0x00100000 + subeq r5, r5, #(1 << 20) + beq 3b + orr yh, yh, r6 + b LSYM(Lml_x) + + @ One or both args are INF or NAN. +LSYM(Lml_s): + orrs r6, xl, xh, lsl #1 + orrnes r6, yl, yh, lsl #1 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r4, ip + bne 1f + orrs r6, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r5, ip + bne LSYM(Lml_i) + orrs r6, yl, yh, lsl #12 + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. 
+LSYM(Lml_i): + eor xh, xh, yh + + @ Overflow: return INF (sign already in xh). +LSYM(Lml_o): + and xh, xh, #0x80000000 + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Return NAN. +LSYM(Lml_n): + mov xh, #0x7f000000 + orr xh, xh, #0x00f80000 @ xh = 0x7ff80000; xl is left untouched + RETLDM "r4, r5, r6" + + FUNC_END muldf3 + +ARM_FUNC_START divdf3 + + stmfd sp!, {r4, r5, r6, lr} + + @ Mask out exponents. + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and r4, xh, ip + and r5, yh, ip + + @ Trap any INF/NAN or zeroes. + teq r4, ip + teqne r5, ip + orrnes r6, xl, xh, lsl #1 + orrnes r6, yl, yh, lsl #1 + beq LSYM(Ldv_s) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then subtract the divisor exponent from the dividend exponent. + movs r4, r4, lsr #1 + teqne r5, #0 + beq LSYM(Ldv_d) +LSYM(Ldv_x): + sub r4, r4, r5, asr #1 + + @ Preserve final sign in lr. + eor lr, xh, yh + + @ Convert mantissa to unsigned integer. + @ Dividend -> r5-r6, divisor -> yh-yl. + mov r5, #0x10000000 + mov yh, yh, lsl #12 + orr yh, r5, yh, lsr #4 + orr yh, yh, yl, lsr #24 + movs yl, yl, lsl #8 + mov xh, xh, lsl #12 + teqeq yh, r5 + beq LSYM(Ldv_1) + orr r5, r5, xh, lsr #4 + orr r5, r5, xl, lsr #24 + mov r6, xl, lsl #8 + + @ Initialize xh with final sign bit. + and xh, lr, #0x80000000 + + @ Ensure result will land at a known bit position. + cmp r5, yh + cmpeq r6, yl + bcs 1f + sub r4, r4, #(1 << 19) + movs yh, yh, lsr #1 + mov yl, yl, rrx +1: + @ Apply exponent bias, check range for over/underflow. + add r4, r4, #0x1f000000 + add r4, r4, #0x00f80000 @ bias added in total: 0x3ff << 19 + cmn r4, #(53 << 19) + ble LSYM(Ldv_z) + cmp r4, ip, lsr #1 + bge LSYM(Lml_o) + + @ Perform first subtraction to align result to a nibble. + subs r6, r6, yl + sbc r5, r5, yh + movs yh, yh, lsr #1 + mov yl, yl, rrx + mov xl, #0x00100000 + mov ip, #0x00080000 + + @ The actual division loop. 
+1: subs lr, r6, yl + sbcs lr, r5, yh + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #1 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #2 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #3 + + orrs lr, r5, r6 + beq 2f + mov r5, r5, lsl #4 + orr r5, r5, r6, lsr #28 + mov r6, r6, lsl #4 + mov yh, yh, lsl #3 + orr yh, yh, yl, lsr #29 + mov yl, yl, lsl #3 + movs ip, ip, lsr #4 + bne 1b + + @ We are done with a word of the result. + @ Loop again for the low word if this pass was for the high word. + tst xh, #0x00100000 + bne 3f + orr xh, xh, xl + mov xl, #0 + mov ip, #0x80000000 + b 1b +2: + @ Be sure result starts in the high word. + tst xh, #0x00100000 + orreq xh, xh, xl + moveq xl, #0 +3: + @ Check if denormalized result is needed. + cmp r4, #0 + ble LSYM(Ldv_u) + + @ Apply proper rounding. + subs ip, r5, yh + subeqs ip, r6, yl + adcs xl, xl, #0 + adc xh, xh, #0 + teq ip, #0 + biceq xl, xl, #1 + + @ Add exponent to result. + bic xh, xh, #0x00100000 + orr xh, xh, r4, lsl #1 + RETLDM "r4, r5, r6" + + @ Division by 0x1p*: shortcut a lot of code. +LSYM(Ldv_1): + and lr, lr, #0x80000000 + orr xh, lr, xh, lsr #12 + add r4, r4, #0x1f000000 + add r4, r4, #0x00f80000 + cmp r4, ip, lsr #1 + bge LSYM(Lml_o) + cmp r4, #0 + orrgt xh, xh, r4, lsl #1 + RETLDM "r4, r5, r6" gt + + cmn r4, #(53 << 19) + ble LSYM(Ldv_z) + orr xh, xh, #0x00100000 + mov lr, #0 + b LSYM(Lml_r) + + @ Result must be denormalized: put remainder in lr for + @ rounding considerations. +LSYM(Ldv_u): + orr lr, r5, r6 + b LSYM(Lml_r) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. 
+LSYM(Ldv_d): + mov lr, #0 + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, lr, xh, lsl #1 + tst xh, #0x00100000 + subeq r4, r4, #(1 << 19) + beq 1b + orr xh, xh, r6 + teq r5, #0 + bne LSYM(Ldv_x) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, lr, yh, lsl #1 + tst yh, #0x00100000 + subeq r5, r5, #(1 << 20) + beq 3b + orr yh, yh, r6 + b LSYM(Ldv_x) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + teq r4, ip + teqeq r5, ip + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r4, ip + bne 1f + orrs r4, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN / -> NAN + b LSYM(Lml_i) @ INF / -> INF +1: teq r5, ip + bne 2f + orrs r5, yl, yh, lsl #12 + bne LSYM(Lml_n) @ / NAN -> NAN + b LSYM(Lml_z) @ / INF -> 0 +2: @ One or both arguments are 0. + orrs r4, xl, xh, lsl #1 + bne LSYM(Lml_i) @ / 0 -> INF + orrs r5, yl, yh, lsl #1 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END divdf3 + +#endif /* L_muldivdf3 */ + +#ifdef L_cmpdf2 + +FUNC_START gedf2 +ARM_FUNC_START gtdf2 + mov ip, #-1 + b 1f + +FUNC_START ledf2 +ARM_FUNC_START ltdf2 + mov ip, #1 + b 1f + +FUNC_START nedf2 +FUNC_START eqdf2 +ARM_FUNC_START cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +1: stmfd sp!, {r4, r5, lr} + + @ Trap any INF/NAN first. + mov lr, #0x7f000000 + orr lr, lr, #0x00f00000 + and r4, xh, lr + and r5, yh, lr + teq r4, lr + teqne r5, lr + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh + teqeq xl, yl @ and xl == yl + moveq r0, #0 @ then equal. + RETLDM "r4, r5" eq + + @ Check for sign difference. + teq xh, yh + movmi r0, xh, asr #31 + orrmi r0, r0, #1 + RETLDM "r4, r5" mi + + @ Compare exponents. + cmp r4, r5 + + @ Compare mantissa if exponents are equal. 
+ moveq xh, xh, lsl #12 + cmpeq xh, yh, lsl #12 + cmpeq xl, yl + movcs r0, yh, asr #31 + mvncc r0, yh, asr #31 + orr r0, r0, #1 + RETLDM "r4, r5" + + @ Look for a NAN. +3: teq r4, lr + bne 4f + orrs xl, xl, xh, lsl #12 + bne 5f @ x is NAN +4: teq r5, lr + bne 2b + orrs yl, yl, yh, lsl #12 + beq 2b @ y is not NAN +5: mov r0, ip @ return unordered code from ip + RETLDM "r4, r5" + + FUNC_END gedf2 + FUNC_END gtdf2 + FUNC_END ledf2 + FUNC_END ltdf2 + FUNC_END nedf2 + FUNC_END eqdf2 + FUNC_END cmpdf2 + +#endif /* L_cmpdf2 */ + +#ifdef L_unorddf2 + +ARM_FUNC_START unorddf2 + str lr, [sp, #-4]! + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and lr, xh, ip + teq lr, ip + bne 1f + orrs xl, xl, xh, lsl #12 + bne 3f @ x is NAN +1: and lr, yh, ip + teq lr, ip + bne 2f + orrs yl, yl, yh, lsl #12 + bne 3f @ y is NAN +2: mov r0, #0 @ arguments are ordered. + RETLDM + +3: mov r0, #1 @ arguments are unordered. + RETLDM + + FUNC_END unorddf2 + +#endif /* L_unorddf2 */ + +#ifdef L_fixdfsi + +ARM_FUNC_START fixdfsi + orrs ip, xl, xh, lsl #1 + beq 1f @ value is 0. + + mov r3, r3, rrx @ preserve C flag (the actual sign) + + @ check exponent range. + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and r2, xh, ip + teq r2, ip + beq 2f @ value is INF or NAN + bic ip, ip, #0x40000000 + cmp r2, ip + bcc 1f @ value is too small + add ip, ip, #(31 << 20) + cmp r2, ip + bcs 3f @ value is too large + + rsb r2, r2, ip @ r2 = (0x3ff + 31 - biased exponent) << 20 + mov ip, xh, lsl #11 + orr ip, ip, #0x80000000 + orr ip, ip, xl, lsr #21 + mov r2, r2, lsr #20 @ r2 = right shift count + tst r3, #0x80000000 @ the sign bit + mov r0, ip, lsr r2 + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: ands r0, r3, #0x80000000 @ the sign bit + moveq r0, #0x7fffffff @ maximum signed positive si + RET + +4: mov r0, #0 @ How should we convert NAN? 
+ RET + + FUNC_END fixdfsi + +#endif /* L_fixdfsi */ + +#ifdef L_fixunsdfsi + +ARM_FUNC_START fixunsdfsi + orrs ip, xl, xh, lsl #1 + movcss r0, #0 @ value is negative + RETc(eq) @ or 0 (xl, xh overlap r0) + + @ check exponent range. + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and r2, xh, ip + teq r2, ip + beq 2f @ value is INF or NAN + bic ip, ip, #0x40000000 + cmp r2, ip + bcc 1f @ value is too small + add ip, ip, #(31 << 20) + cmp r2, ip + bhi 3f @ value is too large + + rsb r2, r2, ip + mov ip, xh, lsl #11 + orr ip, ip, #0x80000000 + orr ip, ip, xl, lsr #21 + mov r2, r2, lsr #20 + mov r0, ip, lsr r2 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END fixunsdfsi + +#endif /* L_fixunsdfsi */ + +#ifdef L_truncdfsf2 + +ARM_FUNC_START truncdfsf2 + orrs r2, xl, xh, lsl #1 + moveq r0, r2, rrx + RETc(eq) @ value is 0.0 or -0.0 + + @ check exponent range. + mov ip, #0x7f000000 + orr ip, ip, #0x00f00000 + and r2, ip, xh + teq r2, ip + beq 2f @ value is INF or NAN + bic xh, xh, ip + cmp r2, #(0x380 << 20) + bls 4f @ value is too small + + @ shift and round mantissa +1: movs r3, xl, lsr #29 + adc r3, r3, xh, lsl #3 + + @ if halfway between two numbers, round towards LSB = 0. + mov xl, xl, lsl #3 + teq xl, #0x80000000 + biceq r3, r3, #1 + + @ rounding might have created an extra MSB. If so adjust exponent. + tst r3, #0x00800000 + addne r2, r2, #(1 << 20) + bicne r3, r3, #0x00800000 + + @ check exponent for overflow + mov ip, #(0x400 << 20) + orr ip, ip, #(0x07f << 20) + cmp r2, ip + bcs 3f @ overflow + + @ adjust exponent, merge with sign bit and mantissa. 
+ movs xh, xh, lsl #1 + mov r2, r2, lsl #4 + orr r0, r3, r2, rrx + eor r0, r0, #0x40000000 @ flip exponent MSB: double bias -> float bias + RET + +2: @ check for NAN + orrs xl, xl, xh, lsl #12 + movne r0, #0x7f000000 + orrne r0, r0, #0x00c00000 + RETc(ne) @ return NAN + +3: @ return INF with sign + and r0, xh, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + +4: @ check if denormalized value is possible + subs r2, r2, #((0x380 - 24) << 20) + andle r0, xh, #0x80000000 @ too small, return signed 0. + RETc(le) + + @ denormalize value so we can resume with the code above afterwards. + orr xh, xh, #0x00100000 + mov r2, r2, lsr #20 + rsb r2, r2, #25 + cmp r2, #20 + bgt 6f + + rsb ip, r2, #32 + mov r3, xl, lsl ip + mov xl, xl, lsr r2 + orr xl, xl, xh, lsl ip + movs xh, xh, lsl #1 + mov xh, xh, lsr r2 + mov xh, xh, rrx +5: teq r3, #0 @ fold r3 bits into the LSB + orrne xl, xl, #1 @ for rounding considerations. + mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent + b 1b + +6: rsb r2, r2, #(12 + 20) + rsb ip, r2, #32 + mov r3, xl, lsl r2 + mov xl, xl, lsr ip + orr xl, xl, xh, lsl r2 + and xh, xh, #0x80000000 + b 5b + + FUNC_END truncdfsf2 + +#endif /* L_truncdfsf2 */ diff -uNrp gcc.old/gcc/config/arm/ieee754-sf.S gcc-3.3.2/gcc/config/arm/ieee754-sf.S --- gcc.old/gcc/config/arm/ieee754-sf.S 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/config/arm/ieee754-sf.S 2004-02-03 14:21:54.000000000 -0500 @@ -0,0 +1,815 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. 
+ + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +#ifdef L_negsf2 + +ARM_FUNC_START negsf2 + eor r0, r0, #0x80000000 @ flip sign bit + RET + + FUNC_END negsf2 + +#endif + +#ifdef L_addsubsf3 + +ARM_FUNC_START subsf3 + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 + +1: @ Compare both args, return zero if equal but the sign. + eor r2, r0, r1 + teq r2, #0x80000000 + beq LSYM(Lad_z) + + @ If first arg is 0 or -0, return second arg. + @ If second arg is 0 or -0, return first arg. + bics r2, r0, #0x80000000 + moveq r0, r1 + bicnes r2, r1, #0x80000000 + RETc(eq) + + @ Mask out exponents. 
+ mov ip, #0xff000000 @ ip lsr #1 = 0x7f800000 (exponent field mask) + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ If either of them is 255, result will be INF or NAN + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + beq LSYM(Lad_i) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + subs r3, r3, r2 + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bits to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #(25 << 23) + RETc(hi) + + @ Convert mantissa to signed integer. + tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not the common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Scale down second arg with exponent difference. + @ Apply shift one bit left to first arg and the rest to second arg + @ to simplify things later, but only if exponent does not become 0. + movs r3, r3, lsr #23 + teqne r2, #(1 << 23) + movne r0, r0, lsl #1 + subne r2, r2, #(1 << 23) + subne r3, r3, #1 + + @ Shift second arg into ip, keep leftover bits in r1. + mov ip, r1, asr r3 + rsb r3, r3, #32 + mov r1, r1, lsl r3 + + add r0, r0, ip @ the actual addition + + @ We now have a 64 bit result in r0-r1. + @ Keep absolute value in r0-r1, sign in r3. + ands r3, r0, #0x80000000 + bpl LSYM(Lad_p) + rsbs r1, r1, #0 + rsc r0, r0, #0 + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_l) + cmp r0, #0x01000000 + bcc LSYM(Lad_r0) + cmp r0, #0x02000000 + bcc LSYM(Lad_r1) + + @ Result needs to be shifted right. 
+ movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #(1 << 23) +LSYM(Lad_r1): + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #(1 << 23) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_r0): + add r0, r0, r1, lsr #31 + teq r1, #0x80000000 + biceq r0, r0, #1 + + @ Rounding may have added a new MSB. Adjust exponent. + @ That MSB will be cleared when exponent is merged below. + tst r0, #0x01000000 + addne r2, r2, #(1 << 23) + + @ Make sure we did not bust our exponent. + cmp r2, #(254 << 23) + bhi LSYM(Lad_o) + + @ Pack final result together. +LSYM(Lad_e): + bic r0, r0, #0x01800000 + orr r0, r0, r2 + orr r0, r0, r3 + RET + + @ Result must be shifted left. + @ No rounding necessary since r1 will always be 0. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #(12 << 23) + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #(8 << 23) + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #(4 << 23) + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #(2 << 23) + tst r0, #0x00800000 + moveq r0, r0, lsl #1 + subeq r2, r2, #(1 << 23) + cmp r2, #0 + bgt LSYM(Lad_e) + +#else + + clz ip, r0 + sub ip, ip, #8 + mov r0, r0, lsl ip + subs r2, r2, ip, lsl #23 + bgt LSYM(Lad_e) + +#endif + + @ Exponent too small, denormalize result. + mvn r2, r2, asr #23 + add r2, r2, #2 + orr r0, r3, r0, lsr r2 + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. +LSYM(Lad_d): + teq r2, #0 + eoreq r0, r0, #0x00800000 + addeq r2, r2, #(1 << 23) + eor r1, r1, #0x00800000 + subne r3, r3, #(1 << 23) + b LSYM(Lad_x) + + @ Result is x - x = 0, unless x is INF or NAN. +LSYM(Lad_z): + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + moveq r0, ip, asr #2 + movne r0, #0 + RET + + @ Overflow: return INF. 
+LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. + @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ return r0 (which is INF or -INF) +LSYM(Lad_i): + teq r2, ip, lsr #1 + movne r0, r1 + teqeq r3, ip, lsr #1 + RETc(ne) + movs r2, r0, lsl #9 + moveqs r2, r1, lsl #9 + teqeq r0, r1 + orrne r0, r3, #0x00400000 @ NAN + RET + + FUNC_END addsf3 + FUNC_END subsf3 + +ARM_FUNC_START floatunsisf + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf + ands r3, r0, #0x80000000 + rsbmi r0, r0, #0 + +1: teq r0, #0 + RETc(eq) + + mov r1, #0 + mov r2, #((127 + 23) << 23) + tst r0, #0xfc000000 + beq LSYM(Lad_p) + + @ We need to scale the value a little before branching to code above. + tst r0, #0xf0000000 + movne r1, r0, lsl #28 + movne r0, r0, lsr #4 + addne r2, r2, #(4 << 23) + tst r0, #0x0c000000 + beq LSYM(Lad_p) + mov r1, r1, lsr #2 + orr r1, r1, r0, lsl #30 + mov r0, r0, lsr #2 + add r2, r2, #(2 << 23) + b LSYM(Lad_p) + + FUNC_END floatsisf + FUNC_END floatunsisf + +#endif /* L_addsubsf3 */ + +#ifdef L_muldivsf3 + +ARM_FUNC_START mulsf3 + + @ Mask out exponents. + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ Trap any INF/NAN. + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + beq LSYM(Lml_s) + + @ Trap any multiplication by 0. + bics ip, r0, #0x80000000 + bicnes ip, r1, #0x80000000 + beq LSYM(Lml_z) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then add both exponents together. + movs r2, r2, lsr #1 + teqne r3, #0 + beq LSYM(Lml_d) +LSYM(Lml_x): + add r2, r2, r3, asr #1 + + @ Preserve final sign in r2 along with exponent for now. + teq r0, r1 + orrmi r2, r2, #0x8000 + + @ Convert mantissa to unsigned integer. 
+ bic r0, r0, #0xff000000 + bic r1, r1, #0xff000000 + orr r0, r0, #0x00800000 + orr r1, r1, #0x00800000 + +#if __ARM_ARCH__ < 4 + + @ Well, no way to make it shorter without the umull instruction. + @ We must perform that 24 x 24 -> 48 bit multiplication by hand. + stmfd sp!, {r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, #0x00ff0000 + bic r1, r1, #0x00ff0000 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc ip, ip, r0, lsr #16 + ldmfd sp!, {r4, r5} + +#else + + umull r3, ip, r0, r1 @ The actual multiplication. + +#endif + + @ Put final sign in r0. + mov r0, r2, lsl #16 + bic r2, r2, #0x8000 + + @ Adjust result if one extra MSB appeared. + @ The LSB may be lost but this never changes the result in this case. + tst ip, #(1 << 15) + addne r2, r2, #(1 << 22) + movnes ip, ip, lsr #1 + movne r3, r3, rrx + + @ Apply exponent bias, check range for underflow. + subs r2, r2, #(127 << 22) + ble LSYM(Lml_u) + + @ Scale back to 24 bits with rounding. + @ r0 contains sign bit already. + orrs r0, r0, r3, lsr #23 + adc r0, r0, ip, lsl #9 + + @ If halfway between two numbers, rounding should be towards LSB = 0. + mov r3, r3, lsl #9 + teq r3, #0x80000000 + biceq r0, r0, #1 + + @ Note: rounding may have produced an extra MSB here. + @ The extra bit is cleared before merging the exponent below. + tst r0, #0x01000000 + addne r2, r2, #(1 << 22) + + @ Check for exponent overflow + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + + @ Add final exponent. + bic r0, r0, #0x01800000 + orr r0, r0, r2, lsl #1 + RET + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + + @ Check if denormalized result is possible, otherwise return signed 0. +LSYM(Lml_u): + cmn r2, #(24 << 22) + RETc(le) + + @ Find out proper shift value. + mvn r1, r2, asr #22 + subs r1, r1, #7 + bgt LSYM(Lml_ur) + + @ Shift value left, round, etc. 
+ add r1, r1, #32 + orrs r0, r0, r3, lsr r1 + rsb r1, r1, #32 + adc r0, r0, ip, lsl r1 + mov ip, r3, lsl r1 + teq ip, #0x80000000 + biceq r0, r0, #1 + RET + + @ Shift value right, round, etc. + @ Note: r1 must not be 0 otherwise carry does not get set. +LSYM(Lml_ur): + orrs r0, r0, ip, lsr r1 + adc r0, r0, #0 + rsb r1, r1, #32 + mov ip, ip, lsl r1 + teq r3, #0 + teqeq ip, #0x80000000 + biceq r0, r0, #1 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #(1 << 22) + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #(1 << 23) + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + + @ One or both args are INF or NAN. +LSYM(Lml_s): + teq r0, #0x0 + teqne r1, #0x0 + teqne r0, #0x80000000 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip, lsr #1 + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * anything -> NAN +1: teq r3, ip, lsr #1 + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + bne LSYM(Lml_n) @ anything * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). +LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return NAN. +LSYM(Lml_n): + mov r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + FUNC_END mulsf3 + +ARM_FUNC_START divsf3 + + @ Mask out exponents. + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ Trap any INF/NAN or zeroes. + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + bicnes ip, r0, #0x80000000 + bicnes ip, r1, #0x80000000 + beq LSYM(Ldv_s) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then subtract divisor exponent from dividend''s. 
+ movs r2, r2, lsr #1 + teqne r3, #0 + beq LSYM(Ldv_d) +LSYM(Ldv_x): + sub r2, r2, r3, asr #1 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + mov r3, #0x10000000 + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + cmp r3, r1 + subcc r2, r2, #(1 << 22) + movcc r3, r3, lsl #1 + + @ Apply exponent bias, check range for over/underflow. + add r2, r2, #(127 << 22) + cmn r2, #(24 << 22) + RETc(le) + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + + @ The actual division loop. + mov ip, #0x00800000 +1: cmp r3, r1 + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + movnes ip, ip, lsr #4 + bne 1b + + @ Check if denormalized result is needed. + cmp r2, #0 + ble LSYM(Ldv_u) + + @ Apply proper rounding. + cmp r3, r1 + addcs r0, r0, #1 + biceq r0, r0, #1 + + @ Add exponent to result. + bic r0, r0, #0x00800000 + orr r0, r0, r2, lsl #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + add r2, r2, #(127 << 22) + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + cmp r2, #0 + orrgt r0, r0, r2, lsl #1 + RETc(gt) + cmn r2, #(24 << 22) + movle r0, ip + RETc(le) + orr r0, r0, #0x00800000 + mov r3, #0 + + @ Result must be denormalized: prepare parameters to use code above. + @ r3 already contains remainder for rounding considerations. +LSYM(Ldv_u): + bic ip, r0, #0x80000000 + and r0, r0, #0x80000000 + mvn r1, r2, asr #22 + add r1, r1, #2 + b LSYM(Lml_ur) + + @ One or both arguments are denormalized. 
+ @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #(1 << 22) + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #(1 << 23) + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + mov ip, #0xff000000 + teq r2, ip, lsr #1 + teqeq r3, ip, lsr #1 + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r2, ip, lsr #1 + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / anything -> NAN + b LSYM(Lml_i) @ INF / anything -> INF +1: teq r3, ip, lsr #1 + bne 2f + movs r3, r1, lsl #9 + bne LSYM(Lml_n) @ anything / NAN -> NAN + b LSYM(Lml_z) @ anything / INF -> 0 +2: @ One or both arguments are 0. + bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ non-zero / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / non-zero -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END divsf3 + +#endif /* L_muldivsf3 */ + +#ifdef L_cmpsf2 + +FUNC_START gesf2 +ARM_FUNC_START gtsf2 + mov r3, #-1 + b 1f + +FUNC_START lesf2 +ARM_FUNC_START ltsf2 + mov r3, #1 + b 1f + +FUNC_START nesf2 +FUNC_START eqsf2 +ARM_FUNC_START cmpsf2 + mov r3, #1 @ how should we specify unordered here? + +1: @ Trap any INF/NAN first. + mov ip, #0xff000000 + and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + and r2, r0, ip, lsr #1 + teqne r2, ip, lsr #1 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: orr r3, r0, r1 + bics r3, r3, #0x80000000 @ either 0.0 or -0.0 + teqne r0, r1 @ or both the same + moveq r0, #0 + RETc(eq) + + @ Check for sign difference. The N flag is set if it is the case. + @ If so, return sign of r0. + movmi r0, r0, asr #31 + orrmi r0, r0, #1 + RETc(mi) + + @ Compare exponents. 
+ and r3, r1, ip, lsr #1 + cmp r2, r3 + + @ Compare mantissa if exponents are equal + moveq r0, r0, lsl #9 + cmpeq r0, r1, lsl #9 + movcs r0, r1, asr #31 + mvncc r0, r1, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + bne 4f + movs r2, r1, lsl #9 + bne 5f @ r1 is NAN +4: and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + bne 2b + movs ip, r0, lsl #9 + beq 2b @ r0 is not NAN +5: mov r0, r3 @ return unordered code from r3. + RET + + FUNC_END gesf2 + FUNC_END gtsf2 + FUNC_END lesf2 + FUNC_END ltsf2 + FUNC_END nesf2 + FUNC_END eqsf2 + FUNC_END cmpsf2 + +#endif /* L_cmpsf2 */ + +#ifdef L_unordsf2 + +ARM_FUNC_START unordsf2 + mov ip, #0xff000000 + and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + bne 1f + movs r2, r1, lsl #9 + bne 3f @ r1 is NAN +1: and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + bne 2f + movs r2, r0, lsl #9 + bne 3f @ r0 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END unordsf2 + +#endif /* L_unordsf2 */ + +#ifdef L_fixsfsi + +ARM_FUNC_START fixsfsi + movs r0, r0, lsl #1 + RETc(eq) @ value is 0. + + mov r1, r1, rrx @ preserve C flag (the actual sign) + + @ check exponent range. + and r2, r0, #0xff000000 + cmp r2, #(127 << 24) + movcc r0, #0 @ value is too small + RETc(cc) + cmp r2, #((127 + 31) << 24) + bcs 1f @ value is too large + + mov r0, r0, lsl #7 + orr r0, r0, #0x80000000 + mov r2, r2, lsr #24 + rsb r2, r2, #(127 + 31) + tst r1, #0x80000000 @ the sign bit + mov r0, r0, lsr r2 + rsbne r0, r0, #0 + RET + +1: teq r2, #0xff000000 + bne 2f + movs r0, r0, lsl #8 + bne 3f @ r0 is NAN. +2: ands r0, r1, #0x80000000 @ the sign bit + moveq r0, #0x7fffffff @ the maximum signed positive si + RET + +3: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END fixsfsi + +#endif /* L_fixsfsi */ + +#ifdef L_fixunssfsi + +ARM_FUNC_START fixunssfsi + movs r0, r0, lsl #1 + movcss r0, #0 @ value is negative... + RETc(eq) @ ... or 0. 
+ + + @ check exponent range. + and r2, r0, #0xff000000 + cmp r2, #(127 << 24) + movcc r0, #0 @ value is too small + RETc(cc) + cmp r2, #((127 + 32) << 24) + bcs 1f @ value is too large + + mov r0, r0, lsl #7 + orr r0, r0, #0x80000000 + mov r2, r2, lsr #24 + rsb r2, r2, #(127 + 31) + mov r0, r0, lsr r2 + RET + +1: teq r2, #0xff000000 + bne 2f + movs r0, r0, lsl #8 + bne 3f @ r0 is NAN. +2: mov r0, #0xffffffff @ maximum unsigned si + RET + +3: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END fixunssfsi + +#endif /* L_fixunssfsi */ diff -uNrp gcc.old/gcc/config/arm/lib1funcs.asm gcc-3.3.2/gcc/config/arm/lib1funcs.asm --- gcc.old/gcc/config/arm/lib1funcs.asm 2001-09-18 06:02:37.000000000 -0400 +++ gcc-3.3.2/gcc/config/arm/lib1funcs.asm 2004-02-03 14:21:54.000000000 -0500 @@ -51,74 +51,117 @@ Boston, MA 02111-1307, USA. */ #endif #define TYPE(x) .type SYM(x),function #define SIZE(x) .size SYM(x), . - SYM(x) +#define LSYM(x) .x #else #define __PLT__ #define TYPE(x) #define SIZE(x) +#define LSYM(x) x #endif /* Function end macros. Variants for 26 bit APCS and interworking. */ +@ This selects the minimum architecture level required. +#define __ARM_ARCH__ 3 + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 5 +#endif + +/* How to return from a function call depends on the architecture variant. 
*/ + #ifdef __APCS_26__ + # define RET movs pc, lr # define RETc(x) mov##x##s pc, lr -# define RETCOND ^ + +#elif (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) + +# define RET bx lr +# define RETc(x) bx##x lr + +# if (__ARM_ARCH__ == 4) \ + && (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +# define __INTERWORKING__ +# endif + +#else + +# define RET mov pc, lr +# define RETc(x) mov##x pc, lr + +#endif + +/* Don't pass dirn, it's there just to get token pasting right. */ + +.macro RETLDM regs=, cond=, dirn=ia +#ifdef __APCS_26__ + .ifc "\regs","" + ldm\cond\dirn sp!, {pc}^ + .else + ldm\cond\dirn sp!, {\regs, pc}^ + .endif +#elif defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #4 + .else + ldm\cond\dirn sp!, {\regs, lr} + .endif + bx\cond lr +#else + .ifc "\regs","" + ldr\cond pc, [sp], #4 + .else + ldm\cond\dirn sp!, {\regs, pc} + .endif +#endif +.endm + + .macro ARM_LDIV0 -Ldiv0: +LSYM(Ldiv0): str lr, [sp, #-4]! bl SYM (__div0) __PLT__ mov r0, #0 @ About as wrong as it could be. - ldmia sp!, {pc}^ + RETLDM .endm -#else -# ifdef __THUMB_INTERWORK__ -# define RET bx lr -# define RETc(x) bx##x lr + + .macro THUMB_LDIV0 -Ldiv0: +LSYM(Ldiv0): push { lr } bl SYM (__div0) mov r0, #0 @ About as wrong as it could be. +#if defined (__INTERWORKING__) pop { r1 } bx r1 -.endm -.macro ARM_LDIV0 -Ldiv0: - str lr, [sp, #-4]! - bl SYM (__div0) __PLT__ - mov r0, #0 @ About as wrong as it could be. - ldr lr, [sp], #4 - bx lr -.endm -# else -# define RET mov pc, lr -# define RETc(x) mov##x pc, lr -.macro THUMB_LDIV0 -Ldiv0: - push { lr } - bl SYM (__div0) - mov r0, #0 @ About as wrong as it could be. +#else pop { pc } -.endm -.macro ARM_LDIV0 -Ldiv0: - str lr, [sp, #-4]! - bl SYM (__div0) __PLT__ - mov r0, #0 @ About as wrong as it could be. 
- ldmia sp!, {pc} -.endm -# endif -# define RETCOND #endif +.endm .macro FUNC_END name -Ldiv0: + SIZE (__\name) +.endm + +.macro DIV_FUNC_END name +LSYM(Ldiv0): #ifdef __thumb__ THUMB_LDIV0 #else ARM_LDIV0 #endif - SIZE (__\name) + FUNC_END \name .endm .macro THUMB_FUNC_START name @@ -147,7 +190,24 @@ SYM (\name): THUMB_FUNC SYM (__\name): .endm - + +/* Special function that will always be coded in ARM assembly, even if + in Thumb-only compilation. */ + +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +_L__\name: /* A hook to tell gdb that we've switched to ARM */ +.endm +#else +.macro ARM_FUNC_START name + FUNC_START \name +.endm +#endif + /* Register aliases. */ work .req r4 @ XXXX is this safe ? @@ -156,16 +216,17 @@ divisor .req r1 overdone .req r2 result .req r2 curbit .req r3 +#if 0 ip .req r12 sp .req r13 lr .req r14 pc .req r15 - +#endif /* ------------------------------------------------------------------------ */ -/* Bodies of the divsion and modulo routines. */ +/* Bodies of the division and modulo routines. */ /* ------------------------------------------------------------------------ */ .macro ARM_DIV_MOD_BODY modulo -Loop1: +LSYM(Loop1): @ Unless the divisor is very big, shift it up in multiples of @ four bits, since this is the amount of unwinding in the main @ division loop. Continue shifting until the divisor is @@ -174,18 +235,18 @@ Loop1: cmplo divisor, dividend movlo divisor, divisor, lsl #4 movlo curbit, curbit, lsl #4 - blo Loop1 + blo LSYM(Loop1) -Lbignum: +LSYM(Lbignum): @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. cmp divisor, #0x80000000 cmplo divisor, dividend movlo divisor, divisor, lsl #1 movlo curbit, curbit, lsl #1 - blo Lbignum + blo LSYM(Lbignum) -Loop3: +LSYM(Loop3): @ Test for possible subtractions. On the final pass, this may @ subtract too much from the dividend ... 
@@ -226,10 +287,10 @@ Loop3: cmp dividend, #0 @ Early termination? movnes curbit, curbit, lsr #4 @ No, any more bits to do? movne divisor, divisor, lsr #4 - bne Loop3 + bne LSYM(Loop3) .if \modulo -Lfixup_dividend: +LSYM(Lfixup_dividend): @ Any subtractions that we should not have done will be recorded in @ the top three bits of OVERDONE. Exactly which were not needed @ are governed by the position of the bit, stored in IP. @@ -241,7 +302,7 @@ Lfixup_dividend: @ the bit in ip could be in the top two bits which might then match @ with one of the smaller RORs. tstne ip, #0x7 - beq Lgot_result + beq LSYM(Lgot_result) tst overdone, ip, ror #3 addne dividend, dividend, divisor, lsr #3 tst overdone, ip, ror #2 @@ -250,39 +311,39 @@ Lfixup_dividend: addne dividend, dividend, divisor, lsr #1 .endif -Lgot_result: +LSYM(Lgot_result): .endm /* ------------------------------------------------------------------------ */ .macro THUMB_DIV_MOD_BODY modulo @ Load the constant 0x10000000 into our work register. mov work, #1 lsl work, #28 -Loop1: +LSYM(Loop1): @ Unless the divisor is very big, shift it up in multiples of @ four bits, since this is the amount of unwinding in the main @ division loop. Continue shifting until the divisor is @ larger than the dividend. cmp divisor, work - bhs Lbignum + bhs LSYM(Lbignum) cmp divisor, dividend - bhs Lbignum + bhs LSYM(Lbignum) lsl divisor, #4 lsl curbit, #4 - b Loop1 -Lbignum: + b LSYM(Loop1) +LSYM(Lbignum): @ Set work to 0x80000000 lsl work, #3 -Loop2: +LSYM(Loop2): @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. cmp divisor, work - bhs Loop3 + bhs LSYM(Loop3) cmp divisor, dividend - bhs Loop3 + bhs LSYM(Loop3) lsl divisor, #1 lsl curbit, #1 - b Loop2 -Loop3: + b LSYM(Loop2) +LSYM(Loop3): @ Test for possible subtractions ... .if \modulo @ ... On the final pass, this may subtract too much from the dividend, @@ -290,79 +351,79 @@ Loop3: @ afterwards. 
mov overdone, #0 cmp dividend, divisor - blo Lover1 + blo LSYM(Lover1) sub dividend, dividend, divisor -Lover1: +LSYM(Lover1): lsr work, divisor, #1 cmp dividend, work - blo Lover2 + blo LSYM(Lover2) sub dividend, dividend, work mov ip, curbit mov work, #1 ror curbit, work orr overdone, curbit mov curbit, ip -Lover2: +LSYM(Lover2): lsr work, divisor, #2 cmp dividend, work - blo Lover3 + blo LSYM(Lover3) sub dividend, dividend, work mov ip, curbit mov work, #2 ror curbit, work orr overdone, curbit mov curbit, ip -Lover3: +LSYM(Lover3): lsr work, divisor, #3 cmp dividend, work - blo Lover4 + blo LSYM(Lover4) sub dividend, dividend, work mov ip, curbit mov work, #3 ror curbit, work orr overdone, curbit mov curbit, ip -Lover4: +LSYM(Lover4): mov ip, curbit .else @ ... and note which bits are done in the result. On the final pass, @ this may subtract too much from the dividend, but the result will be ok, @ since the "bit" will have been shifted out at the bottom. cmp dividend, divisor - blo Lover1 + blo LSYM(Lover1) sub dividend, dividend, divisor orr result, result, curbit -Lover1: +LSYM(Lover1): lsr work, divisor, #1 cmp dividend, work - blo Lover2 + blo LSYM(Lover2) sub dividend, dividend, work lsr work, curbit, #1 orr result, work -Lover2: +LSYM(Lover2): lsr work, divisor, #2 cmp dividend, work - blo Lover3 + blo LSYM(Lover3) sub dividend, dividend, work lsr work, curbit, #2 orr result, work -Lover3: +LSYM(Lover3): lsr work, divisor, #3 cmp dividend, work - blo Lover4 + blo LSYM(Lover4) sub dividend, dividend, work lsr work, curbit, #3 orr result, work -Lover4: +LSYM(Lover4): .endif cmp dividend, #0 @ Early termination? - beq Lover5 + beq LSYM(Lover5) lsr curbit, #4 @ No, any more bits to do? - beq Lover5 + beq LSYM(Lover5) lsr divisor, #4 - b Loop3 -Lover5: + b LSYM(Loop3) +LSYM(Lover5): .if \modulo @ Any subtractions that we should not have done will be recorded in @ the top three bits of "overdone". 
Exactly which were not needed @@ -370,7 +431,7 @@ Lover5: mov work, #0xe lsl work, #28 and overdone, work - beq Lgot_result + beq LSYM(Lgot_result) @ If we terminated early, because dividend became zero, then the @ bit in ip will not be in the bottom nibble, and we should not @@ -381,33 +442,33 @@ Lover5: mov curbit, ip mov work, #0x7 tst curbit, work - beq Lgot_result + beq LSYM(Lgot_result) mov curbit, ip mov work, #3 ror curbit, work tst overdone, curbit - beq Lover6 + beq LSYM(Lover6) lsr work, divisor, #3 add dividend, work -Lover6: +LSYM(Lover6): mov curbit, ip mov work, #2 ror curbit, work tst overdone, curbit - beq Lover7 + beq LSYM(Lover7) lsr work, divisor, #2 add dividend, work -Lover7: +LSYM(Lover7): mov curbit, ip mov work, #1 ror curbit, work tst overdone, curbit - beq Lgot_result + beq LSYM(Lgot_result) lsr work, divisor, #1 add dividend, work .endif -Lgot_result: +LSYM(Lgot_result): .endm /* ------------------------------------------------------------------------ */ /* Start of the Real Functions */ @@ -419,13 +480,13 @@ Lgot_result: #ifdef __thumb__ cmp divisor, #0 - beq Ldiv0 + beq LSYM(Ldiv0) mov curbit, #1 mov result, #0 push { work } cmp dividend, divisor - blo Lgot_result + blo LSYM(Lgot_result) THUMB_DIV_MOD_BODY 0 @@ -436,11 +497,11 @@ Lgot_result: #else /* ARM version. */ cmp divisor, #0 - beq Ldiv0 + beq LSYM(Ldiv0) mov curbit, #1 mov result, #0 cmp dividend, divisor - blo Lgot_result + blo LSYM(Lgot_result) ARM_DIV_MOD_BODY 0 @@ -449,7 +510,7 @@ Lgot_result: #endif /* ARM version */ - FUNC_END udivsi3 + DIV_FUNC_END udivsi3 #endif /* L_udivsi3 */ /* ------------------------------------------------------------------------ */ @@ -460,13 +521,13 @@ Lgot_result: #ifdef __thumb__ cmp divisor, #0 - beq Ldiv0 + beq LSYM(Ldiv0) mov curbit, #1 cmp dividend, divisor - bhs Lover10 + bhs LSYM(Lover10) RET -Lover10: +LSYM(Lover10): push { work } THUMB_DIV_MOD_BODY 1 @@ -477,7 +538,7 @@ Lover10: #else /* ARM version. 
*/ cmp divisor, #0 - beq Ldiv0 + beq LSYM(Ldiv0) cmp divisor, #1 cmpne dividend, divisor moveq dividend, #0 @@ -490,7 +551,7 @@ Lover10: #endif /* ARM version. */ - FUNC_END umodsi3 + DIV_FUNC_END umodsi3 #endif /* L_umodsi3 */ /* ------------------------------------------------------------------------ */ @@ -500,7 +561,7 @@ Lover10: #ifdef __thumb__ cmp divisor, #0 - beq Ldiv0 + beq LSYM(Ldiv0) push { work } mov work, dividend @@ -509,24 +570,24 @@ Lover10: mov curbit, #1 mov result, #0 cmp divisor, #0 - bpl Lover10 + bpl LSYM(Lover10) neg divisor, divisor @ Loops below use unsigned. -Lover10: +LSYM(Lover10): cmp dividend, #0 - bpl Lover11 + bpl LSYM(Lover11) neg dividend, dividend -Lover11: +LSYM(Lover11): cmp dividend, divisor - blo Lgot_result + blo LSYM(Lgot_result) THUMB_DIV_MOD_BODY 0 mov r0, result mov work, ip cmp work, #0 - bpl Lover12 + bpl LSYM(Lover12) neg r0, r0 -Lover12: +LSYM(Lover12): pop { work } RET @@ -537,11 +598,11 @@ Lover12: mov result, #0 cmp divisor, #0 rsbmi divisor, divisor, #0 @ Loops below use unsigned. - beq Ldiv0 + beq LSYM(Ldiv0) cmp dividend, #0 rsbmi dividend, dividend, #0 cmp dividend, divisor - blo Lgot_result + blo LSYM(Lgot_result) ARM_DIV_MOD_BODY 0 @@ -552,7 +613,7 @@ Lover12: #endif /* ARM version */ - FUNC_END divsi3 + DIV_FUNC_END divsi3 #endif /* L_divsi3 */ /* ------------------------------------------------------------------------ */ @@ -564,29 +625,29 @@ Lover12: mov curbit, #1 cmp divisor, #0 - beq Ldiv0 - bpl Lover10 + beq LSYM(Ldiv0) + bpl LSYM(Lover10) neg divisor, divisor @ Loops below use unsigned. -Lover10: +LSYM(Lover10): push { work } @ Need to save the sign of the dividend, unfortunately, we need @ work later on. Must do this after saving the original value of @ the work register, because we will pop this value off first. 
push { dividend } cmp dividend, #0 - bpl Lover11 + bpl LSYM(Lover11) neg dividend, dividend -Lover11: +LSYM(Lover11): cmp dividend, divisor - blo Lgot_result + blo LSYM(Lgot_result) THUMB_DIV_MOD_BODY 1 pop { work } cmp work, #0 - bpl Lover12 + bpl LSYM(Lover12) neg dividend, dividend -Lover12: +LSYM(Lover12): pop { work } RET @@ -594,14 +655,14 @@ Lover12: cmp divisor, #0 rsbmi divisor, divisor, #0 @ Loops below use unsigned. - beq Ldiv0 + beq LSYM(Ldiv0) @ Need to save the sign of the dividend, unfortunately, we need @ ip later on; this is faster than pushing lr and using that. str dividend, [sp, #-4]! cmp dividend, #0 @ Test dividend against zero rsbmi dividend, dividend, #0 @ If negative make positive cmp dividend, divisor @ else if zero return zero - blo Lgot_result @ if smaller return dividend + blo LSYM(Lgot_result) @ if smaller return dividend mov curbit, #1 ARM_DIV_MOD_BODY 1 @@ -613,7 +674,7 @@ Lover12: #endif /* ARM version */ - FUNC_END modsi3 + DIV_FUNC_END modsi3 #endif /* L_modsi3 */ /* ------------------------------------------------------------------------ */ @@ -623,7 +684,7 @@ Lover12: RET - SIZE (__div0) + FUNC_END div0 #endif /* L_divmodsi_tools */ /* ------------------------------------------------------------------------ */ @@ -636,22 +697,18 @@ Lover12: #define __NR_getpid (__NR_SYSCALL_BASE+ 20) #define __NR_kill (__NR_SYSCALL_BASE+ 37) + .code 32 FUNC_START div0 stmfd sp!, {r1, lr} swi __NR_getpid cmn r0, #1000 - ldmhsfd sp!, {r1, pc}RETCOND @ not much we can do + RETLDM r1 hs mov r1, #SIGFPE swi __NR_kill -#ifdef __THUMB_INTERWORK__ - ldmfd sp!, {r1, lr} - bx lr -#else - ldmfd sp!, {r1, pc}RETCOND -#endif + RETLDM r1 - SIZE (__div0) + FUNC_END div0 #endif /* L_dvmd_lnx */ /* ------------------------------------------------------------------------ */ @@ -720,24 +777,23 @@ Lover12: .code 32 .globl _arm_return -_arm_return: - ldmia r13!, {r12} - bx r12 +_arm_return: + RETLDM .code 16 -.macro interwork register - .code 16 +.macro interwork 
register + .code 16 THUMB_FUNC_START _interwork_call_via_\register - bx pc + bx pc nop - - .code 32 - .globl .Lchange_\register -.Lchange_\register: + + .code 32 + .globl LSYM(Lchange_\register) +LSYM(Lchange_\register): tst \register, #1 - stmeqdb r13!, {lr} + streq lr, [sp, #-4]! adreq lr, _arm_return bx \register @@ -779,3 +835,7 @@ _arm_return: SIZE (_interwork_call_via_lr) #endif /* L_interwork_call_via_rX */ + +#include "ieee754-df.S" +#include "ieee754-sf.S" + diff -uNrp gcc.old/gcc/config/arm/linux-elf.h gcc-3.3.2/gcc/config/arm/linux-elf.h --- gcc.old/gcc/config/arm/linux-elf.h 2003-09-16 11:39:23.000000000 -0400 +++ gcc-3.3.2/gcc/config/arm/linux-elf.h 2004-02-03 14:21:54.000000000 -0500 @@ -30,15 +30,31 @@ Boston, MA 02111-1307, USA. */ /* Do not assume anything about header files. */ #define NO_IMPLICIT_EXTERN_C -/* Default is to use APCS-32 mode. */ +/* + * Default is to use APCS-32 mode with soft-vfp. + * The old Linux default for floats can be achieved with -mhard-float + * or with the configure --with-float=hard option. + * If -msoft-float or --with-float=soft is used then software float + * support will be used just like the default but with the legacy + * big endian word ordering for double float representation instead. 
+ */ + #undef TARGET_DEFAULT -#define TARGET_DEFAULT (ARM_FLAG_APCS_32 | ARM_FLAG_MMU_TRAPS) +#define TARGET_DEFAULT \ + ( ARM_FLAG_APCS_32 | \ + ARM_FLAG_SOFT_FLOAT | ARM_FLAG_VFP | \ + ARM_FLAG_MMU_TRAPS ) + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "%{!mcpu=*:-mcpu=xscale} \ + %{mhard-float:-mfpu=fpa} \ + %{!mhard-float: %{msoft-float:-mfpu=softfpa} %{!msoft-float:-mfpu=softvfp}}" #define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux -p" #undef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS \ - { "marm", "mlittle-endian", "mhard-float", "mapcs-32", "mno-thumb-interwork" } + { "marm", "mlittle-endian", "mapcs-32", "mno-thumb-interwork" } #define CPP_APCS_PC_DEFAULT_SPEC "-D__APCS_32__" diff -uNrp gcc.old/gcc/config/arm/t-linux gcc-3.3.2/gcc/config/arm/t-linux --- gcc.old/gcc/config/arm/t-linux 2001-05-16 23:15:49.000000000 -0400 +++ gcc-3.3.2/gcc/config/arm/t-linux 2004-02-03 14:21:54.000000000 -0500 @@ -7,7 +7,10 @@ LIBGCC2_DEBUG_CFLAGS = -g0 ENQUIRE= LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx \ + _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ + _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ + _fixsfsi _fixunssfsi # MULTILIB_OPTIONS = mhard-float/msoft-float # MULTILIB_DIRNAMES = hard-float soft-float diff -uNrp gcc.old/gcc/config/arm/xscale-elf.h gcc-3.3.2/gcc/config/arm/xscale-elf.h --- gcc.old/gcc/config/arm/xscale-elf.h 2002-05-20 13:07:04.000000000 -0400 +++ gcc-3.3.2/gcc/config/arm/xscale-elf.h 2004-02-03 14:21:54.000000000 -0500 @@ -28,7 +28,7 @@ Boston, MA 02111-1307, USA. 
*/ #define SUBTARGET_CPU_DEFAULT TARGET_CPU_xscale #endif -#define SUBTARGET_EXTRA_ASM_SPEC "%{!mcpu=*:-mcpu=xscale} %{!mhard-float:-mno-fpu}" +#define SUBTARGET_EXTRA_ASM_SPEC "%{!mcpu=*:-mcpu=xscale} %{mhard-float:-mfpu=fpa} %{!mhard-float: %{msoft-float:-mfpu=softfpa} %{!msoft-float:-mfpu=softvfp}}" #ifndef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS \ diff -uNrp gcc.old/gcc/config.gcc gcc-3.3.2/gcc/config.gcc --- gcc.old/gcc/config.gcc 2003-10-01 15:07:01.000000000 -0400 +++ gcc-3.3.2/gcc/config.gcc 2004-02-03 14:21:56.000000000 -0500 @@ -2305,7 +2305,7 @@ sh-*-rtems*) fi ;; sh-*-linux* | sh[2346lbe]*-*-linux*) - tmake_file="sh/t-sh sh/t-elf" + tmake_file="sh/t-sh sh/t-elf t-slibgcc-elf-ver t-linux" case $machine in sh*be-*-* | sh*eb-*-*) ;; *) diff -uNrp gcc.old/gcc/testsuite/gcc.c-torture/execute/20031204-1.c gcc-3.3.2/gcc/testsuite/gcc.c-torture/execute/20031204-1.c --- gcc.old/gcc/testsuite/gcc.c-torture/execute/20031204-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.c-torture/execute/20031204-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,49 @@ +/* PR optimization/13260 */ + +#include <string.h> + +typedef unsigned long u32; + +u32 in_aton(const char* x) +{ + return 0x0a0b0c0d; +} + +u32 root_nfs_parse_addr(char *name) +{ + u32 addr; + int octets = 0; + char *cp, *cq; + + cp = cq = name; + while (octets < 4) { + while (*cp >= '0' && *cp <= '9') + cp++; + if (cp == cq || cp - cq > 3) + break; + if (*cp == '.' 
|| octets == 3) + octets++; + if (octets < 4) + cp++; + cq = cp; + } + + if (octets == 4 && (*cp == ':' || *cp == '\0')) { + if (*cp == ':') + *cp++ = '\0'; + addr = in_aton(name); + strcpy(name, cp); + } else + addr = (-1); + + return addr; +} + +int +main() +{ + static char addr[] = "10.11.12.13:/hello"; + u32 result = root_nfs_parse_addr(addr); + if (result != 0x0a0b0c0d) { abort(); } + return 0; +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr10392-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr10392-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr10392-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr10392-1.c 2004-02-03 14:21:54.000000000 -0500 @@ -0,0 +1,62 @@ +/* PR optimization/10392 + * Reporter: marcus@mc.pp.se + * Summary: [3.3/3.4 regression] [SH] optimizer generates faulty array indexing + * Description: + * The address calculation of an index operation on an array on the stack + * can _under some conditions_ get messed up completely + * + * Testcase tweaked by dank@kegel.com + * Problem only happens with -O2 -m4, so it should only happen on sh4, + * but what the heck, let's test other architectures, too. + * Not marked as xfail since it's a regression. +*/ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-options "-O2 -m4" { target sh4-*-* } } */ +const char *dont_optimize_function_away; + +const char *use(const char *str) +{ + dont_optimize_function_away = str; + if (str[0] != 'v') + abort(); + if (str[1] < '1' || str[1] > '6') + abort(); + if (str[2]) + abort(); + return str[2] ? 
"notused" : "v6"; +} + +const char *func(char *a, char *b) +{ + char buf[128]; + unsigned char i; + const char *result; + + char *item[] = { + "v1", + "v2", + }; + + buf[0] = 'v'; + buf[1] = '3'; + buf[2] = 0; + + for (i = 0; i < 2; i++) { + /* bug is: following line passes wild pointer to use() on sh4 -O2 */ + result = use(item[i]); + + use(buf); + use(a); + use(b); + result = use(result); + } + return result; +} + +int main() +{ + func("v4", "v5"); + return 0; +} + diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr10412-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr10412-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr10412-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr10412-1.c 2004-02-03 14:21:54.000000000 -0500 @@ -0,0 +1,43 @@ +/* PR target/10412 + * Reporter: shrinivasa@kpitcummins.com + * Summary: Renesas SH - Incorrect code generation + * Description: + * When following code is compiled with + * sh-elf-gcc -S -mhitachi -m2 -O2 bug1.c + * generates an incorrect code. + * + * Testcase tweaked by dank@kegel.com + * Problem only happens with -mhitachi -m2. Not sure if I can give those + * options for all sh targets. They work on sh4, though. + * Not marked as xfail as it's a regression relative to hardhat 2.0 gcc-2.97. 
+*/ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +int global_val; + +int func0(int x) +{ + global_val += x; + return (x != 99); +} + +int func1(unsigned long addr) +{ + int err; + + err = func0(addr); + if (err) + return (err); + + err = func0(addr * 7); /* address of func0 is lost during multiplication -> probable SIGSEGV */ + return (err); +} + +int main(int argc, char **argv) +{ + global_val = 0; + global_val += func1(99); + if (global_val != 99 * 8 + 1) + abort(); + return 0; +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr10589-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr10589-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr10589-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr10589-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,14 @@ +/* PR target/10589 + * Reporter: mathieu@thenesis.com + * Summary: For Hitachi SH target, GCC crashes when both -fomit-frame-pointer and -mdalign options are specified + * Keywords: ice-on-valid-code + * Testcase by Kazu Hirata, tweaked by dank@kegel.com + * Did not fail in Hard Hat 2.0 gcc-2.97, nor in dodes gcc-3.0.2, so this is a regression, so I'm not marking it xfail + */ +/* { dg-do compile { target sh*-*-* } } */ +/* { dg-options "-fomit-frame-pointer -mdalign" } */ + +int foo(int a, int b) +{ + return a / b; +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr11162-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr11162-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr11162-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr11162-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,22 @@ +/* + * PR optimization/11162 + * Reporter: Toshiyasu Morita + * Summary: [3.4 Regression] [-fnew-ra] ICE compiling channel.i on sh + * Keywords: ice-on-valid-code + * Description: + * [ICE] when channel.i from stress-1.17 is compiled with options: + * -O2 -m4 -fnew-ra + * ... + * The CFG code gets confused by the addition of a USE insn after a sibcall. 
+ * Sibcalls are supposed to end the block (and the function!), so the fact + * that we have an instruction following one (even if it's fake) is confusing. + * + * testcase tweaked by dank@kegel.com + */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fnew-ra" } */ + +int foo(char *p1) +{ + return bar(p1); +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr11587-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr11587-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr11587-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr11587-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,36 @@ +/* PR optimization/11587 + * Reporter: Michael Eager + * Summary: [3.3/3.4 Regression] SH ICE in reload_cse_simplify_operand, postreload.c + * Keywords: ice-on-valid-code + * + * Note: a fix exists; see bugzilla + * + * Testcase tweaked by dank@kegel.com + * Not marked as xfail since it's a regression. + */ + +/* { dg-do compile } */ +/* { dg-options "-O1" } */ +int foo1(void); +int foo2(); +int foo3(); + +static int goo() +{ + int i; + + if (i <= 0) + return i; + + if (foo1() > 1) { + i = foo2(); + if (i < 0) + return i; + if (i) { + if (foo3()) + return 1; + return 0; + } + } + +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr11736-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr11736-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr11736-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr11736-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,45 @@ +/* PR optimization/11736 + * Reporter: marcus@mc.pp.se + * Summary: Stackpointer messed up on SuperH + * Keywords: wrong-code + * Description: + * When a function with 5 arguments is called in both branches of a + * conditional, and only the last argument differs, the code to push that + * last argument on the stack gets confused. + * Space for the fifth argument is reserved on the stack by the + * instruction I have marked as "A". However, if the else-branch is + * taken the stackpointer is decremented _again_ at "B". 
This + * decrementation is never restored, and it is only due to the + * restoration of r15 from r14 that the function works at all. With + * -fomit-frame-pointer it will crash. + * + * Testcase tweaked by dank@kegel.com + * Not marked as xfail since it's a regression from hardhat 2.0 gcc-2.97 + * and dodes gcc-3.0.2 + */ + +/* { dg-do run } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +int expected_e; + +void bar(int a, int b, int c, int d, int e) +{ + if (e != expected_e) + abort(); +} + +void foo(int a) +{ + if (a) + bar(0, 0, 0, 0, 1); + else + bar(0, 0, 0, 0, 0); /* stack pointer decremented extra time here, causing segfault */ +} + +int main(int argc, char **argv) +{ + for (expected_e = 0; expected_e < 2; expected_e++) + foo(expected_e); + return 0; +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr11864-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr11864-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr11864-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr11864-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,42 @@ +/* PR optimization/11864 + * Reporter: Kazumoto Kojima + * Summary: [3.3/3.4 regression] miscompiles zero extension and test + * Description: + * gcc-3.3/3.4 -O2 for sh target may miscompile the combination of zero extension + * and test if it's zero. + * + * Testcase tweaked by dank@kegel.com. Not marked as xfail because it's a regression. 
+ */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +extern void abort(void); + +int val = 0xff00; + +int f(void) +{ + return val; +} + +unsigned char a[1]; + +void foo(void) +{ + a[0] = f() & 255; + + if (!a[0]) + a[0] = f() & 255; + + if (!a[0]) + a[0] = 1 + (f() & 127); +} + +int main(int argc, char **argv) +{ + foo(); + if (!a[0]) + abort(); + + return 0; +} diff -uNrp gcc.old/gcc/testsuite/gcc.dg/pr9365-1.c gcc-3.3.2/gcc/testsuite/gcc.dg/pr9365-1.c --- gcc.old/gcc/testsuite/gcc.dg/pr9365-1.c 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/gcc.dg/pr9365-1.c 2004-02-03 14:21:55.000000000 -0500 @@ -0,0 +1,40 @@ +/* PR target/9365 + * Origin: marcus@mc.pp.se + * Testcase tweaked by dank@kegel.com + * [3.3 regression] [SH] segfault in gen_far_branch (config/sh/sh.c) + * ice-on-valid-code + * Not marked as xfail since it's a regression +*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + + +void foo(int n, int *p) +{ + switch(n) { + case 100: case 110: case 120: case 130: case 140: + case 200: case 210: case 220: case 230: case 240: + case 300: case 310: case 320: case 330: case 340: + case 400: case 410: case 420: case 430: case 440: + case 500: case 510: case 520: case 530: case 540: + case 600: case 610: case 620: case 630: case 640: + case 700: case 710: case 720: case 730: case 740: + case 800: case 810: case 820: case 830: case 840: + case 900: case 910: case 920: case 930: case 940: + break; + default: + *p = n; + break; + } +} + +int main(int argc, char **argv) +{ + int p; + + (void) argv; + + foo(argc, &p); + + return p; +} diff -uNrp gcc.old/gcc/testsuite/g++.dg/abi/empty6.C gcc-3.3.2/gcc/testsuite/g++.dg/abi/empty6.C --- gcc.old/gcc/testsuite/g++.dg/abi/empty6.C 2002-09-25 15:07:35.000000000 -0400 +++ gcc-3.3.2/gcc/testsuite/g++.dg/abi/empty6.C 2004-02-03 14:21:54.000000000 -0500 @@ -5,4 +5,9 @@ struct A {}; struct B { A a; // { dg-warning "empty" } virtual void f () {} -}; +} __attribute__((aligned(8))); +/* 
The preceding attribute is necessary on targets with + BIGGEST_ALIGNMENT <= 32 to trigger the warning, as otherwise a 32 bit + offset is split into DECL_FIELD_OFFSET 4 and DECL_FIELD_BIT_OFFSET 0, + and then there is no discrepancy between DECL_FIELD_OFFSET and + byte_position to warn about. */ diff -uNrp gcc.old/gcc/testsuite/g++.old-deja/g++.jason/thunk3.C gcc-3.3.2/gcc/testsuite/g++.old-deja/g++.jason/thunk3.C --- gcc.old/gcc/testsuite/g++.old-deja/g++.jason/thunk3.C 2002-03-25 12:57:03.000000000 -0500 +++ gcc-3.3.2/gcc/testsuite/g++.old-deja/g++.jason/thunk3.C 2004-02-03 14:21:56.000000000 -0500 @@ -2,7 +2,7 @@ // Note that this will break on any target that uses the generic thunk // support, because it doesn't support variadic functions. -// excess errors test - XFAIL mips*-*-* rs6000-*-* powerpc-*-eabi m68k-*-coff m68k-motorola-sysv m88k-motorola-sysv3 mn10300-*-* mn10200-*-* v850-*-* sh-*-* sh64-*-* h8*-*-* xtensa-*-* +// excess errors test - XFAIL mips*-*-* rs6000-*-* powerpc-*-eabi m68k-*-coff m68k-motorola-sysv m88k-motorola-sysv3 mn10300-*-* mn10200-*-* v850-*-* sh*-*-* h8*-*-* xtensa-*-* #include <stdarg.h> diff -uNrp gcc.old/gcc/testsuite/lib/g++.exp gcc-3.3.2/gcc/testsuite/lib/g++.exp --- gcc.old/gcc/testsuite/lib/g++.exp 2002-09-26 05:51:44.000000000 -0400 +++ gcc-3.3.2/gcc/testsuite/lib/g++.exp 2004-02-03 14:21:54.000000000 -0500 @@ -72,6 +72,8 @@ proc g++_version { } { # proc g++_include_flags { paths } { global srcdir + global objdir + global target_triplet global HAVE_LIBSTDCXX_V3 global TESTING_IN_BUILD_TREE @@ -90,6 +92,20 @@ proc g++_include_flags { paths } { if { ${HAVE_LIBSTDCXX_V3} } { set odir_v3 [lookfor_file ${gccpath} libstdc++-v3] + if { $odir_v3 == "" } { + verbose "g++_include_flags: couldn't find libstdc++-v3 on first try, now looking in build directory $objdir" + # first assume no multilibs + set odir_v3 [lookfor_file ${objdir} "$target_triplet/libstdc++-v3"] + } + if { $odir_v3 == "" } { + verbose "g++_include_flags: couldn't find 
libstdc++-v3 on second try, trying multilib" + # assume multilib only one level deep + set multisub [file tail $gccpath] + set odir_v3 [lookfor_file ${objdir} "$target_triplet/$multisub/libstdc++-v3"] + } + if { $odir_v3 == "" } { + error "Can't find libstdc++-v3" + } append flags [exec sh ${odir_v3}/testsuite_flags --build-includes] } else { set odir_v2 [lookfor_file ${gccpath} libstdc++] @@ -176,16 +192,20 @@ proc g++_link_flags { paths } { } } - # On IRIX 6, we have to set variables akin to LD_LIBRARY_PATH, but - # called LD_LIBRARYN32_PATH (for the N32 ABI) and LD_LIBRARY64_PATH - # (for the 64-bit ABI). The right way to do this would be to modify - # unix.exp -- but that's not an option since it's part of DejaGNU - # proper, so we do it here. We really only need to do - # this on IRIX, but it shouldn't hurt to do it anywhere else. - setenv LD_LIBRARY_PATH $ld_library_path - setenv SHLIB_PATH $ld_library_path - setenv LD_LIBRARYN32_PATH $ld_library_path - setenv LD_LIBRARY64_PATH $ld_library_path + if {![is_remote target]} { + # On IRIX 6, we have to set variables akin to LD_LIBRARY_PATH, but + # called LD_LIBRARYN32_PATH (for the N32 ABI) and LD_LIBRARY64_PATH + # (for the 64-bit ABI). The right way to do this would be to modify + # unix.exp -- but that's not an option since it's part of DejaGNU + # proper, so we do it here. We really only need to do + # this on IRIX, but it shouldn't hurt to do it anywhere else. + + # Doing this causes us to be unable to run cross-compilers. 
+ setenv LD_LIBRARY_PATH $ld_library_path + setenv SHLIB_PATH $ld_library_path + setenv LD_LIBRARYN32_PATH $ld_library_path + setenv LD_LIBRARY64_PATH $ld_library_path + } return "$flags" } diff -uNrp gcc.old/LAST_UPDATED gcc-3.3.2/LAST_UPDATED --- gcc.old/LAST_UPDATED 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2/LAST_UPDATED 2004-02-03 14:24:19.000000000 -0500 @@ -0,0 +1 @@ +gcc-3.3.2 diff -uNrp gcc.old/libstdc++-v3/testsuite/lib/libstdc++-v3-dg.exp gcc-3.3.2/libstdc++-v3/testsuite/lib/libstdc++-v3-dg.exp --- gcc.old/libstdc++-v3/testsuite/lib/libstdc++-v3-dg.exp 2003-01-15 20:41:55.000000000 -0500 +++ gcc-3.3.2/libstdc++-v3/testsuite/lib/libstdc++-v3-dg.exp 2004-02-03 14:21:54.000000000 -0500 @@ -46,8 +46,23 @@ proc libstdc++-v3-init { args } { global gluefile wrap_flags global ld_library_path global tool_root_dir + global target_triplet set blddir [lookfor_file [get_multilibs] libstdc++-v3] + if { $blddir == "" } { + set multilibs [get_multilibs] + # FIXME: assume multilib only one level deep + set multisub [file tail $multilibs] + verbose "libstdc++-v3-init: couldn't find libstdc++-v3 in $multilibs, trying $objdir" + set blddir [lookfor_file ${objdir} "$target_triplet/$multisub/libstdc++-v3"] + } + if { $blddir == "" } { + verbose "libstdc++-v3-init: couldn't find libstdc++-v3, trying $objdir without multilibs" + set blddir [lookfor_file ${objdir} "$target_triplet/libstdc++-v3"] + } + if { $blddir == "" } { + error "Can't find libstdc++-v3" + } # By default, we assume we want to run program images. global dg-do-what-default