// Rowley MAXQ20 runtime support.
//
// Copyright (c) 2004 Rowley Associates Limited.
//
// This file may be distributed under the terms of the License Agreement
// provided with this software.
//
// THIS FILE IS PROVIDED AS IS WITH NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

// Values that __TARGET_PROCESSOR may take.
#define MAXQ2000 2000
#define MAXQ3100 3100
#define MAXQ3120 3120
#define MAXQ7654 7654

// Per-target selection.  Targets that define HARDWARE_MULTIPLY also name
// the register module (M1/M2/M3) through which the multiplier registers
// are addressed; the MAXQ3100 build uses the software multiply loop.
#if __TARGET_PROCESSOR == MAXQ2000
#define HARDWARE_MULTIPLY
#define MAC_MODULE M2

#elif __TARGET_PROCESSOR == MAXQ3120
#define HARDWARE_MULTIPLY
#define MAC_MODULE M3

#elif __TARGET_PROCESSOR == MAXQ7654
#define HARDWARE_MULTIPLY
#define MAC_MODULE M1

#elif __TARGET_PROCESSOR == MAXQ3100
#undef HARDWARE_MULTIPLY

#else
#error Bad target processor
#endif

// Multiplier register names; the index within the module is the same on
// every target that has the hardware multiplier.
#ifdef HARDWARE_MULTIPLY
#define MCNT  MAC_MODULE[0]
#define MA    MAC_MODULE[1]
#define MB    MAC_MODULE[2]
#define MC2   MAC_MODULE[3]
#define MC1   MAC_MODULE[4]
#define MC0   MAC_MODULE[5]
#endif

        code
        export  ___float32_mul

; ___float32_mul -- single-precision floating point multiply.
;
; In:    A[7]:A[6] = multiplier   (A[7] is the high word: sign in bit 15,
;                                  exponent in bits 14..7, fraction below)
;        A[5]:A[4] = multiplicand (same layout)
; Out:   A[7]:A[6] = product, rounded to nearest, ties to even
;
; As multiplication is commutative, which operand goes in which register
; pair isn't much of a concern.
;
; Preserved: A[0], A[1], A[3] (saved in BP, DP[0], DP[1] and restored).
; Modified:  A[4], A[5], AP, APC, PSF, GR, BP, DP[0], DP[1], LC[1], and
;            LC[0] in the software-multiply build.  The hardware-multiply
;            build also writes the multiplier registers and clears IC.0
;            around the MAC sequence before restoring IC.
;
; Special cases:
;   - An operand whose high word is zero is treated as zero and the result
;     is +0.  Only the high word is tested, and this test comes first, so
;     inf * 0 also yields +0.
;   - An operand with exponent $ff (inf or NaN) yields infinity carrying
;     the sign of the product; NaNs are not propagated.
;   - A result exponent <= 0 is flushed to +0 (no denormals).
;   - A result exponent >= 255 yields infinity carrying the product's sign.
;
; NOTE(review): nothing before the multiply writes APC, so the routine
; presumably relies on being entered with AP auto-modification disabled --
; confirm against the runtime's calling convention.

___float32_mul proc

; x*0 == 0 and 0*x == 0
        move    AP, #5
        jz      L$0
        move    AP, #7
        jnz     L$1
L$0     move    A[6], #0
        move    A[7], #0
        ret

; Save working registers.
L$1     move    DP[1], A[3]
        move    DP[0], A[1]
        move    BP, A[0]

; Form the exclusive or of the two sign bits in the carry flag and keep a
; snapshot of PSF in LC[1]; that carry is the "negative product" flag and
; is reloaded when the result is packed.
        move    AP, #7
        move    C, ACC.15
        move    AP, #5
        xor     ACC.15
        move    LC[1], PSF  ; save carry

; Get the exponents of the multiplier and multiplicand into A[3] and A[1].
; The high byte of the operand's high word supplies the sign and the top
; seven exponent bits; rotating in bit 7 of the high word completes the
; exponent in bits 7..0.  The sign ends up in bit 8 and is masked off below.
        move    GR, A[7]
        move    AP, #3
        move    ACC, GRH
        move    C, A[7].7
        rlc
        move    GR, A[5]
        move    AP, #1
        move    ACC, GRH
        move    C, A[5].7
        rlc

; Extract the multiplier's mantissa.
#ifdef HARDWARE_MULTIPLY
; Hardware build: 24-bit mantissa right-aligned in A[7]:A[6].
        move    AP, #7
        and     #$ff
        or      #$80        ; set assumed bit
#else
; Software build: 24-bit mantissa left-aligned in A[7]:A[6] (low byte of
; A[6] is zero) so that the shift-and-add loop below can add it directly.
        move    GR, A[7]
        move    AP, #6
        xch
        move    GRH, A[6]
        and     #$ff00
        move    A[7], GRS
        move    AP, #7
        or      #$8000      ; set assumed bit
#endif

; Extract the multiplicand's mantissa (right-aligned in A[5]:A[4]).
        move    AP, #5
        and     #$ff
        or      #$80        ; set assumed bit

; Compute the product's exponent (first cut) which is simply the sum
; of the exponents.  A[3] and A[1] still carry the sign bit above the
; exponent, so mask down to the true exponents and form the sum in A[3].
        move    AP, #3
        and     #$ff
        cmp     #$ff        ; if one of the operands is inf/nan, return inf
        je      L$overflow
        move    AP, #1
        and     #$ff
        cmp     #$ff        ; if one of the operands is inf/nan, return inf
        je      L$overflow
        move    AP, #3
        add     A[1]
        sub     #$7e        ; remove extra excess-127 from exponent

; Now perform a 24-bit x 24-bit multiplication of the mantissa in A[5:4]
; by the mantissa in A[7:6].  Both builds finish with the carry flag
; holding the top bit of the 48-bit product and A[7]:A[6] holding the 32
; bits below it.
; Both mantissas have their assumed bit set, so neither is zero here.
; Last of all, please don't mess with this code.

#ifdef HARDWARE_MULTIPLY

; Save IC so we can restore IGE on exit.
        move    GR, IC

; Disable interrupts.
        move    IC.0, #0

; Form low product (low1 * low2).
        move    MCNT, #0x23           ; CLD MMAC SUS - MAC multiply
        move    MA, A[4]
        move    MB, A[6]
        move    MA, A[5]              ; sneak in for next MAC

; Shift down high part of low1*low2 ready for MAC.
        move    MC0, MC1
        move    MC1, #0

; Calculate and sum low1 * high2, note MA written above
        move    MB, MB                ; low1 * high2  (a[5] * a[6])

; Calculate and sum high1 * low2
        move    MA, A[4]
        move    MB, A[7]              ; high1 * low2  (a[4] * a[7])
        move    MA, A[5]              ; sneak in for next MAC
        move    A[6], MC0
        move    MC0, MC1
        move    MC1, #0

; Calculate and sum high1 * high2
        move    MB, A[7]

; Wait for result
        nop
        move    A[7], MC0

; Restore IC.
        move    IC, GR

; Shift A[7]:A[6] left one place so that the top bit of the product
; drops into carry for the normalisation test below.
        move    APC, #1     ; increment AP modulo 2
        move    AP, #6
        sla
        rlc

#else
; Shift-and-add multiply.  A[1]:A[0] accumulates the product, A[5]:A[4]
; is shifted right to present one multiplicand bit per iteration, and the
; left-aligned multiplier in A[7]:A[6] is added whenever that bit is set.
; Clear product register.
        move    A[1], #0
        move    A[0], #0
        move    LC[0], #24
        move    APC, #1     ; increment AP modulo 2
        jump    L$2
L$mpy   move    AP, #1
        rrc
        rrc
L$2     move    AP, #5
        sr
        rrc
        jnc     L$djnz
        move    AP, #0
        add     A[6]
        addc    A[7]
L$djnz  djnz    LC[0], L$mpy

; Move product to result registers.
        move    A[7], A[1]
        move    A[6], A[0]
#endif

; Normalize.  Only one normalisation step is required.  If carry is set
; the leading one has already been shifted out; otherwise shift once more
; (dropping the leading one) and decrement the exponent to match.
        jc      L$round
        move    AP, #3
        sub     #1
        move    AP, #6
        sla
        rlc

; Perform IEEE rounding.  A[7]:A[6] now holds the 23 fraction bits in
; bits 31..9; the nine bits below them (bits 8..0 of A[6]) are discarded
; when packing, so they decide the rounding.  If they are less than $100
; we round down (truncate), if more than $100 we round up, and if exactly
; $100 we round to the nearest even.
L$round move    APC, #0     ; AP static
        move    A[5], A[6]
        move    AP, #5
        and     #$1ff
        sub     #$100
        jc      L$pack      ; if less than $100, round down
        jnz     L$up        ; if more than $100, round up
        move    AP, #6
        move    C, ACC.9    ; round to even; if already even, no rounding
        jnc     L$pack
L$up    move    AP, #6
        add     #$200       ; round up
        move    AP, #7
        addc    #0
        jnc     L$pack      ; no overflow
; The fraction carried out, so it has wrapped to zero and the significand
; is exactly 2.0, i.e. 1.0 * 2^(e+1).  The assumed bit is not held in
; A[7]:A[6], so the fraction must stay zero: only the exponent changes.
        move    AP, #3
        add     #1

; Check for underflow or overflow.
L$pack  move    AP, #3
        jz      L$underflow  ; zero exponent, flush to zero, no denormals
        js      L$underflow  ; negative exponent, too small and no denormals
        cmp     #$ff         ; 255 is the inf/nan exponent, so it overflows too
        je      L$overflow
        sub     #256         ; low byte of A[3] is unchanged by this
        jnc     L$overflow

; Jam the exponent into the high byte of A[7] above the fraction: the 32
; bits become exponent : A[7] high byte : A[7] low byte : A[6] high byte.
        move    GR, A[6]
        move    GRL, A[7]
        move    A[6], GRS
        move    GR, A[7]
        move    GRL, A[3]
        move    A[7], GRS

; Now drag the sign bit back in, shifting everything right one place.
        move    PSF, LC[1]
        move    AP, #7
        rrc
        move    AP, #6
        rrc

; Restore registers and return.
L$rest  move    A[0], BP
        move    A[1], DP[0]
        move    A[3], DP[1]
        ret

; Flush small numbers to zero.
L$underflow
        move   A[7], #0
        move   A[6], #0
        jump   L$rest

; Overflow returns infinity with the sign of the product.  Rotating the
; saved sign into $ff00 gives $7f80 (+inf) or $ff80 (-inf).
L$overflow
        move   A[7], #$ff00
        move   A[6], #0
        move   PSF, LC[1]   ; carry = sign of product
        move   AP, #7
        rrc
        jump   L$rest

        endproc
