// Rowley MAXQ20 C Compiler, runtime support.
//
// Copyright (c) 2004 Rowley Associates Limited.
//
// This file may be distributed under the terms of the License Agreement
// provided with this software.
//
// THIS FILE IS PROVIDED AS IS WITH NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

// Unsigned 32-bit by 32-bit integer multiply to 64-bit product

// Inputs:
//    A[7]:A[6] - multiplier
//    A[5]:A[4] - multiplicand
// Outputs:
//    A[7]:A[6]:A[5]:A[4] - product
//    All other non-volatile registers preserved

// Hardware:
//    Minimum = 23 cycles
//    Maximum = 23 cycles
// Software:
//    Minimum = 38 cycles
//    Maximum = 362 cycles

// Target selection.
//
// __TARGET_PROCESSOR is not defined in this file; it is expected to be
// supplied by the build and is compared against the part numbers below.
// For parts that select the hardware path, HARDWARE_MULTIPLY is defined and
// the multiplier register names used by the routine (MCNT, MA, MB, MC2, MC1,
// MC0) are mapped onto the register module that holds them on that part.
// Only the module number differs between parts; the register indices 0..5
// are the same in every mapping.
#define MAXQ2000 2000
#define MAXQ3100 3100
#define MAXQ3120 3120
#define MAXQ7654 7654

// MAXQ2000: multiplier registers are M2[0]..M2[5].
#if __TARGET_PROCESSOR == MAXQ2000
#define HARDWARE_MULTIPLY
#define MCNT  M2[0]
#define MA    M2[1]
#define MB    M2[2]
#define MC2   M2[3]
#define MC1   M2[4]
#define MC0   M2[5]

// MAXQ3120: multiplier registers are M3[0]..M3[5].
#elif __TARGET_PROCESSOR == MAXQ3120
#define HARDWARE_MULTIPLY
#define MCNT  M3[0]
#define MA    M3[1]
#define MB    M3[2]
#define MC2   M3[3]
#define MC1   M3[4]
#define MC0   M3[5]

// MAXQ7654: multiplier registers are M1[0]..M1[5].
#elif __TARGET_PROCESSOR == MAXQ7654
#define HARDWARE_MULTIPLY
#define MCNT  M1[0]
#define MA    M1[1]
#define MB    M1[2]
#define MC2   M1[3]
#define MC1   M1[4]
#define MC0   M1[5]

// MAXQ3100: HARDWARE_MULTIPLY is left undefined, which selects the software
// shift-and-add implementation of the routine.
#elif __TARGET_PROCESSOR == MAXQ3100
#undef HARDWARE_MULTIPLY

// Any other value (or a missing definition) stops the build.
#else
#error Bad target processor
#endif

        code
        export  ___uint64_mul_32x32
        
___uint64_mul_32x32 proc

; Unsigned 32 x 32 -> 64-bit multiply.
;
;   In:   A[7]:A[6] = multiplier    (A[6] is the low word)
;         A[5]:A[4] = multiplicand  (A[4] is the low word)
;   Out:  A[7]:A[6]:A[5]:A[4] = product (A[4] is the lowest word)
;
; Registers written besides the result:
;   Hardware path: GR, LC[0], and the multiplier registers.
;   Software path: LC[0], LC[1], DP[0], DP[1], BP (used as save slots),
;                  AP, and APC (set to 0 on exit).

#ifdef HARDWARE_MULTIPLY

; Save IC so we can restore IGE on exit.
        move    GR, IC

; Disable interrupts so nothing else touches the multiplier mid-sequence.
        move    IC.0, #0

; Form the lowest partial product: multiplicand low * multiplier low.
        move    MCNT, #0x23           ; CLD MMAC SUS - MAC multiply
        move    MA, A[4]
        move    MB, A[6]              ; A[4] * A[6]
        move    MA, A[4]              ; sneak in first operand of next MAC
        move    LC[0], MC0            ; product word 0; parked in LC[0] because
                                      ; A[4] is still needed as an operand

; Shift the accumulator down one word so the cross products line up.
        move    MC0, MC1              ; shift result for next MAC
        move    MC1, #0

; Accumulate both cross products; MA for the first was written above.
        move    MB, A[7]              ; multiplicand low  * multiplier high (A[4] * A[7])
        move    MA, A[5]
        move    MB, A[6]              ; multiplicand high * multiplier low  (A[5] * A[6])

; Grab intermediate result.  MA must be loaded from A[5] before A[5] is
; overwritten with product word 1.
        move    A[4], LC[0]           ; product word 0
        move    MA, A[5]              ; first operand of the final MAC
        move    A[5], MC0             ; product word 1

; Shift down one more word and accumulate multiplicand high * multiplier high.
        move    MC0, MC1              ; shift result for next MAC
        move    MC1, MC2              ; carry out of the cross-product sum
        move    MC2, #0
        move    MB, A[7]              ; A[5] * A[7]; MA was written above

; Wait for result
        nop
        move    A[7], MC1             ; product word 3
        move    A[6], MC0             ; product word 2

; Restore IC and return.
        move    IC, GR
        ret

#else

; Multiplication by clockwork (shift and add).  Save working registers.
        move    LC[0], A[0]
        move    LC[1], A[1]
        move    DP[0], A[2]
        move    DP[1], A[3]
        move    BP, A[15]

; Set AP incrementing modulo 16, so each add/addc in the runs below lands
; on the next accumulator up.
        move    APC, #4

; Extend multiplicand by 16 bits: A[15]:A[5]:A[4] is the 48-bit shift register.
        move    A[15], #0

; Clear product, accumulated in A[3]:A[2]:A[1]:A[0].
        move    A[0], #0
        move    A[1], #0
        move    A[2], #0
        move    A[3], #0

; Treat each half of the multiplier separately.  Each pass consumes one bit
; from the bottom of A[6] and one bit from the bottom of A[7].

; Low half: a set bit in A[6] adds the shifted multiplicand at product bit 0.
L$0     move    AP, #6
L$x     sr                            ; carry = next bit of multiplier low word
        jnc     L$1
        move    AP, #0
        add     A[4]                  ; A[0] += multiplicand word 0
        addc    A[5]                  ; A[1] += multiplicand word 1 + carry
        addc    A[15]                 ; A[2] += multiplicand extension + carry
        addc    #0                    ; A[3] += carry

; High half: a set bit in A[7] adds the shifted multiplicand at product bit 16.
L$1     move    AP, #7
        sr                            ; carry = next bit of multiplier high word
        jnc     L$2
        move    AP, #1
        add     A[4]                  ; A[1] += multiplicand word 0
        addc    A[5]                  ; A[2] += multiplicand word 1 + carry
        addc    A[15]                 ; A[3] += multiplicand extension + carry

; Update shift register: 48-bit multiplicand <<= 1.
L$2     move    AP, #4
        sla                           ; A[4], then AP steps on to A[5]
        rlc                           ; A[5]
        move    AP, #15
        rlc                           ; A[15]

; Continue until all bits have been shifted out of the multiplier, i.e.
; while A[6] or A[7] is still non-zero.  The first branch re-enters at L$x
; because AP already selects A[6].
        move    AP, #6
        jnz     L$x
        move    AP, #7
        jnz     L$0

; Move product to result registers.
        move    A[7], A[3]
        move    A[6], A[2]
        move    A[5], A[1]
        move    A[4], A[0]

; Restore working registers
        move    A[0], LC[0]
        move    A[1], LC[1]
        move    A[2], DP[0]
        move    A[3], DP[1]
        move    A[15], BP

; Restore default APC
        move    APC, #0
        ret

#endif

        endproc
