// Rowley MAXQ20 C Compiler, runtime support.
//
// Copyright (c) 2004 Rowley Associates Limited.
//
// This file may be distributed under the terms of the License Agreement
// provided with this software.
//
// THIS FILE IS PROVIDED AS IS WITH NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

// 64-bit by 64-bit integer multiply

// Inputs:
//    A[7:4] - multiplier
//    P[3:0] - multiplicand on stack
// Outputs:
//    A[7:4] - product
//    All other non-volatile registers preserved

// Hardware:
//    Minimum = 34 cycles
//    Maximum = 34 cycles
// Software:
//    Minimum = 59 cycles
//    Maximum = 557 cycles

// Numeric identifiers for the parts this file knows about.  They are compared
// against __TARGET_PROCESSOR below; that symbol is not defined in this file
// (presumably supplied by the build -- confirm).
#define MAXQ2000 2000
#define MAXQ3100 3100
#define MAXQ3120 3120
#define MAXQ7654 7654

// Per-target configuration.  For parts where HARDWARE_MULTIPLY is defined,
// the names used by ___int64_mul are mapped onto that part's module
// registers:
//    MCNT          - written with the multiplier mode value
//    MA, MB        - written with the operand words
//    MC2, MC1, MC0 - accumulator words read back and shifted down by the
//                    routine (MC0 is the word stored as a result word)

// MAXQ2000: multiplier registers are M2[0]..M2[5].
#if __TARGET_PROCESSOR == MAXQ2000
#define HARDWARE_MULTIPLY
#define MCNT  M2[0]
#define MA    M2[1]
#define MB    M2[2]
#define MC2   M2[3]
#define MC1   M2[4]
#define MC0   M2[5]

// MAXQ3120: multiplier registers are M3[0]..M3[5].
#elif __TARGET_PROCESSOR == MAXQ3120
#define HARDWARE_MULTIPLY
#define MCNT  M3[0]
#define MA    M3[1]
#define MB    M3[2]
#define MC2   M3[3]
#define MC1   M3[4]
#define MC0   M3[5]

// MAXQ7654: multiplier registers are M1[0]..M1[5].
#elif __TARGET_PROCESSOR == MAXQ7654
#define HARDWARE_MULTIPLY
#define MCNT  M1[0]
#define MA    M1[1]
#define MB    M1[2]
#define MC2   M1[3]
#define MC1   M1[4]
#define MC0   M1[5]

// MAXQ3100: HARDWARE_MULTIPLY is left undefined, so ___int64_mul assembles
// its software shift-and-add path instead.
#elif __TARGET_PROCESSOR == MAXQ3100
#undef HARDWARE_MULTIPLY

// Any other value of __TARGET_PROCESSOR stops the build.
#else
#error Bad target processor
#endif

        code
        export  ___int64_mul

; ___int64_mul - low 64 bits of a 64-bit by 64-bit product.
;
; In:   A[7:4]  multiplier, A[4] = least-significant word
;       A[8]    points at the multiplicand words P[0]..P[3] on the stack,
;               P[0] = least-significant word
; Out:  A[7:4]  product (low 64 bits), A[4] = least-significant word
;       A[8]    unchanged on return (the multiplicand stays on the stack)
;
; Hardware path modifies DP[1], GR, LC[0], LC[1] and the multiplier registers.
; Software path modifies DP[1] and AP, saves/restores A[0..3] and A[12..15],
; and leaves APC = 0.
___int64_mul proc

#ifdef HARDWARE_MULTIPLY

; Each result word k is the sum of the partial products A[4+i] * P[j] with
; i + j = k, accumulated in the multiplier; after each word is taken from
; MC0 the accumulator is shifted down one word to carry into the next.

; Save IC so we can restore IGE on exit.
        move    GR, IC

; Disable interrupts while the multiplier is in use.
        move    IC.0, #0

; Point DP[1] to second operand (P[0]).
        move    DP[1], A[8]

; Result word 0: A[4]P[0]
        move    MCNT, #0x23           ; CLD MMAC SUS - MAC multiply
        move    MA, A[4]
        move    MB, @DP[1]
        move    MA, A[5]              ; sneak in for next MAC
        move    LC[0], MC0            ; stash result of A[4]P[0]
        move    MC0, MC1              ; shift down for next block
        move    MC1, #0

; Result word 1: A[5]P[0] + A[4]P[1]
        move    MB, @DP[1]++
        move    MA, A[4]
        move    MB, @DP[1]++
        move    MA, A[4]              ; sneak in for next MAC
        move    LC[1], MC0            ; stash low result of A[5]P[0] + A[4]P[1]
        move    MC0, MC1              ; shift down for next block
        move    MC1, MC2

; Result word 2: A[4]P[2] + A[5]P[1] + A[6]P[0]  (DP[1] walks back down)
        move    MB, @DP[1]--
        move    MA, A[5]
        move    MB, @DP[1]--
        move    MA, A[6]
        move    MB, @DP[1]
        move    MA, A[7]              ; sneak in for next MAC
        move    A[7], MC0             ; stash low result of A[6]P[0] + A[5]P[1] + A[4]P[2]
                                      ; (A[7] is free: its value is already in MA)
        move    MC0, MC1              ; shift down for next block
        move    MC1, MC2

; Result word 3: A[7]P[0] + A[6]P[1] + A[5]P[2] + A[4]P[3]
        move    MB, @DP[1]++
        move    MA, A[6]
        move    MB, @DP[1]++
        move    MA, A[5]
        move    MB, @DP[1]++
        move    MA, A[4]
        move    MB, @DP[1]

; Completed: place the four result words in A[7:4].
        move    A[6], A[7]            ; wait for multiplier to finish
        move    A[7], MC0             ; result of A[7]P[0] + A[6]P[1] + A[5]P[2] + A[4]P[3]
        move    IC, GR                ; re-enable interrupts early
        move    A[5], LC[1]
        move    A[4], LC[0]
        ret

#else

; Software path: shift-and-add.
;    A[3:0]   - multiplicand, shifted left one bit per pass
;    A[7:4]   - multiplier, every word shifted right one bit per pass
;    A[15:12] - product accumulator
; On each pass the low bit of multiplier word A[4+j] decides whether the
; current multiplicand is added into the product starting at word j
; (A[12+j]); words that would land above A[15] are dropped because only the
; low 64 bits of the product are returned.

; Save working registers in eight words reserved below the stack pointer.
        move    AP, #8
        sub     #8
        move    DP[1], A[8]
        move    @DP[1], A[12]
        move    @++DP[1], A[13]
        move    @++DP[1], A[14]
        move    @++DP[1], A[15]
        move    @++DP[1], A[0]
        move    @++DP[1], A[1]
        move    @++DP[1], A[2]
        move    @++DP[1], A[3]

; Set AP incrementing modulo 16.
        move    APC, #4

; Load parameter.  The dummy read steps DP[1] past the last saved word so
; that it points at P[0].
        move    NUL, @DP[1]++
        move    A[0], @DP[1]++
        move    A[1], @DP[1]++
        move    A[2], @DP[1]++
        move    A[3], @DP[1]++

; Clear product (AP steps 12, 13, 14, 15).
        move    AP, #12
        move    ACC, #0
        move    ACC, #0
        move    ACC, #0
        move    ACC, #0

; Treat each of the four words that make up the multiplier separately.
; A[4] is the least-significant multiplier word, so it adds the full
; multiplicand at product word 0; each higher word adds one word further up
; and needs one multiplicand word fewer.

; Multiplier word 0 (A[4]) -> product words 0..3
L$0     move    AP, #4
        sr
        jnc     L$1
        move    AP, #12
        add     A[0]
        addc    A[1]
        addc    A[2]
        addc    A[3]

; Multiplier word 1 (A[5]) -> product words 1..3
L$1     move    AP, #5
        sr
        jnc     L$2
        move    AP, #13
        add     A[0]
        addc    A[1]
        addc    A[2]

; Multiplier word 2 (A[6]) -> product words 2..3
L$2     move    AP, #6
        sr
        jnc     L$3
        move    AP, #14
        add     A[0]
        addc    A[1]

; Multiplier word 3 (A[7]) -> product word 3
L$3     move    AP, #7
        sr
        jnc     L$4
        move    AP, #15
        add     A[0]

; Update shift register: multiplicand A[3:0] <<= 1 (AP steps 0, 1, 2, 3).
L$4     move    AP, #0
        sla
        rlc
        rlc
        rlc

; Continue for at most 16 passes (until every shifted multiplier word is
; zero).  AP is 4 after the shifts above, so the first test is on A[4].
        jnz     L$0
        move    AP, #5
        jnz     L$0
        move    AP, #6
        jnz     L$0
        move    AP, #7
        jnz     L$0

; Move product to result registers.
        move    A[4], A[12]
        move    A[5], A[13]
        move    A[6], A[14]
        move    A[7], A[15]

; Restore working registers.  AP steps 12..15 then wraps to 0..3, matching
; the order in which they were saved; DP[1] ends on the original stack
; pointer value, which is written back to A[8].
        move    AP, #12
        move    DP[1], A[8]
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    ACC, @DP[1]++
        move    A[8], DP[1]

; Restore default APC
        move    APC, #0
        ret

#endif

        endproc
