;++ ; MACRO64$PI.M64 -- Based on original algorithm and C program by Remy Dube. ; ; Copyright 1992 Digital Equipment Corporation ; All rights reserved. ; ; This software is furnished under a license and may be used and copied ; only in accordance with the terms of such license and with the ; inclusion of the above copyright notice. This software and any copies ; shall not be provided to any other person. No title to or ownership of ; the software is hereby transferred. The information in this software ; is subject to change without notice. DIGITAL assumes no responsibility ; for the use, functionality or reliability of its software on equipment ; which is not supplied by DIGITAL. ; ; This is a complete MACRO-64 program which computes PI to a specified ; number of digits. This example is one of three installed with MACRO-64: ; ; MACRO64$HELLO.M64 - Simple Hello World program ; Demonstrates calling standard macros ; MACRO64$WHAMI.M64 - Program that displays WHAMI IPR ; Demonstrates system calls ; MACRO64$PI.M64 - Program that computes PI (this program) ; Demonstrates general programming with ; MACRO-64 along with optimization techniques ; ; Digital suggests you become familiar with the concepts illustrated in ; the MACRO64$HELLO.M64 example and the MACRO64$WHAMI.M64 example before ; you attempt to understand this example example. ; ; ; The original algorithm implemented by this program is by Remy Dube. ; This MACRO-64 implementation is adapted from Remy's C implementation. ; While the algorithm and implementation are reasonably fast and accurate, ; the intent of this program is to illustrate MACRO-64 programming ; concepts rather than set world speed or precision records. This program ; illustrates the use of a number of optimization techniques, such as loop ; unrolling and routine inlining. In several instances, the context in ; which these techniques are applied causes the technique to yield only a ; marginal benefit. Furthermore, due to the decreased development and ; maintenance costs and increased portability associated with High Order ; Languages, it would not normally be advisable to write an entire program ; such as this in assembly language. Nonetheless, this program serves to ; illustrate a number of paradigms applicable to efficient programming in ; MACRO-64. ; ; This program calls the DEC C Runtime Library to perform I/O and for ; other ancillary tasks. Note that the DEC C Runtime Library is available ; on all OpenVMS AXP systems, regardless of whether you have purchased and ; installed the DEC C compiler product. ; ; To run this program on an OpenVMS AXP system, use the following commands: ; ; $ macro/alpha_axp/object=pi sys$examples:macro64$pi ; $ link pi ; $ run pi ;-- ;++ ; The $library_debug macro helps work around a problem that ; exists between MACRO-64 V1.0 and the V1.0 symbolic debugger ; where the debugger fills the screen with messages indicating ; "no source line available." Invoking $library_debug near ; the beginning of your module causes these message to occur ; at the beginning of your module, rather than interdispersed ; throughout your routines. ;-- $library_debug ; About 30 blank lines after the invocation of $library_debug ; keeps the "source line not available" messages from ; overwriting your MACRO-64 statements in the source display ; window. ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; ASSEMBLY-TIME CONSTANTS ;--------------------------------------------------------------------------- ; Turn on optimizations and automatic alignment. .enable align_code, align_data, peephole, schedule PRINT_WIDTH = 100 ; Width of output device BIGINT_BYTES = 8 ; Number of bytes in a quadword BIGINT_SHIFTER = 3 ; L-Shift this much to convert array index to byte offset BIGINT_BITS = BIGINT_BYTES * 8 ; Integer argument registers. IARG0 = "r16" IARG1 = "r17" IARG2 = "r18" IARG3 = "r19" IARG4 = "r20" IARG5 = "r21" ; Floating-point argument registers. FARG0 = "f16" FARG1 = "f17" FARG2 = "f18" FARG3 = "f19" FARG4 = "f20" FARG5 = "f21" ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; MODULE VARIABLE DATA ; --------------------------------------------------------------------------- $data_section WEIGHT: .quad 0 FILE_VAR: .quad 0 LOG_4: .g_floating 0.0 PRECISION: .quad 0 IMAX: .quad 0 CLUSTER_SIZE: .quad 0 CLUSTER_VECT: .quad 0 CLUSTER_VECT_SIZE: .quad 0 ISTOP: .quad 0 START: .quad 0 FINISH: .quad 0 DURATION: .quad 0 SCRATCH: .byte %repeat(99,<0,>)0 CLK_TCK: .g_floating 100.0 ; From DECC$LIBRARY_INCLUDE:TIME.H G1: .g_floating 1.0 G4: .g_floating 4.0 ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; UTILITY MACROS ;--------------------------------------------------------------------------- ;++ ; Macro IPOW ; Abstract: Integer power function for assembly-time values. ; Inputs: A - Value to be raised to a power ; B - Power to which to raise value ; Method: Multiply A times itself B times. ;-- .macro IPOW A B RESULT RESULT = %repeat(<>,<*>) 1 .endm IPOW ; The LOOP_UNROLL assembly-time variable is used to control manual loop ; unrolling: Use 0 For no loop unrolling, 1 for normal loop unrolling, ; or a value greater than 1 for a higher, global unroll factor. LOOP_UNROLL = 1 ;++ ; Macro UNROLL ; Abstract: ; Macro UNROLL is used to assist in in manually unrolling a loop. ; It can be beneficial to repeat the body of a loop a number of times ; with branch-outs between each repetition to the loop exit. At ; the least, you gain the benefit of a a branch not taken vs. a branch ; taken. While this effect may not gain very much, when loop unrolling ; is combined with instruction scheduling (.ENABLE SCHEDULE), you may ; be able to schedule portions successive repetitions of the loop in ; parallel. See Appendix B of the MACRO-64 Assembler for OpenVMS AXP ; Systems Reference Manual for more information on this optimization ; technique. ; ; To use UNROLL, you must define a macro that expands to the body of the ; loop. Specify the name of this macro with the BODY argument. You ; must also specify 2 branch instruction statements: 1 that tests a ; condition and branches to the beginning of the loop to continue, and ; another that tests the opposite condition and branches to the end of the ; loop to exit. Specify the loop continuation statement with the ; CONTINUE argument. Specify the loop exit statement with the EXIT ; argument. Specify the target label for the loop-continue statement ; as %%LOOP_START%% and the target for the loop-exit statement as ; %%LOOP_END%%. For instance, the continue statement might be a BNE and ; the end statement would be BEQ, or vice versa. Often, you must precede ; the branch instruction with a comparison instruction. If so, specify ; the comparison instruction statement with the COMPARE argument. ; ; You can use the TEST_FIRST argument to specify the style of the loop. ; Specify 1 to get generate an exit test prior to the first iteration of ; the loop. Specify 0 to always execute the loop at least once. ; ; You can experiment with different values for the FACTOR argument. A ; FACTOR of 0 or 1 results in a single copy of the loop body -- that is, no ; loop unrolling. FACTORs greater than 1 can improve runtime performance ; at the expense of increased code size. However, depending on the ; size of the loop body, too large a FACTOR can cause the repeated loop ; body to overflow the instruction cache, thus negatively impacting ; performance. ; ; In addition, if the loop body contains a call to another routine, ; it is unlikely that the scheduler (.ENABLE SCHEDULE) will be able to ; concurrently schedule successive repetitions of the loop body. As a ; result, the benefit realized by unrolling the loop will likely ; reduce to the difference in cost between a branch taken and a branch ; not taken times the FACTOR you specify. ; ; Thus, the best loops to unroll are those that are in your critical ; performance path, contain a small number of instructions, and do not ; call other routines. ; ; Inputs: ; BODY - Name of the macro that expands to the loop body ; FACTOR - Specific repetition factor to use when unrolling the loop ; CONTINUE - A conditional branch instruction statement to continue the loop ; EXIT - A conditional branch instruction statement to exit the loop ; COMPARE - An optional comparison instruction to use prior to the CONTINUE ; or EXIT instruction ; TEST_FIRST - non-zero means test the exit condition prior to the 1st ; iteration, zero means always execute the loop at least once ; UNROLL_LOOP (assembly variable) - Controls whether or not to unroll loops ; or whether to increase the unroll factor for all unrolled loops ; ; Outputs: ; The body of the loop is repeated FACTOR*UNROLL_LOOP times. ; ; Example usage: ; ; ; First define the loop body as a macro ; .macro LOOP_BODY ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; .endm LOOP_BODY ; ; ; Now invoke the UNROLL macro to unroll the loop ; UNROLL BODY=LOOP_BODY, FACTOR=3, TEST_FIRST=1, - ; COMPARE = , - ; CONTINUE = , - ; EXIT = ; ; ; The above use of UNROLL would result in the following code ; ; being generated (where LOOP_START and LOOP_END are unique labels ; ; for each invocation of UNROLL): ; ; cmple r0, r2, r22 ; These 2 instructions are suppressed ; beq r22, LOOP_END ; if you specify TEST_FIRST=0 ; LOOP_START: ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; beq r22, LOOP_END ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; beq r22, LOOP_END ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; bne r22, LOOP_START ; LOOP_END: ;-- .macro UNROLL BODY, FACTOR, CONTINUE, EXIT, COMPARE, TEST_FIRST=1 .if not_defined, UNROLL_INDEX UNROLL_INDEX = 0 .else UNROLL_INDEX = UNROLL_INDEX + 1 .endc LOOP_START = "UNROLL_LOOP_START_%integer(UNROLL_INDEX)" LOOP_END = "UNROLL_LOOP_END_%integer(UNROLL_INDEX)" UNROLL_HELPER , , <%LOOP_START%>, <%LOOP_END%>, - , , , .endm UNROLL ; Helper macro for loop unrolling .macro UNROLL_HELPER BODY, FACTOR, START, END, CONTINUE, EXIT, COMPARE, - TEST_FIRST .if ne, COMPARE EXIT .endc START: .repeat <-1> * LOOP_UNROLL BODY COMPARE EXIT .endr BODY COMPARE CONTINUE END: .endm UNROLL_HELPER ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; MODULE LOCAL ROUTINES ;--------------------------------------------------------------------------- $routine ASK_NUMBER_OF_DIGITS, kind=stack, local=true, - data_section_pointer=true, - saved_regs= ;++ ; Abstract: Inquire from the user terminal who many digits to compute. ; Inputs: Terminal I/O ; Outputs: PRECISION - Number of digits to compute ;-- $linkage_section 10$: .asciz "How many digits do you want to compute? " 20$: .asciz "Computing PI with %-d digits \x0A\x0A" 30$: .asciz "Please specify a positive, non-zero integer value.\x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect 100$: $call DECC$GPRINTF, args=10$/a ; Print "How many?" $call DECC$GETS, args=SCRATCH/a ; Get answer $call DECC$ATOL, args=SCRATCH/a ; Convert to longword bgt r0, 200$ $call DECC$GPRINTF, args=30$/a ; Print "be positive" br 100$ 200$: stq r0, PRECISION ; Set precision ; Output "Computing with N digits" $call DECC$GPRINTF, args=<20$/a, r0/l>, scratch_regs= $return $end_routine ASK_NUMBER_OF_DIGITS $routine INITIALIZE, kind=stack, local=true, - saved_regs=, - data_section_pointer=true ;++ ; Abstract: Start up processing. ; Inputs: ; PRECISION ; Outputs: ; LOG_4 - log10(4.0) ; IMAX - PRECISION/LOG_4 ; WEIGHT - Scale factor for various computations ; CLUSTER_SIZE - Number of decimal digits represented in a quadword ; CLUSTER_VECT_SIZE - Number of quadwords needed for spec'd precision ; CLUSTER_VECT - Ptr to array of quadwords ; CLUSTER_VECT[0..last] - Initialized to 0 ; FILE_VAR - Opened for output to PI.DAT ;-- $linkage_section POWX = "r4" ; r4 holds pow(2,BIGINT_BITS-4)/IMAX*10 W = "r5" ; r5 is temp for WEIGHT CS = "r6" ; r6 is temp for CLUSTER_SIZE CVS = "r7" ; r7 is temp for CLUSTER_VECT_SIZE IPOW 2 T ; T=pow(2,BIGINT_BITS-4) 10$: .quad T 20$: .asciz "pi.dat" 30$: .asciz "w" 40$: .asciz " PI WITH %-d digits \x0A \x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect .base r3, $ds ; ...data section $call DECC$GLOG10, args=G4/g ; r0=LOG10(4.0) stg f0, LOG_4 ; LOG_4=LOG10(4.0) ldt f1, PRECISION ; Convert precision to... cvtqg f1, f1 ; ...G float divg f1, f0, f1 ; f1=PRECISION/LOG_4 cvtgq/c f1, f1 ; Convert to quadword stt f1, IMAX ; IMAX=PRECISION/LOG_4 ldq %IARG1%, IMAX ; IARG1=IMAX mulq %IARG1%, #10, %IARG1% ; IARG1=IMAX*10 ;++ ; The OTS$DIV_L routine takes 2 quad word arguments and returns ; in r0 the quadword quotient of the first argument divided by the ; second argument. OTS$DIV_L is in the sharable library LIBOTS.EXE, ; which is available at link time by default. A similar routine, ; OTS$DIV_I, is available in that same library for longword integer ; division. ;-- ; r0=pow(2,BIGINT_BITS-4)/(IMAX*10) $call OTS$DIV_L, args=<10$/q, %IARG1%/q> ; POWX=pow(2,BIGINT_BITS-4)/(IMAX*10) mov r0, %POWX% mov 1, %W% ; WEIGHT=1 mov 0, %CS% ; CLUSTER_SIZE=0 ; Loop computing WEIGHT and CLUSTER_SIZE .macro LOOP_BODY ; Loop body for unrolling mulq %W%, #10, %W% ; WEIGHT=WEIGHT*10 lda %CS%, 1(%CS%) ; CLUSTER_SIZE=CLUSTER_SIZE+1 .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=10, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = stq %W%, WEIGHT ; Store local copies of stq %CS%, CLUSTER_SIZE ; WEIGHT and CLUSTER_SIZE ldq %IARG0%, PRECISION ; IARG0=PRECISION addq %CS%, %IARG0%, %IARG0% ; IARG0=PRECISION+CLUSTER_SIZE lda %IARG0%, -1(%IARG0%) ; IARG0=PRECISION+CLUSTER_SIZE-1 ; r0=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE $call OTS$DIV_L, args=<%IARG0%/q,%CS%/q> ; r0=((PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE)+1 lda r0, 1(r0) ; CLUSTER_VECT_SIZE=((PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE)+1 mov r0, %CVS% stq r0, CLUSTER_VECT_SIZE ; Allocate CLUSTER_VECT sll r0, #BIGINT_SHIFTER, r0 $call DECC$MALLOC, args=r0/l, scratch_regs= stq r0, CLUSTER_VECT CCA = "r0" ; r0 is current cluster addr ; Initialize CLUSTER_VECT mov %CVS%, r1 ; r1=CLUSTER_VECT_SIZE 100$: stq r31, (%CCA%) ; *CCA = 0 lda r1, -1(r1) ; --r1 lda r0, BIGINT_BYTES(R0) ; CCA++ bgt r1, 100$ ; Branch if not done $call DECC$FOPEN, args=<20$/a, 30$/a> ; Open output file stq r0, FILE_VAR $call DECC$GFPRINTF, args=, - scratch_regs= $return $end_routine INITIALIZE $routine MULTIPLY_AND_DIVIDE, kind=stack, - saved_regs=, - data_section_pointer=true, - local=true ;++ ; Abstract: Walk the CLUSTER_VECTOR array performing a series of ; multiplications and divisions. ; Inputs: ; IARG0 - NUMERATOR ; IARG1 - DENOMINATOR ; ISTOP - Indicator of how much to compute ; WEIGHT ; CLUSTER_VECT ; CLUSTER_VECT[0..ISTOP] ; Outputs: ; CLUSTER_VECT[0..ISTOP] ;-- .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect TEMP = "r4" ; r4 is variable 'temp' I = "r5" ; r5 is variable 'i' W = "r6" ; r6 is local WEIGHT CCA = "r7" ; r7 is curr cluster addr NUM = "r8" ; r8 is local copy of NUM DEN = "r9" ; r9 is local copy of DEN CC = "r10" ; r10 is curr cluster clr %TEMP% ; TEMP=0 ldq %I%, ISTOP ; I=ISTOP lda %I%, 1(%I%) ; I=ISTOP+1 ldq %W%, WEIGHT ; Load local copy of WEIGHT ldq %CCA%, CLUSTER_VECT ; CCA = &CLUSTER_VECT[0] mov %IARG0%, %NUM% ; Get local copy of NUM mov %IARG1%, %DEN% ; Get local copy of DEN .macro LOOP_BODY ; Unroll loop mulq %TEMP%, %W%, %TEMP% ; TEMP *= WEIGHT ldq %CC%, (%CCA%) ; CC = *CCA mulq %NUM%, %CC%, r22 ; r22 = CC * NUM addq %TEMP%, r22, %TEMP% ; TEMP += CC * NUM $call OTS$DIV_L, - ; r0=TEMP/DEN args=<%TEMP%/q, %DEN%/q> stq r0, (%CCA%) ; *CCA=TEMP/DEN mulq %DEN%, r0, r0 ; r0=DEN * (*CCA) subq %TEMP%, r0, %TEMP% ; TEMP -= DEN * (*CCA) lda %CCA%, BIGINT_BYTES(%CCA%); Advance to next cluster lda %I%, -1(%I%) ; Decrement loop index .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=8, TEST_FIRST=1, - CONTINUE = , - EXIT = $return $end_routine MULTIPLY_AND_DIVIDE ;++ ; Macro PROPAGATE_CARRY ; Abstract: Walk the CLUSTER_VECTOR array in reverse order propagating ; a carry out from one cluster to its predecessor as necessary. ; (A manually inlined routine.) ; Inputs: ; %CA% - register that holds CLUSTER_VECT ; WEIGHT ; CLUSTER_VECT[0..last] ; Outputs: ; CLUSTER_VECT[0..last] ;-- .macro PROPAGATE_CARRY CCA = "r0" ; r0 is addr of current cluster CC = "r1" ; r1 is current cluster CARRY = "r22" ; r22 is 'CARRY' variable W = "r23" ; r23 is local WEIGHT clr %CARRY% ; CARRY=0 ldq %W%, WEIGHT ; load local copy of WEIGHT ldq %CCA%, CLUSTER_VECT_SIZE; CCA=sizeof(CLUSTER_VECT) lda %CCA%, -1(%CCA%) ; CCA=sizeof(CLUSTER_VECT)-1 ; CCA=(sizeof(CLUSTER_VECT)-1)*sizeof(CLUSTER_VECT[0]) sll %CCA%, #BIGINT_SHIFTER, %CCA% addq %CA%, %CCA%, %CCA% ; CCA --> last cluster .macro LOOP1_BODY ; Unroll loop 1 ldq %CC%, (%CCA%) ; r22 = *CCA addq %CC%, %CARRY%, %CC% ; r22 = *CCA + CARRY clr %CARRY% ; CARRY=0 .macro LOOP2_BODY ; Unroll loop 2 subq %CC%, %W%, %CC% ; CC -= WEIGHT lda %CARRY%, 1(%CARRY%); CARRY++ .endm LOOP2_BODY UNROLL BODY=LOOP2_BODY, FACTOR=8, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = stq %CC%, (%CCA%) ; Save current cluster lda %CCA%, -BIGINT_BYTES(%CCA%) ; Backup 1 cluster .endm LOOP1_BODY UNROLL BODY=LOOP1_BODY, FACTOR=2, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = .endm PROPAGATE_CARRY $routine COMPUTE, kind=stack, - saved_regs=, - local=true, - data_section_pointer=true, - size=$rsa_end+16 ;++ ; Abstract: Build up a binary representation of PI in the CLUSTER_VECT array. ; ; Inputs: ; PRECISION ; CLUSTER_SIZE ; IMAX ; LOG_4 ; CLUSTER_VECT[0] ; ; Outputs: ; CLUSTER_VECT[0..last] ; ; Notes: ; $ROUTINE defines the assembly-time variable $RSA_END as the offset ; from the beginning of the stack frame to just beyond the end of the ; the register save area. You can use this variable both to define ; the stack size with $ROUTINE's SIZE argument as above, and in ; referencing the stack beyond the register save area. In this case, ; we have allocated an additional 16 bytes of stack storage beyond the ; end of the register-save area. Note that the size of the stack frame ; must be an even multiple of 16. ;-- $linkage_section I = "r4" ; r4 is variable 'I' CS = "r5" ; r5 is local CLUSTER_SIZE TMP = "r6" ; r6 is variable 'TMP' CA = "r7" ; r7 is local &CLUSTER_VECT[0] PCC = "r8" ; r8 is variable 'PCC' I2M1 = "r9" ; r9 is 2*I-1 I4 = "r10" ; r10 is 4*I and 4*I+2 I.G = "f2" ; f2 is variable 'I.G', G float ; shadow of I L4 = "f3" ; f3 is local LOG_4 ONE.G = "f4" ; f4 holds 1.0 in G floating $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect ldq %I%, IMAX ; I=IMAX ldt %I.G%, IMAX ; I.G shadows I cvtqg %I.G%, %I.G% ; Convert to G format ldg %L4%, LOG_4 ; Load local LOG_4 ldq %CS%, CLUSTER_SIZE ; Load local CLUSTER_SIZE ldq %TMP%, PRECISION ; TMP=PRECISION mov %TMP%, %PCC% ; PCC=PRECISION lda %TMP%, 2(%TMP%) ; TMP=PRECISION+2 addq %CS%, %TMP%, %TMP% ; TMP=CLUSTER_SIZE+PRECISION+2 ldq %CA%, CLUSTER_VECT ; CA = &CLUSTER_VECT[0] addq %PCC%, %CS%, %PCC% ; PCC=PRECISION+CLUSTER_SIZE lda %PCC%, -1(%PCC%) ; PCC=PRECISION+CLUSTER_SIZE-1 ; r0=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE $call OTS$DIV_L, args=<%PCC%/q,%CS%/q> ; PCC=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE mov r0, %PCC% ldg %ONE.G%, G1 ; Load ONE.G with 1.0 .macro LOOP_BODY ?LABEL ; Unroll loop mulg %I.G%, %L4%, f0 ; f0=I*LOG_4 cvtgq/c f0, f0 ; Convert to integer ;++ ; We must store F0 to memory in order to load it into an ; integer register. Our invocation of the $ROUTINE macro ; above allocated an additional 16 bytes of stack storage, ; 8 of which we will use now for that purpose. We can ; reference the stack beyond the end of the register save ; area relative to the frame pointer, FP, using the ; $RSA_END assembly-time variable that is defined by the ; $ROUTINE macro. ;-- stt f0, $rsa_end(fp) ; Put to memory ldq %IARG0%, $rsa_end(fp) ; Read back to I reg ; IARG0=(PRECISION-I*LOG_4+2+CLUSTER_SIZE) subq %TMP%, %IARG0%, %IARG0% $call OTS$DIV_L, args=<%IARG0%/q, %CS%/q> cmple r0, %PCC%, r1 ; result <= PCC? bne r1, LABEL ; Yes, branch mov %PCC%, r0 ; No, use PCC LABEL: stq r0, ISTOP ; ISTOP=min(result,PCC) sll %I%, #1, %I2M1% ; I2M1 = 2*I lda %I2M1%, -1(%I2M1%) ; I2M1 = 2*I-1 sll %I%, #2, %I4% ; I4 = 4*I $call MULTIPLY_AND_DIVIDE, args=<%I2M1%/q, %I4%/q>, - local=true lda %I4%, 2(%I4%) ; I4=4*I+2 $call MULTIPLY_AND_DIVIDE, args=<%I2M1%/q, %I4%/q>, - local=true ldq r0, (%CA%) ; r0=CLUSTER_VECT[0] lda r0, 3(R0) ; r0=CLUSTER_VECT[0] + 3 stq r0, (%CA%) ; CLUSTER_VECT[0] += 3 lda %I%, -1(%I%) ; I-- subg %I.G%, %ONE.G%, %I.G% ; I.G shadows I .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=4, TEST_FIRST=1, - CONTINUE = , - EXIT = PROPAGATE_CARRY ; Inlined routine call $return $end_routine COMPUTE $routine PRINT_RESULT, kind=stack, - saved_regs=,- local=true, - data_section_pointer=true, - size=$rsa_end+16 ;++ ; Abstract: Print the results of the computation. ; Inputs: ; START - seconds at start of computation ; FINISH - seconds at end of computation ; FILE_VAR ; PRECISION ; WEIGHT ; CLUSTER_SIZE ; CLUSTER_VECT_SIZE ; CLUSTER_VECT[0..last] ; PRINT_WIDTH ;-- $linkage_section SECONDS = "f2" ; f2 is variable 'SECONDS' MINUTES = "r4" ; r4 is variable 'MINUTES' HOURS = "r5" ; r5 is variable 'HOURS' I = "r6" ; r6 is variable 'I' CCA = "r7" ; r7 is variable 'CCA' CC = "r8" ; r8 is variable 'CC' CLIM = "r9" ; r9 is variable 'CLIM' J = "r10" ; r10 is variable 'J' CS = "r11" ; r11 is local CLUSTER_SIZE W = "r12" ; r12 is local WEIGHT COUNT = "r13" ; r13 is variable COUNT P = "r14" ; r14 is local PRECISION 10$: .asciz - " Calculation time : %-d hours %-d minutes %f seconds \x0A \x0A" 20$: .asciz "%-d." 30$: .asciz "%-d" 40$: .asciz "\x0A " 50$: .asciz "\x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect ldq r22, FINISH ; r22=FINISH ldq r23, START ; r23=START subq r22, r23, r23 ; r23=FINISH-START stq r23, $rsa_end(fp) ; Convert FINISH-START ldt f0, $rsa_end(fp) ; ...to... cvtqg f0, f0 ; ...G floating ldg f1, CLK_TCK ; f1=CLK_TCK divg f0, f1, %SECONDS% ; SECONDS=(FINISH-START)/CLK_TCK cvtgq/c %SECONDS%, f0 ; Convert... stt f0, $rsa_end(fp) ; ...to integer $call OTS$DIV_L, args=<$rsa_end(fp)/q, 60/a> ; r0=SECONDS/60 mov r0, %MINUTES% ; MINUTES=SECONDS/60 mulq r0, #60, r0 ; r0=MINUTES*60 stq r0, $rsa_end(fp) ; Convert... ldt f0, $rsa_end(fp) ; ...to... cvtqg f0, f0 ; G floating subg %SECONDS%, f0, %SECONDS% ; SECONDS -= MINUTES*60 $call OTS$DIV_L, - ; r0=MINUTES/60 args=<%MINUTES%/q, 60/a> mov r0, %HOURS% ; HOURS=MINUTES/60 mulq %HOURS%, #60, r0 ; r0=HOURS*60 subq %MINUTES%, r0, %MINUTES% ; MINUTES -= HOURS*60 $call DECC$GFPRINTF, - args= ldq %CCA%, CLUSTER_VECT ; CCA = &CLUSTER_VECT[0] ldq %CLIM%, CLUSTER_VECT_SIZE ; CLIM=sizeof(CLUSTER_VECT) ; CLIM=sizeof(CLUSTER_VECT)*sizeof(CLUSTER_VECT[0]) sll %CLIM%, #BIGINT_SHIFTER, %CLIM% addq %CCA%, %CLIM%, %CLIM% ; CLIM --> beyond last cluster ldq %CC%, (%CCA%) ; CC = CLUSTER_VECT[0] $call DECC$GFPRINTF, - args= mov 1, %I% ; I=1 ldq %CS%, CLUSTER_SIZE ; Load local cluster size lda %CCA%, BIGINT_BYTES(%CCA%) ; Advance to next cluster cmplt %CCA%, %CLIM%, r0 ; Beyond last cluster? beq r0, 400$ ; Yes, branch to exit loop 1 ldq %W%, WEIGHT ; Load local copy of WEIGHT ldq %P%, PRECISION ; Load local copy of PRECISION clr %COUNT% ; COUNT=0 100$: ldq %CC%, (%CCA%) ; CC = *CCA mov 1, %J% ; J=1 cmple %J%, %CS%, r1 ; J<=CLUSTER_SIZE? beq r1, 300$ ; No, branch to exit loop 2 200$: mulq %CC%, #10, %CC% ; CC *= 10 $call OTS$DIV_L, args=<%CC%/q, %W%/q> ; r0=CC/WEIGHT mov r0, %IARG2% ; IARG2=CC/WEIGHT mulq %W%, r0, r0 ; r0=IARG2*WEIGHT subq %CC%, r0, %CC% ; CC -= IARG2*WEIGHT cmple %COUNT%, %P%, r1 ; COUNT<=PRECISION? beq r1, 210$ ; No, branch sextl %IARG2%, %IARG2% ; Convert to longword $call DECC$GFPRINTF, - args= 210$: lda %COUNT%, 1(%COUNT%) ; COUNT++ cmplt %I%, #PRINT_WIDTH, r0 ; I>=PRINT_WIDTH? bne r0, 220$ ; No, branch clr %I% ; I=0 $call DECC$GFPRINTF, - ; Print linefeed + 2 spaces args= 220$: lda %I%, 1(%I%) ; I++ lda %J%, 1(%J%) ; J++ cmple %J%, %CS%, r0 ; J<=CLUSTER_SIZE? bne r0, 200$ ; Yes, branch to continue loop 2 300$: lda %CCA%, BIGINT_BYTES(%CCA%) ; Advance to next cluster cmplt %CCA%, %CLIM%, r0 ; Beyond last cluster? bne r0, 100$ ; No, branch to continue loop 1 $call DECC$GFPRINTF, - ; Print linefeed args= 400$: $return $end_routine PRINT_RESULT $routine CLEANUP, kind=stack, - local=true, - data_section_pointer=true ;++ ; Abstract: Perform shutdown trivia. ; Inputs: FILE_VAR ; Outputs: The output file is closed. ;-- .base r27, $ls ; Access linkage section ldq r22, $dp ; Access... .base r22, $ds ; ...data section $call DECC$FCLOSE, args=FILE_VAR/q $return $end_routine CLEANUP ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; EXTERNAL ROUTINES ;--------------------------------------------------------------------------- $routine PI, kind=stack, saved_regs=, - data_section_pointer=true ;++ ; Abstract: Program entry point. ; Outputs: ; PI.DAT with the specified number of decimal places. ;-- .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect $call ASK_NUMBER_OF_DIGITS, local=true $call INITIALIZE, local=true $call DECC$CLOCK stq r0, START $call COMPUTE, local=true $call DECC$CLOCK stq r0, FINISH $call PRINT_RESULT, local=true $call CLEANUP, local=true mov 1, r0 ; Return success status $return $end_routine PI .end PI