1) Long Division by 9:

Data:
  AL: lower address of the considered number.
  AH: upper address of the considered number.
  C1 = &1C71C71C.
  C2 = 2*C1+1 = &38E38E39.
  PM: pointer to the 9 products NA*C1 for 0 <= NA <= 8.

Temporary Registers:
  NA: 4 MSBs of the current partial dividend before the division, remainder after the division.
  NB: 32 LSBs of the current partial dividend before the division, quotient after the division.
  TB: partial quotient (yielded by the 4 MSBs).

Unmodified Registers: AL, C1, C2, PM.

-- DIV9 --------------------------------------------------------------------
		; DIV9: in-place division by 9 of the multi-word number stored between
		; addresses AL and AH. Words are processed from AH down to AL (AH is
		; post-decremented by 4 after each store, so AH is modified); NA carries
		; the remainder of one word into the next, less significant, word.
		; The loop repeats while AH >= AL (unsigned compare).
		MOV	NA, #0				;Remainder = 0 at the beginning.
div9_loop	LDR	NB, [AH]			;LSBs of the dividend.
		LDR	TB, [PM, NA, LSL #2]		;\ TB = table entry PM[NA].
		ADDS	NB, NB, NA, LSL #2		;| We get rid of the 4 MSBs of the dividend
		SUBCS	NB, NB, #36			;| --> partial quotient TB (no remainder).
		ADDCS	TB, TB, #4			;/ On carry out of the ADDS: NB -= 36, TB += 4.
		SUB	NB, NB, NB, LSL #3		;\ NB * -7
		ADD	NB, NB, NB, LSL #6		;| 32-bit multiplication (modulo 2^32) by
		ADD	NB, NB, NB, LSL #12		;| -7 * 65 * 4097 * (2^24+1) = &38E38E39,
		ADD	NB, NB, NB, LSL #24		;/ i.e. NB * C2 where C2 = 2*C1+1.
		; The compare/subtract ladder below builds the new remainder in NA and
		; reduces NB; each step is conditional on the flags of the CMP before it.
		CMP	NB, C2
		SUBCS	NB, NB, C2
		SBC	NA, C2, C1, LSL #1		;NA = C flag (generated by the CMP).
		CMP	NB, C2, LSL #1
		SUBCS	NB, NB, C2, LSL #1
		ADDCS	NA, NA, #2
		CMP	NB, C2
		SUBCS	NB, NB, C2
		ADDCS	NA, NA, #1
		CMP	C1, NB				;C clear iff NB > C1.
		SBCCC	NB, NB, C1			;With C clear this computes NB - C1 - 1.
		ADDCC	NA, NA, #5
		ADD	NB, NB, TB			;Add the partial quotient yielded by the old remainder.
		STR	NB, [AH], #-4			;Store the quotient, then AH -= 4.
		CMP	AH, AL
		BCS	div9_loop			;Loop while AH >= AL.
----------------------------------------------------------------------------


2) Long Division by a 32-bit Number:

Data:
  AH: upper address of the dividend.
  BL: lower address of the quotient.
  BH: upper address of the quotient.
  SL: left shift count to normalize the divisor.
  SR: 32-SL.
  DV: normalized divisor.
  QT: pointer to the table of the 8-bit partial quotients.
  PL: pointer to the table of the LSBs of the products NQ.DV.

Temporary Registers:
  (NA,NB): current normalized partial dividend.
  NK: will be the 32-bit partial quotient (8-bit partial quotient for stage 1).
  NQ: 8-bit partial quotient (stages 2, 3, 4).
  NT: temporary.

Unmodified Registers: BL, SL, SR, DV, QT, PL.

-- LDIV --------------------------------------------------------------------
		; LDIV: long division of a multi-word dividend by the normalized 32-bit
		; divisor DV. Each loop iteration consumes one dividend word (read at AH,
		; which is post-decremented by 4) and produces one 32-bit quotient word
		; (stored at BH, which is post-decremented by 4) in four 8-bit stages:
		; stage 1 is written out explicitly, stages 2-4 are generated by the
		; WHILE/WEND block. The loop repeats while BH >= BL (unsigned compare).
		; AH and BH are modified; NA holds the running remainder between words.
		GBLA	ldiv_n				;Assembly-time counter for the repeated stages.
		MOV	NA, #0

ldiv_loop	LDR	NB, [AH], #-4			;Next digit.
		ORR	NA, NA, NB, LSR SR		;\ Normalize this digit --> the partial
		MOV	NB, NB, LSL SL			;/ dividend (NA,NB) is normalized.

		LDRB	NK, [QT, NA, LSR #23]		;Minimal partial quotient (real partial quotient = NK or NK+1).
		LDR	NT, [PL, NK, LSL #2]		;NT = table entry PL[NK] (LSBs of the product for NK).
		RSB	NA, NT, NA, LSL #7		;\ 8-bit shift of (NA,NB) to the left (normalization),
		MOVS	NA, NA, LSL #1			;| and subtract NT at the same time (C = 1 if there is
		ADD	NA, NA, NB, LSR #24		;| an overflow in the subtraction; in this case, the
		MOV	NB, NB, LSL #8			;/ real quotient was NK+1).
		CMPCC	NA, DV				;Only compare when the shift did not already set C.
		SUBCS	NA, NA, DV			;Correction of the partial dividend.
		ADDCS	NK, NK, #1			;Correction of the partial quotient.

ldiv_n		SETA	2				;These instructions will be assembled 3 times.
		WHILE	ldiv_n >= 0
		LDRB	NQ, [QT, NA, LSR #23]		;Cf above (idem for the following instructions).
		LDR	NT, [PL, NQ, LSL #2]		;Note: NQ is used instead of NK.
		RSB	NA, NT, NA, LSL #7
		MOVS	NA, NA, LSL #1
		ADD	NA, NA, NB, LSR #24
		[	ldiv_n != 0
		MOV	NB, NB, LSL #8			;This instruction is useless in the last stage.
		]
		CMPCC	NA, DV
		SUBCS	NA, NA, DV			;Correction of the partial dividend.
		ADC	NK, NQ, NK, LSL #8		;Correction + update of the 32-bit partial quotient (NK = NK*256 + NQ + C).
ldiv_n		SETA	ldiv_n - 1
		WEND

		STR	NK, [BH], #-4			;Store the 32-bit partial quotient, then BH -= 4.
		CMP	BH, BL
		BCS	ldiv_loop			;Loop while BH >= BL.
----------------------------------------------------------------------------


3) Computation of the Table of the Partial Quotients:

Data:
  QT: pointer to the table that will be created.
  DV: normalized divisor.

Temporary Registers:
  (NA,NB): multiple of 2*DV (NA: 9 MSBs of the dividend).
  NQ: current partial quotient.

Unmodified Registers: DV.

-- CQUOT -------------------------------------------------------------------
		; CQUOT: fill the byte table at QT with the partial quotients 0..255.
		; For each NQ the value is stored once, and a second time when adding
		; DV LSL #1 to NB produces a carry. QT is post-incremented by every
		; store, so it is modified and points past the table on exit.
		; The 64-bit accumulator (NA,NB) starts at -1 and grows by 2*DV per step
		; (the bit shifted out by DV LSL #1 is accounted for by the "#1" in the ADC).
		; NOTE(review): NA is updated but never read inside this fragment.
		MVN	NA, #0				;\ (NA,NB) = &FFFFFFFF_FFFFFFFF.
		MVN	NB, #0				;/
		MOV	NQ, #0				;First partial quotient.
cquot_loop	STRB	NQ, [QT], #1			;Store NQ for the current table index.
		ADDS	NB, NB, DV, LSL #1		;Low word += DV*2 (modulo 2^32); C = carry out.
		ADC	NA, NA, #1			;High word += 1 + C (flags are left unchanged).
		STRCSB	NQ, [QT], #1			;On carry (from the ADDS), store NQ a second time.
		ADD	NQ, NQ, #1			;Next partial quotient.
		CMP	NQ, #&100
		BNE	cquot_loop			;Loop for NQ = 0..255.
----------------------------------------------------------------------------


4) Computation of the Table of the Products:

Data:
  PL: pointer to the table that will be created.
  DV: normalized divisor.

Temporary Registers:
  T1: LSBs of the current product; the 7 MSBs are not represented.
  T2: counter.

Unmodified Registers: DV.

-- CPROD -------------------------------------------------------------------
		; CPROD: fill the word table at PL with 256 entries, entry k holding
		; k * (DV LSR #1) modulo 2^32 for k = 0..255. PL is post-incremented by 4
		; per entry (1024 bytes in total), so it is modified; T2 is 0 on exit.
		MOV	T1, #0				;Product for k = 0.
		MOV	T2, #256			;Number of entries to write.
cprod_loop	STR	T1, [PL], #4			;Store the current product, then PL += 4.
		ADD	T1, T1, DV, LSR #1		;Next product (32-bit wrap-around).
		SUBS	T2, T2, #1
		BNE	cprod_loop			;Loop until the 256 entries are written.
----------------------------------------------------------------------------


5) Conversion base 2 --> base 10:

Data:
  AH: upper address + 4 of the binary number.
  TL: lower address of the temporary decimal number T.
  TH: upper address + 4 of the temporary decimal number T.
  RH: upper address + 4 of the result R, = TH + 4.
  Z1: &01010101.
  RL: lower address where the non-interlaced result will be stored.

Temporary Registers:
  T1: misc.
  T2: misc.
  T3: misc.
  T4: misc, used in the last part.
  Z5: &05050505, calculated and used in the last part.
  TA: current word of the binary number.
  TK: current address of the least significant non-zero digits of T.
  TT: pointer to a word of T or R.
  CT: counter.
  FL: flag for the division.

Unmodified Registers: TL, RH, Z1.

Note:
  T4 and Z5 must be mapped on 2 registers corresponding to AH, TH, TK, TT and CT.
  The words of T must be initially equal to &80000000.
  The words of R must be initially equal to 0.
  The last 4 digits of R will be equal to 0, so the lengths of the decimal numbers must
  be equal to the wanted number of digits + 4 (there will still be rounding errors).

-- CONV --------------------------------------------------------------------
		; CONV: conversion of a binary number to decimal.
		; The binary number is read one word at a time downwards from AH
		; (pre-decremented by 4), most significant bit first. T and R are stored
		; interlaced: the words of T and R alternate, hence the 8-byte strides.
		; Each word holds 4 digits, one per byte. For every bit equal to 1 the
		; current value of T is added to R; T is halved lazily: FL bit 0 records
		; that one division by 2 is pending, so that two pending halvings are
		; done as a single division by 4.
		; CT: bits 31..27 count the bits read in the current word TA, the low
		; bits count the additions since the last "cleaning" of R.
		; The conversion ends (conv_norm) when T becomes zero; R is then
		; normalized to digits 0..9 and copied, non-interlaced, to RL.
		MOV	T1, #&04000000
		STR	T1, [TH, #-4]!			;Initial value of T: 4.000.
		MOV	TK, TH
		MOV	CT, #0
		MOV	FL, #0
conv_outer1	LDR	TA, [AH, #-4]!			;Read the next word of the binary number.
conv_inner1	EORS	FL, FL, #1			;Toggle the "halving pending" flag.
		BNE	conv_readbit			;FL = 1 now: postpone the division.

		MOV	TT, TH				;T will be divided by 4.
		LDR	T1, [TT]			;Read the most significant word of T.
		CMP	T1, #3
		SUBLS	TH, TH, #8			;Decrease TH if this word will become zero.
		CMP	TH, TL
		BEQ	conv_norm			;Branch if T will become zero (end of the conversion).
		STMFD	SP!, {Z1, FL}			;Save Z1 and FL.
		ORR	Z1, Z1, Z1, LSL #1		;Z1 = &03030303.
		MOV	Z1, Z1, LSL #6			;Z1 = &C0C0C0C0.
		MVN	FL, Z1				;FL = &3F3F3F3F.
conv_loop4	MOV	T3, T1, LSL #30			;T3[31..30]: binary backcarry due to the least significant digit of T1.
		ADD	T2, T3, T3, LSR #2
		ADD	T2, T3, T2, LSR #7		;T2: decimal backcarry * 32 due to the least significant digit of T1.
		AND	T3, Z1, T1, LSR #2		;T3: binary backcarries in bits 23, 22, 15, 14, 7 and 6.
		AND	T1, FL, T1, LSR #2		;Divide T1 by 4 and clear the backcarries in T1.
		ADD	T1, T1, T3, LSR #5		;Add &02 where there are backcarries.
		ADD	T3, T3, T3, LSR #2		;Multiply T3 by 5/4.
		ADD	T1, T1, T3, LSR #12		;Add &00.05 where there are backcarries.
		STR	T1, [TT], #-8			;Store the result.
		LDR	T1, [TT]			;Read the next 4 digits.
		ADDS	T1, T1, T2, LSR #3		;Add the decimal backcarry * 4 due to the first digit (LSD).
		ADD	T1, T1, T3, LSL #22		;Add the decimal backcarry * 4 due to the second digit.
		BPL	conv_loop4			;Loop if there are real digits (not &80000000).
		BIC	T1, T1, #&80000000		;T1 = decimal backcarry * 4.
		MOVS	T1, T1, LSR #2			;Divide T1 by 4. If it is 0, no digit is concatenated to the number.
		CMPNE	TT, TL				;Idem if the wanted precision has been reached.
		STRNE	T1, [TK, #-8]!			;Otherwise the digits given by the backcarry are concatenated.
		LDMFD	SP!, {Z1, FL}			;Restore Z1 and FL.

conv_readbit	MOVS	TA, TA, LSL #1			;Next bit of the binary number.
		BCC	conv_next			;Branch if zero (no addition).
		MOVS	FL, FL, LSR #1			;C = pending-halving flag, and FL = 0.
		BCC	conv_add			;No halving pending: add directly.

		MOV	TT, TH				;T will be divided by 2.
		MOV	T2, #0				;T2: "backcarry": 0 or &05000000.
		LDR	T1, [TT]			;Read the most significant word of T.
		CMP	T1, #1
		SUBLS	TH, TH, #8			;Decrease TH if this word will become zero.
		CMP	TH, TL
		BEQ	conv_norm			;Branch if T will become zero (end of the conversion).
conv_loop3	MOVS	T2, T2, LSR #27			;C = backcarry, and T2 = 0.
		MOVS	T1, T1, RRX			;Divide by 2 (backcarries in bits 31, 23, 15, 7 and C).
		AND	T3, Z1, T1, LSR #7		;T3: backcarries in bits 24, 16, 8 and 0.
		BIC	T1, T1, Z1, LSL #7		;Clear the backcarries in T1.
		ADD	T3, T3, T3, LSL #2
		ADD	T1, T1, T3			;Add 5 where there are backcarries.
		STR	T1, [TT], #-8			;Store the result.
		MOVCS	T2, #&05000000			;New backcarry.
		LDR	T1, [TT]			;Read the next 4 digits.
		TEQ	T1, #0
		BPL	conv_loop3			;Loop if there are real digits (not &80000000).
		TEQ	T2, #0				;If backcarry = 0, no digit is concatenated to the number.
		CMPNE	TT, TL				;Idem if the wanted precision has been reached.
		STRNE	T2, [TK, #-8]!			;Otherwise digits 5, 0, 0, 0 are concatenated.

conv_add	MOV	TT, TK				;T will be added to R.
conv_loop1	LDMIA	TT!, {T1, T2}			;Read the next 4 digits of T and R.
		ADD	T1, T1, T2			;Addition (without carry).
		STR	T1, [TT, #-4]			;Store the result.
		CMP	TH, TT
		BCS	conv_loop1			;Loop while there still are non-zero digits of T.

		ADDS	CT, CT, #1			;\ Every 4 additions,
		TST	CT, #4				;| the result must be
		BEQ	conv_next			;/ "cleaned".
		BIC	CT, CT, #4			;CT[2..0] = 0.

		ADD	TT, TK, #4			;Least significant non-zero word of the result.
		MOV	T2, #0				;Clear the carry (whose value is 0 or 12).
conv_loop2	LDR	T1, [TT]			;Read the next 4 digits (a digit is in 0..239).
		ADD	T1, T1, T2, LSR #28		;Add the carry (0 or 12).
		AND	T2, T1, Z1, LSL #7		;Bits 31, 23, 15, 7: 1 when the corresponding digit is >= 128.
		ADD	T1, T1, T2, LSL #4		;\ Add 12 (carry) to the left digit when the bit has
		ADD	T1, T1, T2, LSL #3		;| the value 1 (except for bit 31), and subtract 120
		ADD	T1, T1, T2, LSR #4		;| from the digits corresponding to the non-zero
		SUB	T1, T1, T2			;/ bits.
		ORR	T2, T2, T2, LSR #1		;Carry in bits 28..31: 12 if bit 31 had the value 1, otherwise 0.
		CMP	TH, TT
		STR	T1, [TT], #8			;Store the result.
		BCS	conv_loop2			;Loop while the most significant non-zero word of T was not reached...
		CMP	T2, #0
		BNE	conv_loop2			;and while the carry is not 0.

conv_next	ADDS	CT, CT, #&08000000		;&8000000 = &100000000 / 32.
		BCC	conv_inner1			;Loop if all the bits of the word TA have not been read.
		B	conv_outer1			;Loop.

conv_norm	ADD	TA, TL, #12			;The result will be normalized and copied at RL.
		ADD	Z5, Z1, Z1, LSL #2		;Z5 = &05050505.
		MOV	T2, #0				;Clear the carry.
conv_outer2	LDR	T1, [TA], #8			;Read the next 4 digits.
		ADD	T1, T1, T2			;Add the carry.
		MOV	T2, #0				;Clear the new carry.
		AND	T3, T1, Z1, LSL #7
		AND	T4, T1, Z1, LSL #6
		ORRS	T3, T3, T4, LSL #1		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 64.
conv_inner2	ADDMI	T2, T2, #6			;Add 6 to the carry if the most significant digit is >= 64.
		ADD	T3, T3, T3, LSR #1
		ADD	T1, T1, T3, LSL #3		;Add 6 to the next digits when the bit is 1.
		ADD	T3, T3, T3, LSR #2		;\ Subtract 60 when
		SUB	T1, T1, T3, LSR #2		;/ the bit is 1.
		AND	T3, T1, Z1, LSL #7
		AND	T4, T1, Z1, LSL #6
		ORRS	T3, T3, T4, LSL #1		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 64.
		BNE	conv_inner2			;Loop while at least one bit is 1.
		ORR	T3, T1, Z1, LSL #7		;T3: set bits 31, 23, 15, 7.
		SUB	T3, T3, Z5, LSL #3		;Subtract 40 from each digit (in T3).
		ANDS	T3, T3, Z1, LSL #7		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 40.
		ADDMI	T2, T2, #4			;Add 4 to the carry if the most significant digit is >= 40.
		ADD	T1, T1, T3, LSL #3		;Add 4 to the next digits when the bit is 1.
		SUB	T1, T1, T3, LSR #2		;\ Subtract 40 when
		SUB	T1, T1, T3, LSR #4		;/ the bit is 1.
		ORR	T3, T1, Z1, LSL #7		;T3: set bits 31, 23, 15, 7.
		SUB	T3, T3, Z5, LSL #2		;Subtract 20 from each digit (in T3).
		ANDS	T3, T3, Z1, LSL #7		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 20.
		ADDMI	T2, T2, #2			;Add 2 to the carry if the most significant digit is >= 20.
		ADD	T1, T1, T3, LSL #2		;Add 2 to the next digits when the bit is 1.
		SUB	T1, T1, T3, LSR #3		;\ Subtract 20 when
		SUB	T1, T1, T3, LSR #5		;/ the bit is 1.
		ORR	T3, T1, Z1, LSL #7		;T3: set bits 31, 23, 15, 7.
		SUB	T3, T3, Z5, LSL #1		;Subtract 10 from each digit (in T3).
		ANDS	T3, T3, Z1, LSL #7		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 10.
conv_inner3	ADDMI	T2, T2, #1			;Add 1 to the carry if the most significant digit is >= 10.
		ADD	T1, T1, T3, LSL #1		;Add 1 to the next digits when the bit is 1.
		SUB	T1, T1, T3, LSR #4		;\ Subtract 10 when
		SUB	T1, T1, T3, LSR #6		;/ the bit is 1.
		ORR	T3, T1, Z1, LSL #7		;T3: set bits 31, 23, 15, 7.
		SUB	T3, T3, Z5, LSL #1		;Subtract 10 from each digit (in T3).
		ANDS	T3, T3, Z1, LSL #7		;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 10.
		BNE	conv_inner3			;Branch if at least one bit is 1 (prob. 1/10).
		STR	T1, [RL], #4			;Store the result.
		CMP	RH, TA
		BCS	conv_outer2			;Loop while there still are digits.
----------------------------------------------------------------------------


6) Main program:

-- MAIN --------------------------------------------------------------------
; Register names used by the main program. Note the deliberate sharing:
; SL and T5 both name R8, and NN and T6 both name R7, so T5/T6 may only be
; used where SL/NN are dead or have been saved.
SP		RN	13
BB		RN	2
TL		RN	3
TH		RN	4
RL		RN	5
QT		RN	6
NN		RN	7
SL		RN	8
T1		RN	12
T2		RN	11
T3		RN	10
T4		RN	9
T5		RN	8
T6		RN	7

; Main program.
; In:  R0 (called N in the comments below) and R1 = base address of the work
;      area; LR = return address.
;      NOTE(review): N is presumably the requested number of digits - confirm.
; Out: returns through LDMFD SP!, {R0, PC}, which restores the entry value of R0.
; Layout computed below: TL = R1 + BB, TH = R1 + 2*BB - 4, RL = R1 + 2*(N+4),
; QT = RL + BB, and the table of products starts at QT + 512.
start		ADD	BB, R0, R0, LSR #2		;BB = 5 * R0 / 4. BB will be divided by 3.
		ADD	T1, BB, BB, LSL #2		;\
		ADD	T1, T1, T1, LSL #4		;|
		ADD	T1, T1, T1, LSL #8		;| Multiply BB by &AAAAAAAB = 3^(-1) modulo 2^32.
		ADD	T1, T1, T1, LSL #16		;|
		ADD	BB, BB, T1, LSL #1		;/
		LDR	T2, fv				;T2 = &55555555.
		RSBS	T1, BB, T2, LSL #1
		SBCCC	BB, BB, T2, LSL #1		;If (BB >= &AAAAAAAB), BB -= &AAAAAAAB.
		RSBS	T1, BB, T2
		SBCCC	BB, BB, T2			;If (BB >= &55555556), BB -= &55555556.
		ADD	BB, BB, #3			;BB = 5 * R0 / 12 + 3.
		BIC	BB, BB, #3			;BB final value (multiple of 4).
		ADD	TL, R1, BB			;TL = R1 + B.
		ADD	TH, TL, BB
		SUB	TH, TH, #4			;TH = R1 + 2 * B - 4.
		STMFD	SP!, {R0, LR}			;Save N and the return address.
		ADD	R0, R0, #4			;R0 = N + 4 from now on.
		ADD	RL, R1, R0, LSL #1		;RL = R1 + 2 * (N + 4).
		ADD	QT, RL, BB			;QT = RL + B.
		MOV	NN, #3				;Initial divisor: NN = 3.
		MOV	SL, #30				;Shift count: SL = 30.

		MOV	T1, TL
		MOV	T3, RL
init_loop	STR	T2, [T1], #4			;T = 4/3 = 01010101...
		STR	T2, [T3], #4			;R = 4/3 = 01010101...
		CMP	QT, T3
		BNE	init_loop			;Loop until the R pointer reaches QT.
		ORR	T2, T2, #&80000000		;T2 = 1101010101...
		STR	T2, [T3, #-4]			;R = 4 (1/2 + 1/3) = 1101010101...

; Main loop: one iteration per odd divisor NN = 3, 5, 7, ...
outer		CMP	TH, TL
		BCC	ldiv				;Branch if T = 0 (no division by 9).
		LDR	T2, c2				;T2 = &38E38E39.
		MOV	T1, T2, LSR #1			;T1 = &1C71C71C.
		ADR	T3, pm				;Pointer to the T1*i's where i is in 0..8.
		STMFD	SP!, {TH, SL}			;DIV9 modifies AH (= TH) and NB (= T5 = R8 = SL).
		<DIV9>	AL=TL, AH=TH, C1=T1, C2=T2, PM=T3, NA=T4, NB=T5, TB=LR
		LDMFD	SP!, {TH, SL}
		LDR	T1, [TH]
		CMP	T1, #0
		SUBEQ	TH, TH, #4			;Update TH (the most significant word of T became zero).
ldiv		MOV	T3, NN, LSL SL			;T3: normalized divisor.
		<CQUOT>	QT=QT, DV=T3, NA=T1, NB=T2, NQ=T4
		ADD	QT, RL, BB			;Restore QT (which was modified by CQUOT).
		ADD	T4, QT, #512			;The table of products follows the 512-byte table of quotients.
		<CPROD>	PL=T4, DV=T3, T1=T1, T2=T2
		MOV	T4, NN, LSR #5			;T4: address + 4 where the most significant
		SUB	T4, TL, T4, LSL #2		;word of the quotient will be stored.
		CMP	T4, R1				;Branch to conversion if the address is less than
		BLS	conv				;the address of the least significant word.
		SUB	T4, T4, #4
		MOV	T1, #1
		MOV	T1, T1, ROR NN			;T1 = 2^k: number that will be added to [T4, BB].
		LDR	T2, [T4, BB]
		STMFD	SP!, {BB, TL, TH, RL, NN, T2, T4}	;LDIV reuses these registers; T2 = original word at [T4, BB].
		ORR	T2, T2, T1
		STR	T2, [T4, BB]			;[R1+BB..T4+BB]: 1/2^n + 1/3^n.
		ADD	TH, T4, BB			;Upper address of the dividend.
		RSB	LR, SL, #32			;SR = 32 - SL.
		ADD	TL, QT, #512			;Pointer to the table of products.
		<LDIV>	AH=TH, BL=R1, BH=T4, SL=SL, SR=LR, DV=T3, QT=QT, PL=TL, NA=T1, NB=T2, NK=NN, NQ=BB, NT=RL
		LDMFD	SP!, {BB, TL, TH, RL, NN, T2, T4}
		STR	T2, [T4, BB]			;Restore [T4, BB].
		STMFD	SP!, {R1, RL}			;R1 and RL are used as running pointers below.
		TST	NN, #2				;Bit 1 of NN selects between addition and subtraction.
		BNE	subtract
		MOV	T3, #0				;The quotient will be added to the result.
add_loop1	MOVS	T3, T3, LSR #1			;C = carry saved in T3, and T3 = 0.
		LDR	T1, [R1], #4			;Next word of the quotient.
		LDR	T2, [RL]			;Corresponding word of the result.
		ADCS	T2, T2, T1
		STR	T2, [RL], #4
		ADC	T3, T3, T3			;Save the carry in T3 (the CMP below changes C).
		CMP	T4, R1
		BCS	add_loop1			;Loop while R1 <= T4.
		MOVS	T3, T3, LSR #1			;C = last carry.
		BCC	addsub_end
add_loop2	LDR	T2, [RL]			;Propagate the carry into the upper words of the result.
		ADDS	T2, T2, #1
		STR	T2, [RL], #4
		BCC	addsub_end
		B	add_loop2
subtract	MOV	T3, #1				;The quotient will be subtracted from the result.
sub_loop1	MOVS	T3, T3, LSR #1			;C = saved carry (C = 1: no borrow), and T3 = 0.
; NOTE(review): when sub_loop2 is re-entered from the final BCC below, R1 is
; already past T4, so [R1] is read beyond the quotient just computed (unlike
; add_loop2, which only adds the carry). Presumably these words are zero - confirm.
sub_loop2	LDR	T1, [R1], #4
		LDR	T2, [RL]
		SBCS	T2, T2, T1
		STR	T2, [RL], #4
		ADC	T3, T3, T3			;Save the carry in T3 (the CMP below changes C).
		CMP	T4, R1
		BCS	sub_loop1			;Loop while R1 <= T4.
		MOVS	T3, T3, LSR #1			;C = last carry.
		BCC	sub_loop2			;Borrow pending: continue with the next word.
addsub_end	LDMFD	SP!, {R1, RL}
		ADD	NN, NN, #2			;Next divisor.
		MOVS	T3, NN, LSL SL			;C = 1 if NN no longer fits with this shift count.
		BCC	outer
		SUB	SL, SL, #1			;SL = new shift count.
		B	outer

; Conversion of the binary result (stored from RL up to QT) to decimal.
conv		MOV	TL, R1
		ADD	RL, R1, R0, LSL #1		;R0 = N + 4 here, so RL = R1 + 2 * (N + 4).
		SUB	TH, RL, #4
		MOV	T1, R1
		MOV	T2, #0
		MOV	T3, #&80000000
ff_loop		STMIA	T1!, {T3, T2}			;T3 (= R10) stored before T2 (= R11).
		CMP	T1, TH
		BLS	ff_loop				;Fill the interlaced area: words of T = &80000000, words of R = 0.
		LDR	T6, z1				;T6 = &01010101 (T6 = R7: NN is no longer needed).
		<CONV>	AH=QT, TL=TL, TH=TH, RL=R1, RH=RL, Z1=T6, T1=T1, T2=T2, T3=T3, T4=T4, Z5=T5, TA=LR, TT=T5, FL=R0, CT=BB, TK=T4
		LDMFD	SP!, {R0, PC}			;Restore R0 and return.

; Literal data loaded by the main program.
z1		DCD	&01010101		;Z1 for CONV (one bit per digit byte).
fv		DCD	&55555555		;Used for the division by 3 and as the initial pattern of T and R.
c2		DCD	&38E38E39		;C2 for DIV9; C1 = C2 LSR #1 = &1C71C71C.
; pm: table for DIV9, entry i = i * &1C71C71C for i = 0..8.
pm		DCD	0,&1C71C71C,&38E38E38,&55555554,&71C71C70,&8E38E38C,&AAAAAAA8,&C71C71C4,&E38E38E0
----------------------------------------------------------------------------