/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
14 |
|
|
|
15 |
|
|
#include <asm/linkage.h>
|
16 |
|
|
|
17 |
|
|
#ifdef __ARMEB__
|
18 |
|
|
#define xh r0
|
19 |
|
|
#define xl r1
|
20 |
|
|
#define yh r2
|
21 |
|
|
#define yl r3
|
22 |
|
|
#else
|
23 |
|
|
#define xl r0
|
24 |
|
|
#define xh r1
|
25 |
|
|
#define yl r2
|
26 |
|
|
#define yh r3
|
27 |
|
|
#endif
|
28 |
|
|
|
29 |
|
|
/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

ENTRY(__do_div64)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1, flags classify divisor
	bls	9f			@ divisor is 0 or 1 (ls: borrow or zero)
	tst	ip, r4			@ (divisor-1) & divisor == 0 iff power of 2
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4			@ any quotient bits above bit 31?
	mov	yh, #0			@ assume not: clear high quotient word
	blo	3f			@ xh < divisor: low half only

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4			@ leading zeros of divisor
	clz	ip, xh			@ leading zeros of high dividend word
	sub	yl, yl, ip		@ shift that lines their top bits up
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = current quotient bit mask
	mov	yl, r4, lsl yl		@ yl = divisor aligned with xh

#else

	@ No clz before ARMv5: shift divisor up one bit at a time until
	@ its top bit is set or it reaches the high dividend word.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000		@ stop if bit 31 reached...
	cmpcc	yl, xh			@ ... or aligned divisor >= xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl			@ does the aligned divisor fit in xh?
	orrcs	yh, yh, ip		@ yes: record this quotient bit...
	subcss	xh, xh, yl		@ ... subtract; S variant tracks xh == 0
	movnes	ip, ip, lsr #1		@ next bit position; Z set when exhausted
	mov	yl, yl, lsr #1		@ shift divisor back down in step
	bne	2b			@ loop until bits done or dividend zero

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0			@ clear low quotient word
	cmpeq	xl, r4			@ if xh == 0: is xl already < divisor?
	movlo	xh, xl			@ yes: xl is the whole remainder
	movlo	pc, lr			@ quotient already complete; return

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ quotient bit, walking down from bit 31
4:	movs	xl, xl, lsl #1		@ shift dividend up; top bit into carry
	adcs	xh, xh, xh		@ xh = 2*xh + carry; C = the "33rd bit"
	beq	6f			@ top of remainder became zero
	cmpcc	xh, r4			@ no 33rd bit: compare with real divisor
5:	orrcs	yl, yl, ip		@ divisor fits: set quotient bit...
	subcs	xh, xh, r4		@ ... and reduce the remainder
	movs	ip, ip, lsr #1		@ next bit; Z set after bit 0 done
	bne	4b
	mov	pc, lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33th bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b			@ 33rd bit set: divisor certainly fits
	cmp	xl, #0
	moveq	pc, lr			@ remainder exactly zero: done

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ ... shift count exposing next set bit
	mov	xl, xl, lsl xh		@ bring low remainder bits up
	mov	ip, ip, lsr xh		@ skip the matching quotient positions

#else

7:	movs	xl, xl, lsl #1		@ shift until a set bit falls off the top
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1			@ the bit shifted out above is remainder 1
	bne	4b
	mov	pc, lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = log2(divisor)

#else

	@ Compute log2(divisor) without clz by successively halving the
	@ search range: 16, 8, then 4 bits, finishing on the last two.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3		@ yl == 8: remaining order is 3
	addls	ip, ip, yl, lsr #1	@ yl is 1, 2 or 4: yl >> 1 == log2(yl)

#endif

	@ quotient = dividend >> ip; remainder = low ip bits of dividend.
	mov	yh, xh, lsr ip		@ high quotient word
	mov	yl, xl, lsr ip		@ low quotient word, upper bits missing
	rsb	ip, ip, #32		@ ip = 32 - log2(divisor)
	orr	yl, yl, xh, lsl ip	@ merge in the bits moved down from xh
	mov	xh, xl, lsl ip		@ isolate remainder bits by shifting
	mov	xh, xh, lsr ip		@ them up and back down again
	mov	pc, lr

	@ eq -> division by 1: obvious enough...
	@ (flags still set by the "subs ip, r4, #1" at entry)
9:	moveq	yl, xl			@ quotient = dividend
	moveq	yh, xh
	moveq	xh, #0			@ remainder = 0
	moveq	pc, lr

	@ Division by 0:
	str	lr, [sp, #-8]!		@ push lr, keeping sp 8-byte aligned
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ pop saved lr straight into pc