/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:	Nicolas Pitre
 * Created:	Oct 5, 2003
 * Copyright:	Monta Vista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

#ifdef __ARMEB__
/* Big endian: the high word of a 64-bit value lives in the lower-numbered reg. */
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
/* Little endian: the low word lives in the lower-numbered reg. */
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

ENTRY(__do_div64)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1 (Z set iff divisor == 1)
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor-1) & divisor == 0 iff power of 2
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f			@ xh < divisor: upper result word stays 0

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ shift = clz(divisor) - clz(xh)
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = quotient bit currently being tried
	mov	yl, r4, lsl yl		@ yl = divisor shifted up to align with xh

#else

	@ No clz before ARMv5: shift divisor left one bit at a time until
	@ its top bit is set or it would exceed xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ (Classic restoring division, one quotient bit per iteration.)
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= aligned divisor: set quotient bit
	subcss	xh, xh, yl		@ ...and subtract it, updating Z
	movnes	ip, ip, lsr #1		@ move to next bit unless xh reached 0
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl			@ whole dividend < divisor: it is the remainder
	movlo	pc, lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1		@ shift dividend left; top bit of xl...
	adcs	xh, xh, xh		@ ...carries into xh; C = bit 33 of remainder
	beq	6f			@ top part of remainder became zero
	cmpcc	xh, r4			@ skip compare if bit 33 already set (C=1)
5:	orrcs	yl, yl, ip		@ remainder >= divisor: set quotient bit
	subcs	xh, xh, r4		@ ...and subtract the divisor
	movs	ip, ip, lsr #1
	bne	4b
	mov	pc, lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	moveq	pc, lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ shift count = leading zeros of xl + 1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1		@ shift until a set bit falls out into C
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	mov	pc, lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = log2(divisor)

#else

	@ No clz: binary search for the divisor's (single) set bit,
	@ accumulating its position into ip 16/8/4 bits at a time.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ quotient = dividend >> log2(divisor); remainder = shifted-out low bits.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
	orr	yl, yl, xh, lsl ip	@ merge bits crossing the 32-bit boundary
	mov	xh, xl, lsl ip		@ keep only the low log2(divisor) bits...
	mov	xh, xh, lsr ip		@ ...of xl as the remainder
	mov	pc, lr

	@ eq -> division by 1: obvious enough...
	@ (Z still holds from the "subs ip, r4, #1" at entry.)
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	moveq	pc, lr

	@ Division by 0:
	str	lr, [sp, #-8]!		@ 8-byte push keeps EABI stack alignment
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8