source: mainline/uspace/lib/c/arch/ia64/src/fibril.S@ 3b0f1b9a

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 3b0f1b9a was 7947c34, checked in by Jakub Jermar <jakub@…>, 11 years ago

Autogenerate ia64 fibril context_t and its offsets.

Rewrite fibril context_save_arch() and context_restore_arch() to use the
new autogenerated offsets. Also improve instruction level parallelization
of these routines by eliminating some stops.

  • Property mode set to 100644
File size: 8.6 KB
Line 
1#
2# Copyright (c) 2005 Jakub Jermar
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8#
9# - Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# - Redistributions in binary form must reproduce the above copyright
12# notice, this list of conditions and the following disclaimer in the
13# documentation and/or other materials provided with the distribution.
14# - The name of the author may not be used to endorse or promote products
15# derived from this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#
28
29#include <libarch/fibril_context.h>
30
/* The two routines below are placed in the text section and exported. */
31.text
32
33.global context_save
34.global context_restore
35
/*
 * context_save
 *
 * Store the current execution context into the structure whose address is
 * passed in in0. Every field is addressed as in0 + CONTEXT_OFFSET_*.
 *
 * Saved state:
 *  - r1, r4-r7, r12 (sp) and r13 (tp), spilled with st8.spill
 *  - ar.pfs (the value obtained by alloc), ar.unat (both the value on entry
 *    and the value after the spills), ar.rsc, ar.bsp, ar.rnat, ar.lc
 *  - b0 (stored in the pc field) and b1-b5
 *  - pr
 *  - f2-f5 and f16-f31, spilled with stf.spill
 *
 * Returns 1 in r8.
 *
 * Local register usage:
 *  loc0       ar.pfs on entry
 *  loc1       ar.unat on entry; written back to ar.unat before returning
 *  loc2-loc7  temporaries (ar.* values first, then b0-b5, then pr)
 *  loc8-loc48 precomputed addresses of the individual context fields
 *
 * NOTE(review): ar.rsc is rewritten with its two low bits cleared and is not
 * written back before returning; only the original value is stored in the
 * context. Confirm that callers expect this.
 */
36context_save:
37 alloc loc0 = ar.pfs, 1, 49, 0, 0 /* 1 input (in0), 49 locals (loc0-loc48); loc0 = ar.pfs */
38 mov loc1 = ar.unat ;; /* ar.unat on entry, read before the spills below */
39 mov loc3 = ar.rsc
40
/* No stops are written by hand between .auto and .explicit - presumably the assembler inserts them there; TODO confirm. */
41 .auto
42
43 /*
44 * Flush dirty registers to backing store.
45 * After this ar.bsp and ar.bspstore are equal.
46 */
47 flushrs
48 mov loc4 = ar.bsp
49
50 /*
51 * Put RSE into enforced lazy mode (two low bits of ar.rsc cleared)
52 * so that ar.rnat can be read.
53 */
54 and loc5 = ~3, loc3
55 mov ar.rsc = loc5
56 mov loc5 = ar.rnat /* loc5 is reused: from here on it holds ar.rnat */
57
58 .explicit
59
60 mov loc6 = ar.lc
61
/* Compute the address of every context field up front (loc8-loc48). */
/* Application registers. */
62 add loc8 = CONTEXT_OFFSET_AR_PFS, in0
63 add loc9 = CONTEXT_OFFSET_AR_UNAT_CALLER, in0
64 add loc10 = CONTEXT_OFFSET_AR_UNAT_CALLEE, in0
65 add loc11 = CONTEXT_OFFSET_AR_RSC, in0
66 add loc12 = CONTEXT_OFFSET_BSP, in0
67 add loc13 = CONTEXT_OFFSET_AR_RNAT, in0
68 add loc14 = CONTEXT_OFFSET_AR_LC, in0
69
/* General registers. */
70 add loc15 = CONTEXT_OFFSET_R1, in0
71 add loc16 = CONTEXT_OFFSET_R4, in0
72 add loc17 = CONTEXT_OFFSET_R5, in0
73 add loc18 = CONTEXT_OFFSET_R6, in0
74 add loc19 = CONTEXT_OFFSET_R7, in0
75 add loc20 = CONTEXT_OFFSET_SP, in0
76 add loc21 = CONTEXT_OFFSET_TP, in0
77
/* Branch registers (b0 goes to the pc field). */
78 add loc22 = CONTEXT_OFFSET_PC, in0
79 add loc23 = CONTEXT_OFFSET_B1, in0
80 add loc24 = CONTEXT_OFFSET_B2, in0
81 add loc25 = CONTEXT_OFFSET_B3, in0
82 add loc26 = CONTEXT_OFFSET_B4, in0
83 add loc27 = CONTEXT_OFFSET_B5, in0
84
/* Predicate registers. */
85 add loc28 = CONTEXT_OFFSET_PR, in0
86
/* Floating-point registers. */
87 add loc29 = CONTEXT_OFFSET_F2, in0
88 add loc30 = CONTEXT_OFFSET_F3, in0
89 add loc31 = CONTEXT_OFFSET_F4, in0
90 add loc32 = CONTEXT_OFFSET_F5, in0
91
92 add loc33 = CONTEXT_OFFSET_F16, in0
93 add loc34 = CONTEXT_OFFSET_F17, in0
94 add loc35 = CONTEXT_OFFSET_F18, in0
95 add loc36 = CONTEXT_OFFSET_F19, in0
96 add loc37 = CONTEXT_OFFSET_F20, in0
97 add loc38 = CONTEXT_OFFSET_F21, in0
98 add loc39 = CONTEXT_OFFSET_F22, in0
99 add loc40 = CONTEXT_OFFSET_F23, in0
100 add loc41 = CONTEXT_OFFSET_F24, in0
101 add loc42 = CONTEXT_OFFSET_F25, in0
102 add loc43 = CONTEXT_OFFSET_F26, in0
103 add loc44 = CONTEXT_OFFSET_F27, in0
104 add loc45 = CONTEXT_OFFSET_F28, in0
105 add loc46 = CONTEXT_OFFSET_F29, in0
106 add loc47 = CONTEXT_OFFSET_F30, in0
107 add loc48 = CONTEXT_OFFSET_F31, in0 ;;
108
109 /*
110 * Save general registers including NaT bits.
111 * Every spill is followed by a stop, i.e. sits in its own instruction group.
   * The NaT bits are collected in ar.unat, which is read into loc2 afterwards.
   */
112 st8.spill [loc15] = r1 ;;
113 st8.spill [loc16] = r4 ;;
114 st8.spill [loc17] = r5 ;;
115 st8.spill [loc18] = r6 ;;
116 st8.spill [loc19] = r7 ;;
117 st8.spill [loc20] = r12 ;; /* save sp */
118 st8.spill [loc21] = r13 ;; /* save tp */
119
120 mov loc2 = ar.unat /* ar.unat as left by the spills above (the "callee" value) */
121
122 /*
123 * Save application registers
124 */
125 st8 [loc8] = loc0 /* save ar.pfs */
126 st8 [loc9] = loc1 ;; /* save ar.unat (caller) */
127 st8 [loc10] = loc2 /* save ar.unat (callee) */
128 st8 [loc11] = loc3 /* save ar.rsc */
129 st8 [loc12] = loc4 /* save ar.bsp */
130 st8 [loc13] = loc5 /* save ar.rnat */
131 st8 [loc14] = loc6 ;; /* save ar.lc */
132
133 /*
134 * Save branch registers.
   * loc2-loc7 are free again at this point and are reused as temporaries.
135 */
136 mov loc2 = b0
137 mov loc3 = b1
138 mov loc4 = b2
139 mov loc5 = b3
140 mov loc6 = b4
141 mov loc7 = b5 ;;
142 st8 [loc22] = loc2 /* save pc */
143 st8 [loc23] = loc3
144 st8 [loc24] = loc4
145 st8 [loc25] = loc5
146 st8 [loc26] = loc6
147 st8 [loc27] = loc7 ;;
148
149 /*
150 * Save predicate registers
151 */
152 mov loc2 = pr ;;
153 st8 [loc28] = loc2
154
155 /*
156 * Save floating-point registers.
157 */
158 stf.spill [loc29] = f2
159 stf.spill [loc30] = f3
160 stf.spill [loc31] = f4
161 stf.spill [loc32] = f5
162
163 stf.spill [loc33] = f16
164 stf.spill [loc34] = f17
165 stf.spill [loc35] = f18
166 stf.spill [loc36] = f19
167 stf.spill [loc37] = f20
168 stf.spill [loc38] = f21
169 stf.spill [loc39] = f22
170 stf.spill [loc40] = f23
171 stf.spill [loc41] = f24
172 stf.spill [loc42] = f25
173 stf.spill [loc43] = f26
174 stf.spill [loc44] = f27
175 stf.spill [loc45] = f28
176 stf.spill [loc46] = f29
177 stf.spill [loc47] = f30
178 stf.spill [loc48] = f31
179
180 mov ar.unat = loc1 /* put back the ar.unat value read on entry */
181
182 add r8 = r0, r0, 1 /* context_save returns 1 */
183 br.ret.sptk.many b0
184
/*
 * context_restore
 *
 * Load an execution context from the structure whose address is passed in
 * in0. Every field is addressed as in0 + CONTEXT_OFFSET_*; the set of fields
 * read here is the same set that context_save writes.
 *
 * Restored state:
 *  - ar.bspstore (from the bsp field), ar.rnat, ar.pfs, ar.rsc, ar.lc
 *  - r1, r4-r7, r12 (sp) and r13 (tp), filled with ld8.fill while ar.unat
 *    holds the stored "callee" value
 *  - b0 (from the pc field) and b1-b5
 *  - pr
 *  - f2-f5 and f16-f31, filled with ldf.fill
 *  - ar.unat, finally set to the stored "caller" value
 *
 * The final br.ret goes through the restored b0 with r8 = 0, so execution
 * continues at the stored pc rather than at the caller of this routine.
 *
 * Local register usage:
 *  loc0       stored ar.pfs (overwrites the value obtained by alloc)
 *  loc1       stored ar.unat (caller)
 *  loc2-loc7  temporaries (ar.* values first, then b0-b5, then pr)
 *  loc8       ar.rsc with its two low bits cleared
 *  loc9-loc49 precomputed addresses of the individual context fields
 */
185context_restore:
186 alloc loc0 = ar.pfs, 1, 50, 0, 0 ;; /* 1 input (in0), 50 locals (loc0-loc49) */
187
/* Compute the address of every context field up front (loc9-loc49). */
/* Application registers. */
188 add loc9 = CONTEXT_OFFSET_AR_PFS, in0
189 add loc10 = CONTEXT_OFFSET_AR_UNAT_CALLER, in0
190 add loc11 = CONTEXT_OFFSET_AR_UNAT_CALLEE, in0
191 add loc12 = CONTEXT_OFFSET_AR_RSC, in0
192 add loc13 = CONTEXT_OFFSET_BSP, in0
193 add loc14 = CONTEXT_OFFSET_AR_RNAT, in0
194 add loc15 = CONTEXT_OFFSET_AR_LC, in0
195
/* General registers. */
196 add loc16 = CONTEXT_OFFSET_R1, in0
197 add loc17 = CONTEXT_OFFSET_R4, in0
198 add loc18 = CONTEXT_OFFSET_R5, in0
199 add loc19 = CONTEXT_OFFSET_R6, in0
200 add loc20 = CONTEXT_OFFSET_R7, in0
201 add loc21 = CONTEXT_OFFSET_SP, in0
202 add loc22 = CONTEXT_OFFSET_TP, in0
203
/* Branch registers (the pc field goes to b0). */
204 add loc23 = CONTEXT_OFFSET_PC, in0
205 add loc24 = CONTEXT_OFFSET_B1, in0
206 add loc25 = CONTEXT_OFFSET_B2, in0
207 add loc26 = CONTEXT_OFFSET_B3, in0
208 add loc27 = CONTEXT_OFFSET_B4, in0
209 add loc28 = CONTEXT_OFFSET_B5, in0
210
/* Predicate registers. */
211 add loc29 = CONTEXT_OFFSET_PR, in0
212
/* Floating-point registers. */
213 add loc30 = CONTEXT_OFFSET_F2, in0
214 add loc31 = CONTEXT_OFFSET_F3, in0
215 add loc32 = CONTEXT_OFFSET_F4, in0
216 add loc33 = CONTEXT_OFFSET_F5, in0
217
218 add loc34 = CONTEXT_OFFSET_F16, in0
219 add loc35 = CONTEXT_OFFSET_F17, in0
220 add loc36 = CONTEXT_OFFSET_F18, in0
221 add loc37 = CONTEXT_OFFSET_F19, in0
222 add loc38 = CONTEXT_OFFSET_F20, in0
223 add loc39 = CONTEXT_OFFSET_F21, in0
224 add loc40 = CONTEXT_OFFSET_F22, in0
225 add loc41 = CONTEXT_OFFSET_F23, in0
226 add loc42 = CONTEXT_OFFSET_F24, in0
227 add loc43 = CONTEXT_OFFSET_F25, in0
228 add loc44 = CONTEXT_OFFSET_F26, in0
229 add loc45 = CONTEXT_OFFSET_F27, in0
230 add loc46 = CONTEXT_OFFSET_F28, in0
231 add loc47 = CONTEXT_OFFSET_F29, in0
232 add loc48 = CONTEXT_OFFSET_F30, in0
233 add loc49 = CONTEXT_OFFSET_F31, in0 ;;
234
/* Fetch the stored application register values into loc0-loc6. */
235 ld8 loc0 = [loc9] /* load ar.pfs */
236 ld8 loc1 = [loc10] /* load ar.unat (caller) */
237 ld8 loc2 = [loc11] /* load ar.unat (callee) */
238 ld8 loc3 = [loc12] /* load ar.rsc */
239 ld8 loc4 = [loc13] /* load ar.bsp */
240 ld8 loc5 = [loc14] /* load ar.rnat */
241 ld8 loc6 = [loc15] /* load ar.lc */
242
/* No stops are written by hand between .auto and .explicit - presumably the assembler inserts them there; TODO confirm. */
243 .auto
244
245 /*
246 * Invalidate the ALAT
247 */
248 invala
249
250 /*
251 * Put RSE into enforced lazy mode (two low bits of the stored ar.rsc
252 * cleared) so that ar.bspstore and ar.rnat can be written.
253 */
254 movl loc8 = ~3
255 and loc8 = loc3, loc8
256 mov ar.rsc = loc8
257
258 /*
259 * Flush dirty registers to backing store.
260 * We do this because we want the following move
261 * to ar.bspstore to assign the same value to ar.bsp.
262 */
263 flushrs
264
265 /*
266 * Restore application registers
267 */
268 mov ar.bspstore = loc4 /* rse.bspload = ar.bsp = ar.bspstore = loc4 */
269 mov ar.rnat = loc5
270 mov ar.pfs = loc0
271 mov ar.rsc = loc3 /* the stored ar.rsc, low bits included */
272
273 .explicit
274
275 mov ar.unat = loc2 ;; /* stored "callee" ar.unat; set before the ld8.fill sequence below */
276 mov ar.lc = loc6
277
278 /*
279 * Restore general registers including NaT bits.
   * Every fill is followed by a stop, i.e. sits in its own instruction group.
280 */
281 ld8.fill r1 = [loc16] ;;
282 ld8.fill r4 = [loc17] ;;
283 ld8.fill r5 = [loc18] ;;
284 ld8.fill r6 = [loc19] ;;
285 ld8.fill r7 = [loc20] ;;
286 ld8.fill r12 = [loc21] ;; /* restore sp */
287 ld8.fill r13 = [loc22] ;; /* restore tp */
288
289 /*
290 * Restore branch registers.
   * loc2-loc7 are free again at this point and are reused as temporaries.
291 */
292 ld8 loc2 = [loc23] /* restore pc */
293 ld8 loc3 = [loc24]
294 ld8 loc4 = [loc25]
295 ld8 loc5 = [loc26]
296 ld8 loc6 = [loc27]
297 ld8 loc7 = [loc28] ;;
298 mov b0 = loc2 /* b0 = stored pc; target of the final br.ret */
299 mov b1 = loc3
300 mov b2 = loc4
301 mov b3 = loc5
302 mov b4 = loc6
303 mov b5 = loc7 ;;
304
305 /*
306 * Restore predicate registers
307 */
308 ld8 loc2 = [loc29] ;;
309 mov pr = loc2, ~0 /* mask operand has all bits set */
310
311 /*
312 * Restore floating-point registers.
313 */
314 ldf.fill f2 = [loc30]
315 ldf.fill f3 = [loc31]
316 ldf.fill f4 = [loc32]
317 ldf.fill f5 = [loc33]
318
319 ldf.fill f16 = [loc34]
320 ldf.fill f17 = [loc35]
321 ldf.fill f18 = [loc36]
322 ldf.fill f19 = [loc37]
323 ldf.fill f20 = [loc38]
324 ldf.fill f21 = [loc39]
325 ldf.fill f22 = [loc40]
326 ldf.fill f23 = [loc41]
327 ldf.fill f24 = [loc42]
328 ldf.fill f25 = [loc43]
329 ldf.fill f26 = [loc44]
330 ldf.fill f27 = [loc45]
331 ldf.fill f28 = [loc46]
332 ldf.fill f29 = [loc47]
333 ldf.fill f30 = [loc48]
334 ldf.fill f31 = [loc49]
335
336 mov ar.unat = loc1 /* finally install the stored "caller" ar.unat */
337
338 mov r8 = r0 /* context_restore returns 0 */
339 br.ret.sptk.many b0
Note: See TracBrowser for help on using the repository browser.