Text file
src/runtime/race_loong64.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build race
6
7 #include "go_asm.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10 #include "cgo/abi_loong64.h"
11
12 // The following thunks allow calling the gcc-compiled race runtime directly
13 // from Go code without going all the way through cgo.
14 // First, it's much faster (up to 50% speedup for real Go programs).
15 // Second, it eliminates race-related special cases from cgocall and scheduler.
16 // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
17
18 // A brief recap of the loong64 calling convention.
19 // Arguments are passed in R4...R11, the rest is on stack.
20 // Callee-saved registers are: R23...R30.
21 // Temporary registers are: R12...R20
22 // SP must be 16-byte aligned.
23
24 // When calling racecalladdr, R20 is the call target address.
25
26 // The race ctx, ThreadState *thr below, is passed in R4 and loaded in racecalladdr.
27
28 // Load g from TLS. (See tls_loong64.s)
29 #define load_g \
30 MOVV runtime·tls_g(SB), g
31
// Register aliases for calling into the C (tsan) runtime:
// RARG0..RARG3 are the first four loong64 integer argument registers
// (R4..R7); RCALL holds the address of the C function to call.
32 #define RARG0 R4
33 #define RARG1 R5
34 #define RARG2 R6
35 #define RARG3 R7
36 #define RCALL R20
37
38 // func runtime·raceread(addr uintptr)
39 // Called from instrumented code.
40 // Defined as ABIInternal so as to avoid introducing a wrapper,
41 // which would make caller's PC ineffective.
42 TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
43 MOVV R4, RARG1 // addr (ABIInternal arg0 arrives in R4)
44 MOVV R1, RARG2 // caller pc (R1 is the link register)
45 // void __tsan_read(ThreadState *thr, void *addr, void *pc);
46 MOVV $__tsan_read(SB), RCALL
47 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
48
49 // func runtime·RaceRead(addr uintptr)
// Exported alias of raceread for the runtime/race API.
50 TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
51 // This needs to be a tail call, because raceread reads caller pc.
52 JMP runtime·raceread(SB)
53
54 // func runtime·racereadpc(void *addr, void *callpc, void *pc)
// Like raceread, but the caller supplies the (callpc, pc) pair explicitly,
// so this can use the stack-based ABI0 frame.
55 TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
56 MOVV addr+0(FP), RARG1
57 MOVV callpc+8(FP), RARG2
58 MOVV pc+16(FP), RARG3
59 // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
60 MOVV $__tsan_read_pc(SB), RCALL
61 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
62
63 // func runtime·racewrite(addr uintptr)
64 // Called from instrumented code.
65 // Defined as ABIInternal so as to avoid introducing a wrapper,
66 // which would make caller's PC ineffective.
67 TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
68 MOVV R4, RARG1 // addr (ABIInternal arg0 arrives in R4)
69 MOVV R1, RARG2 // caller pc (R1 is the link register)
70 // void __tsan_write(ThreadState *thr, void *addr, void *pc);
71 MOVV $__tsan_write(SB), RCALL
72 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
73
74 // func runtime·RaceWrite(addr uintptr)
// Exported alias of racewrite for the runtime/race API.
75 TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
76 // This needs to be a tail call, because racewrite reads caller pc.
77 JMP runtime·racewrite(SB)
78
79 // func runtime·racewritepc(void *addr, void *callpc, void *pc)
// Like racewrite, but the caller supplies the (callpc, pc) pair explicitly.
80 TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
81 MOVV addr+0(FP), RARG1
82 MOVV callpc+8(FP), RARG2
83 MOVV pc+16(FP), RARG3
84 // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
85 MOVV $__tsan_write_pc(SB), RCALL
86 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
87
88 // func runtime·racereadrange(addr, size uintptr)
89 // Called from instrumented code.
90 // Defined as ABIInternal so as to avoid introducing a wrapper,
91 // which would make caller's PC ineffective.
92 TEXT runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
93 MOVV R5, RARG2 // size; moved first because R5 is RARG1 and would be clobbered below
94 MOVV R4, RARG1 // addr
95 MOVV R1, RARG3 // caller pc (R1 is the link register)
96 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
97 MOVV $__tsan_read_range(SB), RCALL
98 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
99
100 // func runtime·RaceReadRange(addr, size uintptr)
// Exported alias of racereadrange for the runtime/race API.
101 TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
102 // This needs to be a tail call, because racereadrange reads caller pc.
103 JMP runtime·racereadrange(SB)
104
105 // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Range-read report with an explicit pc (a function entry point, not a
// return address — hence the +4 adjustment below).
106 TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
107 MOVV addr+0(FP), RARG1
108 MOVV size+8(FP), RARG2
109 MOVV pc+16(FP), RARG3
110 ADDV $4, RARG3 // pc is function start, tsan wants return address. (+4 = one 4-byte loong64 instruction past entry)
111 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
112 MOVV $__tsan_read_range(SB), RCALL
113 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
114
115 // func runtime·racewriterange(addr, size uintptr)
116 // Called from instrumented code.
117 // Defined as ABIInternal so as to avoid introducing a wrapper,
118 // which would make caller's PC ineffective.
119 TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
120 MOVV R5, RARG2 // size; moved first because R5 is RARG1 and would be clobbered below
121 MOVV R4, RARG1 // addr
122 MOVV R1, RARG3 // caller pc (R1 is the link register)
123 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
124 MOVV $__tsan_write_range(SB), RCALL
125 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
126
127 // func runtime·RaceWriteRange(addr, size uintptr)
// Exported alias of racewriterange for the runtime/race API.
128 TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
129 // This needs to be a tail call, because racewriterange reads caller pc.
130 JMP runtime·racewriterange(SB)
131
132 // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Range-write report with an explicit pc (a function entry point, not a
// return address — hence the +4 adjustment below).
133 TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
134 MOVV addr+0(FP), RARG1
135 MOVV size+8(FP), RARG2
136 MOVV pc+16(FP), RARG3
137 ADDV $4, RARG3 // pc is function start, tsan wants return address. (+4 = one 4-byte loong64 instruction past entry)
138 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
139 MOVV $__tsan_write_range(SB), RCALL
140 JMP racecalladdr<>(SB) // tail call; RARG0 (racectx) is loaded there
141
142 // Call a __tsan function from Go code.
143 //
144 // RCALL = tsan function address
145 // RARG0 = *ThreadState a.k.a. g_racectx from g
146 // RARG1 = addr passed to __tsan function
147 //
148 // If addr (RARG1) is out of range, do nothing. Otherwise, setup goroutine
149 // context and invoke racecall. Other arguments already set.
150 TEXT racecalladdr<>(SB), NOSPLIT, $0-0
151 // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
152 MOVV runtime·racearenastart(SB), R12
153 BLT RARG1, R12, data // addr < arenastart: not in arena, try the data range
154 MOVV runtime·racearenaend(SB), R12
155 BLT RARG1, R12, call // arenastart <= addr < arenaend: shadowed, do the call
156 data:
157 MOVV runtime·racedatastart(SB), R12
158 BLT RARG1, R12, ret // addr < racedatastart: unshadowed, ignore
159 MOVV runtime·racedataend(SB), R12
160 BGE RARG1, R12, ret // addr >= racedataend: unshadowed, ignore
161 call:
162 load_g
163 MOVV g_racectx(g), RARG0
164 JMP racecall<>(SB) // tail call into the C race runtime
165 ret:
166 RET
167
168 // func runtime·racefuncenter(pc uintptr)
169 // Called from instrumented code.
170 TEXT runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
171 MOVV R4, RCALL // pc arg; RCALL is used as a scratch carrier here (see racefuncenter<>)
172 JMP racefuncenter<>(SB)
173
174 // Common code for racefuncenter
175 // RCALL = caller's return address
176 TEXT racefuncenter<>(SB), NOSPLIT, $0-0
177 load_g
178 MOVV g_racectx(g), RARG0 // goroutine racectx
179 MOVV RCALL, RARG1 // pc; freed up so RCALL can hold the call target below
180 // void __tsan_func_enter(ThreadState *thr, void *pc);
181 MOVV $__tsan_func_enter(SB), RCALL
182 JAL racecall<>(SB)
183 RET
184
185 // func runtime·racefuncexit()
186 // Called from instrumented code.
187 TEXT runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
188 load_g
189 MOVV g_racectx(g), RARG0 // race context
190 // void __tsan_func_exit(ThreadState *thr);
191 MOVV $__tsan_func_exit(SB), RCALL
192 JMP racecall<>(SB) // tail call; racecall returns straight to our caller
193
194 // Atomic operations for sync/atomic package.
195 // R7 = addr of arguments passed to this function, it can
196 // be fetched at 24(R3) in racecallatomic after two times JAL
197 // RARG0, RARG1, RARG2 set in racecallatomic
//
// Pattern for all thunks below: load the __tsan_go_atomic* target into
// RCALL and JAL to the common racecallatomic<>. Unsigned/uintptr/pointer
// variants tail-jump (JMP) to the signed implementation of the same width
// so the caller pc stays intact. GO_ARGS marks the args live for the GC.
198
199 // Load
200 TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
201 GO_ARGS
202 MOVV $__tsan_go_atomic32_load(SB), RCALL
203 JAL racecallatomic<>(SB)
204 RET
205
206 TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
207 GO_ARGS
208 MOVV $__tsan_go_atomic64_load(SB), RCALL
209 JAL racecallatomic<>(SB)
210 RET
211
212 TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
213 GO_ARGS
214 JMP sync∕atomic·LoadInt32(SB)
215
216 TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
217 GO_ARGS
218 JMP sync∕atomic·LoadInt64(SB)
219
220 TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
221 GO_ARGS
222 JMP sync∕atomic·LoadInt64(SB)
223
224 TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
225 GO_ARGS
226 JMP sync∕atomic·LoadInt64(SB)
227
228 // Store
// Same thunk pattern as the Load group above.
229 TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
230 GO_ARGS
231 MOVV $__tsan_go_atomic32_store(SB), RCALL
232 JAL racecallatomic<>(SB)
233 RET
234
235 TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
236 GO_ARGS
237 MOVV $__tsan_go_atomic64_store(SB), RCALL
238 JAL racecallatomic<>(SB)
239 RET
240
241 TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
242 GO_ARGS
243 JMP sync∕atomic·StoreInt32(SB)
244
245 TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
246 GO_ARGS
247 JMP sync∕atomic·StoreInt64(SB)
248
249 TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
250 GO_ARGS
251 JMP sync∕atomic·StoreInt64(SB)
252
253 // Swap
// Same thunk pattern as the Load group above.
254 TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
255 GO_ARGS
256 MOVV $__tsan_go_atomic32_exchange(SB), RCALL
257 JAL racecallatomic<>(SB)
258 RET
259
260 TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
261 GO_ARGS
262 MOVV $__tsan_go_atomic64_exchange(SB), RCALL
263 JAL racecallatomic<>(SB)
264 RET
265
266 TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
267 GO_ARGS
268 JMP sync∕atomic·SwapInt32(SB)
269
270 TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
271 GO_ARGS
272 JMP sync∕atomic·SwapInt64(SB)
273
274 TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
275 GO_ARGS
276 JMP sync∕atomic·SwapInt64(SB)
277
278 // Add
// __tsan_go_atomic*_fetch_add returns the OLD value; Go's Add* returns the
// NEW value, so each Add thunk re-adds the delta to the result in place.
279 TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
280 GO_ARGS
281 MOVV $__tsan_go_atomic32_fetch_add(SB), RCALL
282 JAL racecallatomic<>(SB)
283 MOVW add+8(FP), RARG0 // convert fetch_add to add_fetch
284 MOVW ret+16(FP), RARG1
285 ADD RARG0, RARG1, RARG0
286 MOVW RARG0, ret+16(FP)
287 RET
288
289 TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
290 GO_ARGS
291 MOVV $__tsan_go_atomic64_fetch_add(SB), RCALL
292 JAL racecallatomic<>(SB)
293 MOVV add+8(FP), RARG0 // convert fetch_add to add_fetch
294 MOVV ret+16(FP), RARG1
295 ADDV RARG0, RARG1, RARG0
296 MOVV RARG0, ret+16(FP)
297 RET
298
299 TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
300 GO_ARGS
301 JMP sync∕atomic·AddInt32(SB)
302
303 TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
304 GO_ARGS
305 JMP sync∕atomic·AddInt64(SB)
306
307 TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
308 GO_ARGS
309 JMP sync∕atomic·AddInt64(SB)
310
311 // And
// Same thunk pattern as the Load group above.
312 TEXT sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
313 GO_ARGS
314 MOVV $__tsan_go_atomic32_fetch_and(SB), RCALL
315 JAL racecallatomic<>(SB)
316 RET
317
318 TEXT sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
319 GO_ARGS
320 MOVV $__tsan_go_atomic64_fetch_and(SB), RCALL
321 JAL racecallatomic<>(SB)
322 RET
323
324 TEXT sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
325 GO_ARGS
326 JMP sync∕atomic·AndInt32(SB)
327
328 TEXT sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
329 GO_ARGS
330 JMP sync∕atomic·AndInt64(SB)
331
332 TEXT sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
333 GO_ARGS
334 JMP sync∕atomic·AndInt64(SB)
335
336 // Or
// Same thunk pattern as the Load group above.
337 TEXT sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
338 GO_ARGS
339 MOVV $__tsan_go_atomic32_fetch_or(SB), RCALL
340 JAL racecallatomic<>(SB)
341 RET
342
343 TEXT sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
344 GO_ARGS
345 MOVV $__tsan_go_atomic64_fetch_or(SB), RCALL
346 JAL racecallatomic<>(SB)
347 RET
348
349 TEXT sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
350 GO_ARGS
351 JMP sync∕atomic·OrInt32(SB)
352
353 TEXT sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
354 GO_ARGS
355 JMP sync∕atomic·OrInt64(SB)
356
357 TEXT sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
358 GO_ARGS
359 JMP sync∕atomic·OrInt64(SB)
360
361 // CompareAndSwap
// Same thunk pattern as the Load group above.
362 TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
363 GO_ARGS
364 MOVV $__tsan_go_atomic32_compare_exchange(SB), RCALL
365 JAL racecallatomic<>(SB)
366 RET
367
368 TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
369 GO_ARGS
370 MOVV $__tsan_go_atomic64_compare_exchange(SB), RCALL
371 JAL racecallatomic<>(SB)
372 RET
373
374 TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
375 GO_ARGS
376 JMP sync∕atomic·CompareAndSwapInt32(SB)
377
378 TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
379 GO_ARGS
380 JMP sync∕atomic·CompareAndSwapInt64(SB)
381
382 TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
383 GO_ARGS
384 JMP sync∕atomic·CompareAndSwapInt64(SB)
385
386 // Generic atomic operation implementation.
387 // RCALL = addr of target function
388 TEXT racecallatomic<>(SB), NOSPLIT, $0
389 // Set up these registers
390 // RARG0 = *ThreadState
391 // RARG1 = caller pc
392 // RARG2 = pc
393 // RARG3 = addr of incoming arg list
394
395 // Trigger SIGSEGV early.
396 MOVV 24(R3), RARG3 // 1st arg is addr. after two times JAL, get it at 24(R3)
397 MOVB (RARG3), R12 // segv here if addr is bad
398
399 // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
400 MOVV runtime·racearenastart(SB), R12
401 BLT RARG3, R12, racecallatomic_data // below arena: try the data range
402 MOVV runtime·racearenaend(SB), R12
403 BLT RARG3, R12, racecallatomic_ok // in arena: shadowed, call tsan
404
405 racecallatomic_data:
406 MOVV runtime·racedatastart(SB), R12
407 BLT RARG3, R12, racecallatomic_ignore
408 MOVV runtime·racedataend(SB), R12
409 BGE RARG3, R12, racecallatomic_ignore
410
411 racecallatomic_ok:
412 // Addr is within the good range, call the atomic function.
413 load_g
414 MOVV g_racectx(g), RARG0 // goroutine context
415 MOVV 8(R3), RARG1 // caller pc
416 MOVV RCALL, RARG2 // pc
417 ADDV $24, R3, RARG3
418 JAL racecall<>(SB) // NOTE(review): JAL links, so racecall does return here; the upstream "does not return" note looks stale — confirm against origin
419 RET
420
421 racecallatomic_ignore:
422 // Addr is outside the good range.
423 // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
424 // An attempt to synchronize on the address would cause crash.
425 MOVV RCALL, R25 // remember the original function
426 MOVV $__tsan_go_ignore_sync_begin(SB), RCALL
427 load_g
428 MOVV g_racectx(g), RARG0 // goroutine context
429 JAL racecall<>(SB)
430 MOVV R25, RCALL // restore the original function
431
432 // Call the atomic function.
433 // racecall will call LLVM race code which might clobber R22 (g)
434 load_g
435 MOVV g_racectx(g), RARG0 // goroutine context
436 MOVV 8(R3), RARG1 // caller pc
437 MOVV RCALL, RARG2 // pc
438 ADDV $24, R3, RARG3 // arguments
439 JAL racecall<>(SB)
440
441 // Call __tsan_go_ignore_sync_end.
442 MOVV $__tsan_go_ignore_sync_end(SB), RCALL
443 MOVV g_racectx(g), RARG0 // goroutine context
444 JAL racecall<>(SB)
445 RET
446
447 // func runtime·racecall(void(*f)(...), ...)
448 // Calls C function f from race runtime and passes up to 4 arguments to it.
449 // The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
450 TEXT runtime·racecall(SB), NOSPLIT, $0-0
451 MOVV fn+0(FP), RCALL
452 MOVV arg0+8(FP), RARG0
453 MOVV arg1+16(FP), RARG1
454 MOVV arg2+24(FP), RARG2
455 MOVV arg3+32(FP), RARG3
456 JMP racecall<>(SB) // tail call; racecall<> returns straight to our caller
457
458 // Switches SP to g0 stack and calls (RCALL). Arguments already set.
459 TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
460 MOVV g_m(g), R12
461 // Switch to g0 stack.
462 MOVV R3, R23 // callee-saved, preserved across the CALL
463 MOVV R1, R24 // callee-saved, preserved across the CALL
464 MOVV m_g0(R12), R13
465 BEQ R13, g, call // already on g0
466 MOVV (g_sched+gobuf_sp)(R13), R3
467 call:
468 JAL (RCALL)
469 MOVV R23, R3 // restore the original SP
470 JAL (R24) // jump back through the saved return address (clobbering R1 is harmless here)
471 RET // NOTE(review): unreachable after the JAL above
472
473 // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
474 // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
475 // The overall effect of Go->C->Go call chain is similar to that of mcall.
476 // RARG0 contains command code. RARG1 contains command-specific context.
477 // See racecallback for command codes.
478 TEXT runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
479 // Handle command raceGetProcCmd (0) here.
480 // First, code below assumes that we are on curg, while raceGetProcCmd
481 // can be executed on g0. Second, it is called frequently, so will
482 // benefit from this fast path.
483 BNE RARG0, R0, rest
484 MOVV g, R15 // stash the C side's g so it can be restored after load_g
485 load_g
486 MOVV g_m(g), RARG0
487 MOVV m_p(RARG0), RARG0
488 MOVV p_raceprocctx(RARG0), RARG0
489 MOVV RARG0, (RARG1) // *ctx = p.raceprocctx
490 MOVV R15, g
491 JMP (R1) // return to C caller via the link register
492 rest:
493 // Save callee-saved registers (Go code won't respect that).
494 // 8(R3) and 16(R3) are for args passed through racecallback
495 ADDV $-176, R3 // 176 = 8 (R1) + 16 (arg slots) + 10*8 (R22-R31) + 8*8 (F24-F31)
496 MOVV R1, 0(R3)
497
498 SAVE_R22_TO_R31(8*3)
499 SAVE_F24_TO_F31(8*13)
500 // Set g = g0.
501 load_g
502 MOVV g_m(g), R15
503 MOVV m_g0(R15), R14
504 BEQ R14, g, noswitch // branch if already on g0
505 MOVV R14, g
506
507 JAL runtime·racecallback<ABIInternal>(SB)
508 // All registers are smashed after Go code, reload.
509 MOVV g_m(g), R15
510 MOVV m_curg(R15), g // g = m->curg
511 ret:
512 // Restore callee-saved registers.
513 MOVV 0(R3), R1
514 RESTORE_F24_TO_F31(8*13)
515 RESTORE_R22_TO_R31(8*3)
516 ADDV $176, R3
517 JMP (R1) // return to C caller
518
519 noswitch:
520 // already on g0
521 JAL runtime·racecallback<ABIInternal>(SB)
522 JMP ret
523
524 // tls_g, g value for each thread in TLS
525 GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8 // 8-byte per-thread TLS slot read by load_g
526