internal/runtime/syscall.Syscall6
/usr/lib/go/src/internal/runtime/syscall/asm_linux_arm64.s
Total: 6.47s 6.47s (flat, cum) 17.23%
     9       .       .  MOVD num+0(FP), R8 // syscall entry
    10       .       .  MOVD a1+8(FP), R0
    11       .       .  MOVD a2+16(FP), R1
    12       .       .  MOVD a3+24(FP), R2
    13       .       .  MOVD a4+32(FP), R3
    14    10ms    10ms  MOVD a5+40(FP), R4
    15       .       .  MOVD a6+48(FP), R5
    16       .       .  SVC
    17   6.43s   6.43s  CMN $4095, R0
    18       .       .  BCC ok
    19       .       .  MOVD $-1, R4
    20       .       .  MOVD R4, r1+56(FP)
    21       .       .  MOVD ZR, r2+64(FP)
    22       .       .  NEG R0, R0
    23       .       .  MOVD R0, errno+72(FP)
    24       .       .  RET
    25       .       .  ok:
    26    20ms    20ms  MOVD R0, r1+56(FP)
    27    10ms    10ms  MOVD R1, r2+64(FP)
    28       .       .  MOVD ZR, errno+72(FP)
    29       .       .  RET
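The 6.43s charged to the CMN directly after the SVC is, in effect, kernel CPU time: the SIGPROF sample lands when the system call returns to user space, so time executing inside the kernel is attributed to the first instruction after the SVC. For reference, a minimal sketch of how a CPU profile like this one can be collected (file name and workload are placeholders, not taken from the profile); listings in this style then come from pprof's source view (the list/weblist commands of go tool pprof).

package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("cpu.pprof") // output path is an arbitrary example
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// ... run the workload being profiled ...
}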
runtime.usleep
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 230ms 230ms (flat, cum) 0.61%
   134       .       .  // nanosleep(&ts, 0)
   135       .       .  ADD $8, RSP, R0
   136       .       .  MOVD $0, R1
   137       .       .  MOVD $SYS_nanosleep, R8
   138       .       .  SVC
   139   230ms   230ms  RET
   140       .       .
   141       .       .  TEXT runtime·gettid(SB),NOSPLIT,$0-4
   142       .       .  MOVD $SYS_gettid, R8
   143       .       .  SVC
   144       .       .  MOVW R0, ret+0(FP)
runtime.nanotime1
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 10ms 10ms (flat, cum) 0.027%
   316       .       .
   317       .       .  // Set vdsoPC and vdsoSP for SIGPROF traceback.
   318       .       .  // Save the old values on stack and restore them on exit,
   319       .       .  // so this function is reentrant.
   320       .       .  MOVD m_vdsoPC(R21), R2
   321    10ms    10ms  MOVD m_vdsoSP(R21), R3
   322       .       .  MOVD R2, 8(RSP)
   323       .       .  MOVD R3, 16(RSP)
   324       .       .
   325       .       .  MOVD $ret-8(FP), R2 // caller's SP
   326       .       .  MOVD LR, m_vdsoPC(R21)
runtime.sysMmap
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 10ms 10ms (flat, cum) 0.027%
   578       .       .  MOVW fd+24(FP), R4
   579       .       .  MOVW off+28(FP), R5
   580       .       .
   581       .       .  MOVD $SYS_mmap, R8
   582       .       .  SVC
   583    10ms    10ms  CMN $4095, R0
   584       .       .  BCC ok
   585       .       .  NEG R0,R0
   586       .       .  MOVD $0, p+32(FP)
   587       .       .  MOVD R0, err+40(FP)
   588       .       .  RET
runtime.madvise
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 30ms 30ms (flat, cum) 0.08%
   633       .       .  MOVD addr+0(FP), R0
   634       .       .  MOVD n+8(FP), R1
   635       .       .  MOVW flags+16(FP), R2
   636       .       .  MOVD $SYS_madvise, R8
   637       .       .  SVC
   638    30ms    30ms  MOVW R0, ret+24(FP)
   639       .       .  RET
   640       .       .
runtime.futex
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 3.35s 3.35s (flat, cum) 8.92%
   642       .       .  // struct timespec *timeout, int32 *uaddr2, int32 val2);
   643       .       .  TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0
   644    10ms    10ms  MOVD addr+0(FP), R0
   645       .       .  MOVW op+8(FP), R1
   646       .       .  MOVW val+12(FP), R2
   647       .       .  MOVD ts+16(FP), R3
   648       .       .  MOVD addr2+24(FP), R4
   649       .       .  MOVW val3+32(FP), R5
   650       .       .  MOVD $SYS_futex, R8
   651       .       .  SVC
   652   3.34s   3.34s  MOVW R0, ret+40(FP)
   653       .       .  RET
   654       .       .
   655       .       .  // int64 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
   656       .       .  TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0
   657       .       .  MOVW flags+0(FP), R0
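The futex wrapper itself is trivial; the 3.34s charged to the instruction after the SVC is kernel CPU spent in futex wait/wake operations, which typically reflects runtime lock contention and thread park/unpark churn (see notesleep/notewakeup via mPark, stopm, and startm further down). A hedged sketch of one way to narrow that down: enabling the mutex and block profiles attributes contention to the Go code that caused it (the sampling rates and file name below are arbitrary examples).

package main

import (
	"log"
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	runtime.SetMutexProfileFraction(5) // sample roughly 1 in 5 mutex contention events
	runtime.SetBlockProfileRate(1)     // record blocking events lasting >= 1ns

	// ... run the workload ...

	f, err := os.Create("mutex.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
		log.Fatal(err)
	}
}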
runtime.osyield
/usr/lib/go/src/runtime/sys_linux_arm64.s
Total: 130ms 130ms (flat, cum) 0.35%
   734       .       .  RET
   735       .       .
   736       .       .  TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
   737       .       .  MOVD $SYS_sched_yield, R8
   738       .       .  SVC
   739   130ms   130ms  RET
   740       .       .
   741       .       .  TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0
   742       .       .  MOVD pid+0(FP), R0
   743       .       .  MOVD len+8(FP), R1
   744       .       .  MOVD buf+16(FP), R2
runtime.nextFreeFast
/usr/lib/go/src/runtime/malloc.go
Total: 990ms 990ms (flat, cum) 2.64%
   927       .       .  var zerobase uintptr
   928       .       .
   929       .       .  // nextFreeFast returns the next free object if one is quickly available.
   930       .       .  // Otherwise it returns 0.
   931       .       .  func nextFreeFast(s *mspan) gclinkptr {
   932   640ms   640ms      theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?
   933       .       .      if theBit < 64 {
   934   120ms   120ms          result := s.freeindex + uint16(theBit)
   935       .       .          if result < s.nelems {
   936    70ms    70ms              freeidx := result + 1
   937       .       .              if freeidx%64 == 0 && freeidx != s.nelems {
   938       .       .                  return 0
   939       .       .              }
   940    20ms    20ms              s.allocCache >>= uint(theBit + 1)
   941    40ms    40ms              s.freeindex = freeidx
   942    40ms    40ms              s.allocCount++
   943    60ms    60ms              return gclinkptr(uintptr(result)*s.elemsize + s.base())
                          ⋮ return s.startAddr  mheap.go:523
   944       .       .          }
   945       .       .      }
   946       .       .      return 0
   947       .       .  }
   948       .       .
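Most of nextFreeFast's time sits on the TrailingZeros64 line, which scans the span's allocCache bitmap for the next free slot. A small standalone illustration of the same bit trick, under the assumption (as in mspan.allocCache) that set bits mark free slots; the values are made up.

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	var allocCache uint64 = 0b1010_0000 // 1 bits mark free slots
	theBit := bits.TrailingZeros64(allocCache)
	fmt.Println("first free slot:", theBit) // prints 5
	allocCache >>= uint(theBit + 1)          // consume it, as nextFreeFast does
	fmt.Printf("remaining cache: %b\n", allocCache) // prints 10
}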
runtime.(*mcache).nextFree
/usr/lib/go/src/runtime/malloc.go
Total: 0 2.01s (flat, cum) 5.35%
   953       .       .  // determine whether a new GC cycle needs to be started or if the GC is active
   954       .       .  // whether this goroutine needs to assist the GC.
   955       .       .  //
   956       .       .  // Must run in a non-preemptible context since otherwise the owner of
   957       .       .  // c could change.
   958       .    40ms  func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger bool) {
   959       .       .      s = c.alloc[spc]
   960       .       .      checkGCTrigger = false
   961       .   130ms      freeIndex := s.nextFreeIndex()
   962       .       .      if freeIndex == s.nelems {
   963       .       .          // The span is full.
   964       .       .          if s.allocCount != s.nelems {
   965       .       .              println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
   966       .       .              throw("s.allocCount != s.nelems && freeIndex == s.nelems")
   967       .       .          }
   968       .   1.82s          c.refill(spc)
   969       .       .          checkGCTrigger = true
   970       .       .          s = c.alloc[spc]
   971       .       .
   972       .    20ms          freeIndex = s.nextFreeIndex()
   973       .       .      }
   974       .       .
   975       .       .      if freeIndex >= s.nelems {
   976       .       .          throw("freeIndex is not valid")
   977       .       .      }
runtime.mallocgc
/usr/lib/go/src/runtime/malloc.go
Total: 460ms 8.24s (flat, cum) 21.95%
  1009       .       .  //
  1010       .       .  // Do not remove or change the type signature.
  1011       .       .  // See go.dev/issue/67401.
  1012       .       .  //
  1013       .       .  //go:linkname mallocgc
  1014   120ms   950ms  func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
  1015       .       .      if doubleCheckMalloc {
  1016       .       .          if gcphase == _GCmarktermination {
  1017       .       .              throw("mallocgc called with gcphase == _GCmarktermination")
  1018       .       .          }
  1019       .       .      }
  1020       .       .
  1021       .       .      // Short-circuit zero-sized allocation requests.
  1022    70ms    70ms      if size == 0 {
  1023       .       .          return unsafe.Pointer(&zerobase)
  1024       .       .      }
  1025       .       .
  1026       .       .      // It's possible for any malloc to trigger sweeping, which may in
  1027       .       .      // turn queue finalizers. Record this dynamic lock edge.
  1028       .       .      // N.B. Compiled away if lockrank experiment is not enabled.
  1029       .       .      lockRankMayQueueFinalizer()
  1030       .       .
  1031       .       .      // Pre-malloc debug hooks.
  1032    90ms    90ms      if debug.malloc {
  1033       .       .          if x := preMallocgcDebug(size, typ); x != nil {
  1034       .       .              return x
  1035       .       .          }
  1036       .       .      }
  1037       .       .
  1038       .       .      // For ASAN, we allocate extra memory around each allocation called the "redzone."
  1039       .       .      // These "redzones" are marked as unaddressable.
  1040       .       .      var asanRZ uintptr
  1041       .       .      if asanenabled {
  1042       .       .          asanRZ = redZoneSize(size)
  1043       .       .          size += asanRZ
  1044       .       .      }
  1045       .       .
  1046       .       .      // Assist the GC if needed.
  1047    50ms    50ms      if gcBlackenEnabled != 0 {
  1048       .       .          deductAssistCredit(size)
  1049       .       .      }
  1050       .       .
  1051       .       .      // Actually do the allocation.
  1052       .       .      var x unsafe.Pointer
  1053       .       .      var elemsize uintptr
  1054       .       .      if size <= maxSmallSize-gc.MallocHeaderSize {
  1055    60ms    60ms          if typ == nil || !typ.Pointers() {
                          ⋮ func (t *Type) Pointers() bool { return t.PtrBytes != 0 }  type.go:200
  1056    20ms    20ms              if size < maxTinySize {
  1057       .   220ms                  x, elemsize = mallocgcTiny(size, typ)
  1058       .       .              } else {
  1059    10ms   670ms                  x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
  1060       .       .              }
  1061       .       .          } else {
  1062       .       .              if !needzero {
  1063       .       .                  throw("objects with pointers must be zeroed")
  1064       .       .              }
  1065       .       .              if heapBitsInSpan(size) {
  1066    40ms   5.83s                  x, elemsize = mallocgcSmallScanNoHeader(size, typ)
  1067       .       .              } else {
  1068       .   280ms                  x, elemsize = mallocgcSmallScanHeader(size, typ)
  1069       .       .              }
  1070       .       .          }
  1071       .       .      } else {
  1072       .       .          x, elemsize = mallocgcLarge(size, typ, needzero)
  1073       .       .      }
runtime.mallocgc
/usr/lib/go/src/runtime/malloc.go
Total: 100ms 100ms (flat, cum) 0.27%
  1092       .       .      if valgrindenabled {
  1093       .       .          valgrindMalloc(x, size-asanRZ)
  1094       .       .      }
  1095       .       .
  1096       .       .      // Adjust our GC assist debt to account for internal fragmentation.
  1097    40ms    40ms      if gcBlackenEnabled != 0 && elemsize != 0 {
  1098       .       .          if assistG := getg().m.curg; assistG != nil {
  1099       .       .              assistG.gcAssistBytes -= int64(elemsize - size)
  1100       .       .          }
  1101       .       .      }
  1102       .       .
  1103       .       .      // Post-malloc debug hooks.
  1104    50ms    50ms      if debug.malloc {
  1105       .       .          postMallocgcDebug(x, elemsize, typ)
  1106       .       .      }
  1107    10ms    10ms      return x
  1108       .       .  }
  1109       .       .
  1110       .       .  func mallocgcTiny(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
  1111       .       .      // Set mp.mallocing to keep from being preempted by GC.
  1112       .       .      mp := acquirem()
runtime.mallocgcTiny
/usr/lib/go/src/runtime/malloc.go
Total: 120ms 190ms (flat, cum) 0.51%
  1150       .       .      //
  1151       .       .      // The main targets of tiny allocator are small strings and
  1152       .       .      // standalone escaping variables. On a json benchmark
  1153       .       .      // the allocator reduces number of allocations by ~12% and
  1154       .       .      // reduces heap size by ~20%.
  1155    10ms    10ms      c := getMCache(mp)
                          ⋮ c = pp.mcache  mcache.go:139
  1156       .       .      off := c.tinyoffset
  1157       .       .      // Align tiny pointer for required (conservative) alignment.
  1158    10ms    10ms      if size&7 == 0 {
  1159       .       .          off = alignUp(off, 8)
  1160       .       .      } else if goarch.PtrSize == 4 && size == 12 {
  1161       .       .          // Conservatively align 12-byte objects to 8 bytes on 32-bit
  1162       .       .          // systems so that objects whose first field is a 64-bit
  1163       .       .          // value is aligned to 8 bytes and does not cause a fault on
  1164       .       .          // atomic access. See issue 37262.
  1165       .       .          // TODO(mknyszek): Remove this workaround if/when issue 36606
  1166       .       .          // is resolved.
  1167       .       .          off = alignUp(off, 8)
  1168       .       .      } else if size&3 == 0 {
  1169       .       .          off = alignUp(off, 4)
  1170       .       .      } else if size&1 == 0 {
  1171       .       .          off = alignUp(off, 2)
  1172       .       .      }
  1173    30ms    30ms      if off+size <= maxTinySize && c.tiny != 0 {
  1174       .       .          // The object fits into existing tiny block.
  1175       .       .          x := unsafe.Pointer(c.tiny + off)
  1176       .       .          c.tinyoffset = off + size
  1177       .       .          c.tinyAllocs++
  1178       .       .          mp.mallocing = 0
  1179    10ms    10ms          releasem(mp)
                          ⋮ mp.locks--  runtime1.go:638
  1180       .       .          return x, 0
  1181       .       .      }
  1182       .       .      // Allocate a new maxTinySize block.
  1183       .       .      checkGCTrigger := false
  1184       .       .      span := c.alloc[tinySpanClass]
  1185    30ms    30ms      v := nextFreeFast(span)
                          ⋮ s.allocCache >>= uint(theBit + 1)  malloc.go:940
                          ⋮ result := s.freeindex + uint16(theBit)  malloc.go:934
                          ⋮ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?  malloc.go:932
  1186       .       .      if v == 0 {
  1187       .    70ms          v, span, checkGCTrigger = c.nextFree(tinySpanClass)
  1188       .       .      }
  1189       .       .      x := unsafe.Pointer(v)
  1190       .       .      (*[2]uint64)(x)[0] = 0 // Always zero
  1191       .       .      (*[2]uint64)(x)[1] = 0
  1192       .       .      // See if we need to replace the existing tiny block with the new one
  1193       .       .      // based on amount of remaining free space.
  1194       .       .      if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
  1195       .       .          // Note: disabled when race detector is on, see comment near end of this function.
  1196       .       .          c.tiny = uintptr(x)
  1197    10ms    10ms          c.tinyoffset = size
  1198       .       .      }
  1199       .       .
  1200       .       .      // Ensure that the stores above that initialize x to
  1201       .       .      // type-safe memory and set the heap bits occur before
  1202       .       .      // the caller can make x observable to the garbage
  1203       .       .      // collector. Otherwise, on weakly ordered machines,
  1204       .       .      // the garbage collector could follow a pointer to x,
  1205       .       .      // but see uninitialized memory or stale heap bits.
  1206       .       .      publicationBarrier()
  1207       .       .
  1208    20ms    20ms      if writeBarrier.enabled {
  1209       .       .          // Allocate black during GC.
  1210       .       .          // All slots hold nil so no scanning is needed.
  1211       .       .          // This may be racing with GC so do it atomically if there can be
  1212       .       .          // a race marking the bit.
  1213       .       .          gcmarknewobject(span, uintptr(x))
runtime.mallocgcTiny
/usr/lib/go/src/runtime/malloc.go
Total: 20ms 20ms (flat, cum) 0.053%
  1230       .       .      //
  1231       .       .      // TODO(mknyszek): We should really count the header as part
  1232       .       .      // of gc_sys or something. The code below just pretends it is
  1233       .       .      // internal fragmentation and matches the GC's accounting by
  1234       .       .      // using the whole allocation slot.
  1235    10ms    10ms      c.nextSample -= int64(span.elemsize)
  1236       .       .      if c.nextSample < 0 || MemProfileRate != c.memProfRate {
  1237       .       .          profilealloc(mp, x, span.elemsize)
  1238       .       .      }
  1239       .       .      mp.mallocing = 0
  1240    10ms    10ms      releasem(mp)
                          ⋮ mp.locks--  runtime1.go:638
  1241       .       .
  1242       .       .      if checkGCTrigger {
  1243       .       .          if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
  1244       .       .              gcStart(t)
  1245       .       .          }
runtime.mallocgcTiny
/usr/lib/go/src/runtime/malloc.go
Total: 10ms 10ms (flat, cum) 0.027%
  1258       .       .          // TODO: enable this padding for all allocations, not just
  1259       .       .          // tinyalloc ones. It's tricky because of pointer maps.
  1260       .       .          // Maybe just all noscan objects?
  1261       .       .          x = add(x, span.elemsize-size)
  1262       .       .      }
  1263    10ms    10ms      return x, span.elemsize
  1264       .       .  }
  1265       .       .
  1266       .       .  func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
  1267       .       .      // Set mp.mallocing to keep from being preempted by GC.
  1268       .       .      mp := acquirem()
runtime.mallocgcSmallNoscan
/usr/lib/go/src/runtime/malloc.go
Total: 220ms 560ms (flat, cum) 1.49%
  1278       .       .          }
  1279       .       .      }
  1280       .       .      mp.mallocing = 1
  1281       .       .
  1282       .       .      checkGCTrigger := false
  1283    10ms    10ms      c := getMCache(mp)
                          ⋮ if pp == nil {  mcache.go:132
  1284       .       .      var sizeclass uint8
  1285       .       .      if size <= gc.SmallSizeMax-8 {
  1286       .       .          sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
  1287       .       .      } else {
  1288       .       .          sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)]
  1289       .       .      }
  1290       .       .      size = uintptr(gc.SizeClassToSize[sizeclass])
  1291    30ms    30ms      spc := makeSpanClass(sizeclass, true)
                          ⋮ return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))  mheap.go:594
  1292       .       .      span := c.alloc[spc]
  1293   170ms   170ms      v := nextFreeFast(span)
                          ⋮ result := s.freeindex + uint16(theBit)  malloc.go:934
                          ⋮ s.allocCount++  malloc.go:942
                          ⋮ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?  malloc.go:932
                          ⋮ freeidx := result + 1  malloc.go:936
  1294       .       .      if v == 0 {
  1295       .   260ms          v, span, checkGCTrigger = c.nextFree(spc)
  1296       .       .      }
  1297       .       .      x := unsafe.Pointer(v)
  1298       .       .      if needzero && span.needzero != 0 {
  1299       .    80ms          memclrNoHeapPointers(x, size)
  1300       .       .      }
  1301       .       .
  1302       .       .      // Ensure that the stores above that initialize x to
  1303       .       .      // type-safe memory and set the heap bits occur before
  1304       .       .      // the caller can make x observable to the garbage
  1305       .       .      // collector. Otherwise, on weakly ordered machines,
  1306       .       .      // the garbage collector could follow a pointer to x,
  1307       .       .      // but see uninitialized memory or stale heap bits.
  1308       .       .      publicationBarrier()
  1309       .       .
  1310    10ms    10ms      if writeBarrier.enabled {
  1311       .       .          // Allocate black during GC.
  1312       .       .          // All slots hold nil so no scanning is needed.
  1313       .       .          // This may be racing with GC so do it atomically if there can be
  1314       .       .          // a race marking the bit.
  1315       .       .          gcmarknewobject(span, uintptr(x))
runtime.mallocgcSmallNoscan
/usr/lib/go/src/runtime/malloc.go
Total: 40ms 100ms (flat, cum) 0.27%
  1333       .       .      // TODO(mknyszek): We should really count the header as part
  1334       .       .      // of gc_sys or something. The code below just pretends it is
  1335       .       .      // internal fragmentation and matches the GC's accounting by
  1336       .       .      // using the whole allocation slot.
  1337       .       .      c.nextSample -= int64(size)
  1338    20ms    20ms      if c.nextSample < 0 || MemProfileRate != c.memProfRate {
  1339       .    40ms          profilealloc(mp, x, size)
  1340       .       .      }
  1341       .       .      mp.mallocing = 0
  1342    10ms    10ms      releasem(mp)
                          ⋮ mp.locks--  runtime1.go:638
  1343       .       .
  1344       .       .      if checkGCTrigger {
  1345       .    20ms          if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
  1346       .       .              gcStart(t)
  1347       .       .          }
  1348       .       .      }
  1349    10ms    10ms      return x, size
  1350       .       .  }
runtime.mallocgcSmallScanNoHeader
/usr/lib/go/src/runtime/malloc.go
Total: 1.91s 5.79s (flat, cum) 15.42%
  1351       .       .
  1352    40ms   1.04s  func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
  1353       .       .      // Set mp.mallocing to keep from being preempted by GC.
  1354   120ms   120ms      mp := acquirem()
                          ⋮ return gp.m  runtime1.go:632
                          ⋮ gp.m.locks++  runtime1.go:631
  1355       .       .      if doubleCheckMalloc {
  1356       .       .          if mp.mallocing != 0 {
  1357       .       .              throw("malloc deadlock")
  1358       .       .          }
  1359       .       .          if mp.gsignal == getg() {
  1360       .       .              throw("malloc during signal")
  1361       .       .          }
  1362       .       .          if typ == nil || !typ.Pointers() {
  1363       .       .              throw("noscan allocated in scan-only path")
  1364       .       .          }
  1365       .       .          if !heapBitsInSpan(size) {
  1366       .       .              throw("heap bits in not in span for non-header-only path")
  1367       .       .          }
  1368       .       .      }
  1369    10ms    10ms      mp.mallocing = 1
  1370       .       .
  1371       .       .      checkGCTrigger := false
  1372   100ms   100ms      c := getMCache(mp)
                          ⋮ c = pp.mcache  mcache.go:139
                          ⋮ pp := mp.p.ptr()  mcache.go:130
  1373    60ms    60ms      sizeclass := gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
  1374    90ms    90ms      spc := makeSpanClass(sizeclass, false)
                          ⋮ return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))  mheap.go:594
                          ⋮ return int(*(*uint8)(unsafe.Pointer(&x)))  stubs.go:394
  1375    50ms    50ms      span := c.alloc[spc]
  1376   740ms   740ms      v := nextFreeFast(span)
                          ⋮ return gclinkptr(uintptr(result)*s.elemsize + s.base())  malloc.go:943
                          ⋮ return s.startAddr  mheap.go:523
                          ⋮ freeidx := result + 1  malloc.go:936
                          ⋮ s.allocCache >>= uint(theBit + 1)  malloc.go:940
                          ⋮ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?  malloc.go:932
                          ⋮ s.allocCount++  malloc.go:942
                          ⋮ result := s.freeindex + uint16(theBit)  malloc.go:934
                          ⋮ s.freeindex = freeidx  malloc.go:941
  1377    20ms    20ms      if v == 0 {
  1378       .   1.56s          v, span, checkGCTrigger = c.nextFree(spc)
  1379       .       .      }
  1380    10ms    10ms      x := unsafe.Pointer(v)
  1381       .       .      if span.needzero != 0 {
  1382    10ms   400ms          memclrNoHeapPointers(x, size)
  1383       .       .      }
  1384    10ms    10ms      if goarch.PtrSize == 8 && sizeclass == 1 {
  1385       .       .          // initHeapBits already set the pointer bits for the 8-byte sizeclass
  1386       .       .          // on 64-bit platforms.
  1387    10ms    10ms          c.scanAlloc += 8
  1388       .       .      } else {
  1389   170ms      1s          c.scanAlloc += heapSetTypeNoHeader(uintptr(x), size, typ, span)
                          ⋮ scanSize := span.writeHeapBitsSmall(x, dataSize, typ)  mbitmap.go:709
  1390       .       .      }
  1391    50ms    50ms      size = uintptr(gc.SizeClassToSize[sizeclass])
  1392       .       .
  1393       .       .      // Ensure that the stores above that initialize x to
  1394       .       .      // type-safe memory and set the heap bits occur before
  1395       .       .      // the caller can make x observable to the garbage
  1396       .       .      // collector. Otherwise, on weakly ordered machines,
  1397       .       .      // the garbage collector could follow a pointer to x,
  1398       .       .      // but see uninitialized memory or stale heap bits.
  1399    20ms    20ms      publicationBarrier()
  1400       .       .
  1401    40ms    40ms      if writeBarrier.enabled {
  1402       .       .          // Allocate black during GC.
  1403       .       .          // All slots hold nil so no scanning is needed.
  1404       .       .          // This may be racing with GC so do it atomically if there can be
  1405       .       .          // a race marking the bit.
  1406       .       .          gcmarknewobject(span, uintptr(x))
  1407       .       .      } else {
  1408       .       .          // Track the last free index before the mark phase. This field
  1409       .       .          // is only used by the garbage collector. During the mark phase
  1410       .       .          // this is used by the conservative scanner to filter out objects
  1411       .       .          // that are both free and recently-allocated. It's safe to do that
  1412       .       .          // because we allocate-black if the GC is enabled. The conservative
  1413       .       .          // scanner produces pointers out of thin air, so without additional
  1414       .       .          // synchronization it might otherwise observe a partially-initialized
  1415       .       .          // object, which could crash the program.
  1416    50ms    50ms          span.freeIndexForScan = span.freeindex
  1417       .       .      }
  1418       .       .
  1419       .       .      // Note cache c only valid while m acquired; see #47302
  1420       .       .      //
  1421       .       .      // N.B. Use the full size because that matches how the GC
  1422       .       .      // will update the mem profile on the "free" side.
  1423       .       .      //
  1424       .       .      // TODO(mknyszek): We should really count the header as part
  1425       .       .      // of gc_sys or something. The code below just pretends it is
  1426       .       .      // internal fragmentation and matches the GC's accounting by
  1427       .       .      // using the whole allocation slot.
  1428    20ms    20ms      c.nextSample -= int64(size)
  1429    10ms    10ms      if c.nextSample < 0 || MemProfileRate != c.memProfRate {
  1430   100ms   150ms          profilealloc(mp, x, size)
  1431       .       .      }
  1432    10ms    10ms      mp.mallocing = 0
  1433    60ms    60ms      releasem(mp)
                          ⋮ mp.locks--  runtime1.go:638
                          ⋮ if mp.locks == 0 && gp.preempt {  runtime1.go:639
  1434       .       .
  1435    90ms    90ms      if checkGCTrigger {
  1436       .    50ms          if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
  1437       .       .              gcStart(t)
  1438       .       .          }
  1439       .       .      }
  1440    20ms    20ms      return x, size
  1441       .       .  }
  1442       .       .
  1443       .       .  func mallocgcSmallScanHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
  1444       .       .      // Set mp.mallocing to keep from being preempted by GC.
  1445       .       .      mp := acquirem()
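mallocgcSmallScanNoHeader dominates the allocation time here (5.79s cumulative), so the natural follow-up is which call sites produce all of these small pointer-bearing objects. A hedged sketch of one way to answer that: write a heap profile and inspect its alloc_space/alloc_objects samples with go tool pprof (the helper name and package below are illustrative, not taken from the profiled program).

package profiling

import (
	"os"
	"runtime"
	"runtime/pprof"
)

// dumpHeapProfile is a hypothetical helper: it writes the current heap
// profile, which records sampled allocation call sites.
func dumpHeapProfile(path string) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()
	runtime.GC() // run a GC first so the profile reflects up-to-date statistics
	return pprof.WriteHeapProfile(f)
}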
runtime.mallocgcSmallScanHeader
/usr/lib/go/src/runtime/malloc.go
Total: 70ms 280ms (flat, cum) 0.75%
  1458       .       .          }
  1459       .       .      }
  1460       .       .      mp.mallocing = 1
  1461       .       .
  1462       .       .      checkGCTrigger := false
  1463    10ms    10ms      c := getMCache(mp)
  1464       .       .      size += gc.MallocHeaderSize
  1465       .       .      var sizeclass uint8
  1466       .       .      if size <= gc.SmallSizeMax-8 {
  1467       .       .          sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
  1468       .       .      } else {
  1469       .       .          sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)]
  1470       .       .      }
  1471       .       .      size = uintptr(gc.SizeClassToSize[sizeclass])
  1472       .       .      spc := makeSpanClass(sizeclass, false)
  1473       .       .      span := c.alloc[spc]
  1474    50ms    50ms      v := nextFreeFast(span)
                          ⋮ return gclinkptr(uintptr(result)*s.elemsize + s.base())  malloc.go:943
                          ⋮ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?  malloc.go:932
                          ⋮ s.freeindex = freeidx  malloc.go:941
  1475       .       .      if v == 0 {
  1476       .   120ms          v, span, checkGCTrigger = c.nextFree(spc)
  1477       .       .      }
  1478       .       .      x := unsafe.Pointer(v)
  1479       .       .      if span.needzero != 0 {
  1480       .    90ms          memclrNoHeapPointers(x, size)
  1481       .       .      }
  1482       .       .      header := (**_type)(x)
  1483       .       .      x = add(x, gc.MallocHeaderSize)
  1484    10ms    10ms      c.scanAlloc += heapSetTypeSmallHeader(uintptr(x), size-gc.MallocHeaderSize, typ, header, span)
                          ⋮ *header = typ  mbitmap.go:717
  1485       .       .
  1486       .       .      // Ensure that the stores above that initialize x to
  1487       .       .      // type-safe memory and set the heap bits occur before
  1488       .       .      // the caller can make x observable to the garbage
  1489       .       .      // collector. Otherwise, on weakly ordered machines,
runtime.newobject
/usr/lib/go/src/runtime/malloc.go
Total: 280ms 4.23s (flat, cum) 11.27%
  1741       .       .  }
  1742       .       .
  1743       .       .  // implementation of new builtin
  1744       .       .  // compiler (both frontend and SSA backend) knows the signature
  1745       .       .  // of this function.
  1746    70ms   230ms  func newobject(typ *_type) unsafe.Pointer {
  1747   210ms      4s      return mallocgc(typ.Size_, typ, true)
  1748       .       .  }
  1749       .       .
  1750       .       .  //go:linkname maps_newobject internal/runtime/maps.newobject
  1751       .       .  func maps_newobject(typ *_type) unsafe.Pointer {
  1752       .       .      return newobject(typ)
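newobject is the compiler's lowering of new(T) and, in essence, of composite literals whose address escapes, so its 4.23s cumulative is heap allocation pressure coming from user code rather than from the runtime itself. A toy illustration (an assumed example, not from the profiled program); building it with go build -gcflags=-m reports which values escape to the heap and therefore go through newobject/mallocgc.

package main

type point struct{ x, y int }

//go:noinline
func newPoint() *point {
	// Reported by -gcflags=-m as escaping to the heap: the pointer
	// outlives the stack frame, so the allocation is heap-allocated.
	return &point{x: 1, y: 2}
}

func main() {
	_ = newPoint()
}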
runtime.newarray
/usr/lib/go/src/runtime/malloc.go
Total: 20ms 660ms (flat, cum) 1.76%
  1786       .       .  // See go.dev/issue/67401.
  1787       .       .  //
  1788       .       .  //go:linkname newarray
  1789       .       .  func newarray(typ *_type, n int) unsafe.Pointer {
  1790       .       .      if n == 1 {
  1791    10ms   540ms          return mallocgc(typ.Size_, typ, true)
  1792       .       .      }
  1793       .       .      mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
  1794    10ms    10ms      if overflow || mem > maxAlloc || n < 0 {
  1795       .       .          panic(plainError("runtime: allocation size out of range"))
  1796       .       .      }
  1797       .   110ms      return mallocgc(mem, typ, true)
  1798       .       .  }
  1799       .       .
  1800       .       .  // reflect_unsafe_NewArray is meant for package reflect,
  1801       .       .  // but widely used packages access it using linkname.
  1802       .       .  // Notable members of the hall of shame include:
internal/runtime/maps.newarray
/usr/lib/go/src/runtime/malloc.go
Total: 0 660ms (flat, cum) 1.76%
  1816       .       .      return newarray(typ, n)
  1817       .       .  }
  1818       .       .
  1819       .       .  //go:linkname maps_newarray internal/runtime/maps.newarray
  1820       .       .  func maps_newarray(typ *_type, n int) unsafe.Pointer {
  1821       .   660ms      return newarray(typ, n)
  1822       .       .  }
  1823       .       .
  1824       .       .  // profilealloc resets the current mcache's nextSample counter and
runtime.profilealloc
/usr/lib/go/src/runtime/malloc.go
Total: 0 90ms (flat, cum) 0.24%
  1825       .       .  // records a memory profile sample.
  1826       .       .  //
  1827       .       .  // The caller must be non-preemptible and have a P.
  1828       .    10ms  func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
  1829       .       .      c := getMCache(mp)
  1830       .       .      if c == nil {
  1831       .       .          throw("profilealloc called without a P or outside bootstrapping")
  1832       .       .      }
  1833       .       .      c.memProfRate = MemProfileRate
  1834       .       .      c.nextSample = nextSample()
  1835       .    80ms      mProf_Malloc(mp, x, size)
  1836       .       .  }
  1837       .       .
  1838       .       .  // nextSample returns the next sampling point for heap profiling. The goal is
  1839       .       .  // to sample allocations on average every MemProfileRate bytes, but with a
  1840       .       .  // completely random distribution over the allocation timeline; this
runtime.persistentalloc
/usr/lib/go/src/runtime/malloc.go
Total: 0 10ms (flat, cum) 0.027%
  1916       .       .  // nosplit because it is used during write barriers and must not be preempted.
  1917       .       .  //
  1918       .       .  //go:nosplit
  1919       .       .  func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
  1920       .       .      var p *notInHeap
  1921       .    10ms      systemstack(func() {
runtime.persistentalloc.func1
/usr/lib/go/src/runtime/malloc.go
Total: 0 10ms (flat, cum) 0.027%
  1922       .    10ms          p = persistentalloc1(size, align, sysStat)
  1923       .       .      })
  1924       .       .      return unsafe.Pointer(p)
  1925       .       .  }
  1926       .       .
  1927       .       .  // Must run on system stack because stack growth can (re)invoke it.
runtime.persistentalloc1
/usr/lib/go/src/runtime/malloc.go
Total: 10ms 10ms (flat, cum) 0.027%
  1957       .       .          persistent = &mp.p.ptr().palloc
  1958       .       .      } else {
  1959       .       .          lock(&globalAlloc.mutex)
  1960       .       .          persistent = &globalAlloc.persistentAlloc
  1961       .       .      }
  1962    10ms    10ms      persistent.off = alignUp(persistent.off, align)
                          ⋮ return (n + a - 1) &^ (a - 1)  stubs.go:366
  1963       .       .      if persistent.off+size > persistentChunkSize || persistent.base == nil {
  1964       .       .          persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys, "immortal metadata"))
  1965       .       .          if persistent.base == nil {
  1966       .       .              if persistent == &globalAlloc.persistentAlloc {
  1967       .       .                  unlock(&globalAlloc.mutex)
runtime.gopark
/usr/lib/go/src/runtime/proc.go
Total: 30ms 30ms (flat, cum) 0.08%
   448       .       .      gp := mp.curg
   449       .       .      status := readgstatus(gp)
   450       .       .      if status != _Grunning && status != _Gscanrunning {
   451       .       .          throw("gopark: bad g status")
   452       .       .      }
   453    20ms    20ms      mp.waitlock = lock
   454       .       .      mp.waitunlockf = unlockf
   455       .       .      gp.waitreason = reason
   456       .       .      mp.waitTraceBlockReason = traceReason
   457       .       .      mp.waitTraceSkip = traceskip
   458       .       .      releasem(mp)
   459       .       .      // can't do anything that might move the G between Ms here.
   460       .       .      mcall(park_m)
   461    10ms    10ms  }
   462       .       .
   463       .       .  // Puts the current goroutine into a waiting state and unlocks the lock.
runtime.goparkunlock
/usr/lib/go/src/runtime/proc.go
Total: 0 10ms (flat, cum) 0.027%
   464       .       .  // The goroutine can be made runnable again by calling goready(gp).
   465       .       .  func goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) {
   466       .    10ms      gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceReason, traceskip)
   467       .       .  }
   468       .       .
   469       .       .  // goready should be an internal detail,
   470       .       .  // but widely used packages access it using linkname.
   471       .       .  // Notable members of the hall of shame include:
runtime.goready
/usr/lib/go/src/runtime/proc.go
Total: 0 380ms (flat, cum) 1.01%
   475       .       .  // Do not remove or change the type signature.
   476       .       .  // See go.dev/issue/67401.
   477       .       .  //
   478       .       .  //go:linkname goready
   479       .       .  func goready(gp *g, traceskip int) {
   480       .   380ms      systemstack(func() {
runtime.recv.goready.func1
/usr/lib/go/src/runtime/proc.go
Total: 0 380ms (flat, cum) 1.01%
   481       .   380ms          ready(gp, traceskip, true)
   482       .       .      })
   483       .       .  }
runtime.acquireSudog
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
   484       .       .
   485       .       .  //go:nosplit
   486    20ms    20ms  func acquireSudog() *sudog {
   487       .       .      // Delicate dance: the semaphore implementation calls
   488       .       .      // acquireSudog, acquireSudog calls new(sudog),
   489       .       .      // new calls malloc, malloc can call the garbage collector,
   490       .       .      // and the garbage collector calls the semaphore implementation
   491       .       .      // in stopTheWorld.
runtime.acquireSudog
/usr/lib/go/src/runtime/proc.go
Total: 50ms 50ms (flat, cum) 0.13%
   507       .       .          // If the central cache is empty, allocate a new one.
   508       .       .          if len(pp.sudogcache) == 0 {
   509       .       .              pp.sudogcache = append(pp.sudogcache, new(sudog))
   510       .       .          }
   511       .       .      }
   512    10ms    10ms      n := len(pp.sudogcache)
   513       .       .      s := pp.sudogcache[n-1]
   514       .       .      pp.sudogcache[n-1] = nil
   515       .       .      pp.sudogcache = pp.sudogcache[:n-1]
   516    40ms    40ms      if s.elem != nil {
   517       .       .          throw("acquireSudog: found s.elem != nil in cache")
   518       .       .      }
   519       .       .      releasem(mp)
   520       .       .      return s
   521       .       .  }
runtime.releaseSudog
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
   564       .       .          lock(&sched.sudoglock)
   565       .       .          last.next = sched.sudogcache
   566       .       .          sched.sudogcache = first
   567       .       .          unlock(&sched.sudoglock)
   568       .       .      }
   569    10ms    10ms      pp.sudogcache = append(pp.sudogcache, s)
   570       .       .      releasem(mp)
   571    10ms    10ms  }
   572       .       .
   573       .       .  // called from assembly.
   574       .       .  func badmcall(fn func(*g)) {
   575       .       .      throw("runtime: mcall called on m->g0 stack")
   576       .       .  }
runtime.(*m).becomeSpinning
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
  1053       .       .  //go:linkname pprof_makeProfStack
  1054       .       .  func pprof_makeProfStack() []uintptr { return makeProfStack() }
  1055       .       .
  1056       .       .  func (mp *m) becomeSpinning() {
  1057       .       .      mp.spinning = true
  1058    20ms    20ms      sched.nmspinning.Add(1)
                          ⋮ return Xaddint32(&i.value, delta)  types.go:56
  1059       .       .      sched.needspinning.Store(0)
  1060       .       .  }
  1061       .       .
  1062       .       .  // Take a snapshot of allp, for use after dropping the P.
  1063       .       .  //
runtime.(*m).snapshotAllp
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027%
  1065       .       .  // the P. The M holds a reference on the snapshot to keep the backing array
  1066       .       .  // alive.
  1067       .       .  //
  1068       .       .  //go:yeswritebarrierrec
  1069       .       .  func (mp *m) snapshotAllp() []*p {
  1070    10ms    10ms      mp.allpSnapshot = allp
  1071       .       .      return mp.allpSnapshot
  1072       .       .  }
  1073       .       .
  1074       .       .  // Clear the saved allp snapshot. Should be called as soon as the snapshot is
  1075       .       .  // no longer required.
runtime.(*m).clearAllpSnapshot
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
  1076       .       .  //
  1077       .       .  // Must be called after reacquiring a P, as it requires a write barrier.
  1078       .       .  //
  1079       .       .  //go:yeswritebarrierrec
  1080       .       .  func (mp *m) clearAllpSnapshot() {
  1081    10ms    10ms      mp.allpSnapshot = nil
  1082    10ms    10ms  }
  1083       .       .
  1084       .       .  func (mp *m) hasCgoOnStack() bool {
  1085       .       .      return mp.ncgo > 0 || mp.isextra
  1086       .       .  }
  1087       .       .
runtime.ready
/usr/lib/go/src/runtime/proc.go
Total: 0 380ms (flat, cum) 1.01%
  1110       .       .          throw("bad g->status in ready")
  1111       .       .      }
  1112       .       .
  1113       .       .      // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
  1114       .       .      trace := traceAcquire()
  1115       .    20ms      casgstatus(gp, _Gwaiting, _Grunnable)
  1116       .       .      if trace.ok() {
  1117       .       .          trace.GoUnpark(gp, traceskip)
  1118       .       .          traceRelease(trace)
  1119       .       .      }
  1120       .    10ms      runqput(mp.p.ptr(), gp, next)
  1121       .   350ms      wakep()
  1122       .       .      releasem(mp)
  1123       .       .  }
  1124       .       .
  1125       .       .  // freezeStopWait is a large value that freezetheworld sets
  1126       .       .  // sched.stopwait to in order to request that all Gs permanently stop.
runtime.readgstatus
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027%
  1186       .       .  // All reads and writes of g's status go through readgstatus, casgstatus
  1187       .       .  // castogscanstatus, casfrom_Gscanstatus.
  1188       .       .  //
  1189       .       .  //go:nosplit
  1190       .       .  func readgstatus(gp *g) uint32 {
  1191    10ms    10ms      return gp.atomicstatus.Load()
                          ⋮ return Load(&u.value)  types.go:194
  1192       .       .  }
  1193       .       .
  1194       .       .  // The Gscanstatuses are acting like locks and this releases them.
  1195       .       .  // If it proves to be a performance hit we should be able to make these
  1196       .       .  // simple atomic stores but for now we are going to throw if
runtime.casgstatus
/usr/lib/go/src/runtime/proc.go
Total: 350ms 350ms (flat, cum) 0.93%
  1251       .       .  // and casfrom_Gscanstatus instead.
  1252       .       .  // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
  1253       .       .  // put it in the Gscan state is finished.
  1254       .       .  //
  1255       .       .  //go:nosplit
  1256    20ms    20ms  func casgstatus(gp *g, oldval, newval uint32) {
  1257    10ms    10ms      if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
  1258       .       .          systemstack(func() {
  1259       .       .              // Call on the systemstack to prevent print and throw from counting
  1260       .       .              // against the nosplit stack reservation.
  1261       .       .              print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
  1262       .       .              throw("casgstatus: bad incoming values")
  1263       .       .          })
  1264       .       .      }
  1265       .       .
  1266       .       .      lockWithRankMayAcquire(nil, lockRankGscan)
  1267       .       .
  1268       .       .      // See https://golang.org/cl/21503 for justification of the yield delay.
  1269       .       .      const yieldDelay = 5 * 1000
  1270       .       .      var nextYield int64
  1271       .       .
  1272       .       .      // loop if gp->atomicstatus is in a scan state giving
  1273       .       .      // GC time to finish and change the state to oldval.
  1274   270ms   270ms      for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ {
                          ⋮ return Cas(&u.value, old, new)  types.go:236
  1275       .       .          if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable {
  1276       .       .              systemstack(func() {
  1277       .       .                  // Call on the systemstack to prevent throw from counting
  1278       .       .                  // against the nosplit stack reservation.
  1279       .       .                  throw("casgstatus: waiting for Gwaiting but is Grunnable")
  1280       .       .              })
  1281       .       .          }
  1282    10ms    10ms          if i == 0 {
  1283       .       .              nextYield = nanotime() + yieldDelay
  1284       .       .          }
  1285       .       .          if nanotime() < nextYield {
  1286       .       .              for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ {
  1287       .       .                  procyield(1)
  1288       .       .              }
  1289       .       .          } else {
  1290       .       .              osyield()
  1291       .       .              nextYield = nanotime() + yieldDelay/2
  1292       .       .          }
  1293       .       .      }
  1294       .       .
  1295    10ms    10ms      if gp.bubble != nil {
  1296       .       .          systemstack(func() {
  1297       .       .              gp.bubble.changegstatus(gp, oldval, newval)
  1298       .       .          })
  1299       .       .      }
  1300       .       .
  1301    10ms    10ms      if oldval == _Grunning {
  1302       .       .          // Track every gTrackingPeriod time a goroutine transitions out of running.
  1303       .       .          if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
  1304       .       .              gp.tracking = true
  1305       .       .          }
  1306       .       .          gp.trackingSeq++
  1307       .       .      }
  1308    10ms    10ms      if !gp.tracking {
  1309       .       .          return
  1310       .       .      }
  1311       .       .
  1312       .       .      // Handle various kinds of tracking.
  1313       .       .      //
  1314       .       .      // Currently:
  1315       .       .      // - Time spent in runnable.
  1316       .       .      // - Time spent blocked on a sync.Mutex or sync.RWMutex.
  1317       .       .      switch oldval {
  1318       .       .      case _Grunnable:
  1319       .       .          // We transitioned out of runnable, so measure how much
  1320       .       .          // time we spent in this state and add it to
  1321       .       .          // runnableTime.
  1322    10ms    10ms          now := nanotime()
                          ⋮ return nanotime1()  time_nofake.go:33
  1323       .       .          gp.runnableTime += now - gp.trackingStamp
  1324       .       .          gp.trackingStamp = 0
  1325       .       .      case _Gwaiting:
  1326       .       .          if !gp.waitreason.isMutexWait() {
  1327       .       .              // Not blocking on a lock.
runtime.casgstatus
/usr/lib/go/src/runtime/proc.go
Total: 30ms 140ms (flat, cum) 0.37%
  1346       .       .          now := nanotime()
  1347       .       .          gp.trackingStamp = now
  1348       .       .      case _Grunnable:
  1349       .       .          // We just transitioned into runnable, so record what
  1350       .       .          // time that happened.
  1351    30ms    30ms          now := nanotime()
                          ⋮ return nanotime1()  time_nofake.go:33
  1352       .       .          gp.trackingStamp = now
  1353       .       .      case _Grunning:
  1354       .       .          // We're transitioning into running, so turn off
  1355       .       .          // tracking and record how much time we spent in
  1356       .       .          // runnable.
  1357       .       .          gp.tracking = false
  1358       .   110ms          sched.timeToRun.record(gp.runnableTime)
  1359       .       .          gp.runnableTime = 0
  1360       .       .      }
  1361       .       .  }
  1362       .       .
  1363       .       .  // casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason.
runtime.mPark
/usr/lib/go/src/runtime/proc.go
Total: 0 1.04s (flat, cum) 2.77%
  1955       .       .  // mPark causes a thread to park itself, returning once woken.
  1956       .       .  //
  1957       .       .  //go:nosplit
  1958       .       .  func mPark() {
  1959       .       .      gp := getg()
  1960       .   1.04s      notesleep(&gp.m.park)
  1961       .       .      noteclear(&gp.m.park)
  1962       .       .  }
  1963       .       .
  1964       .       .  // mexit tears down and exits the current thread.
  1965       .       .  //
runtime.stopm
/usr/lib/go/src/runtime/proc.go
Total: 50ms 1.23s (flat, cum) 3.28%
  2978       .       .      }
  2979       .       .  }
  2980       .       .
  2981       .       .  // Stops execution of the current m until new work is available.
  2982       .       .  // Returns with acquired P.
  2983    10ms    10ms  func stopm() {
  2984       .       .      gp := getg()
  2985       .       .
  2986       .       .      if gp.m.locks != 0 {
  2987       .       .          throw("stopm holding locks")
  2988       .       .      }
  2989       .       .      if gp.m.p != 0 {
  2990       .       .          throw("stopm holding p")
  2991       .       .      }
  2992       .       .      if gp.m.spinning {
  2993       .       .          throw("stopm spinning")
  2994       .       .      }
  2995       .       .
  2996       .    20ms      lock(&sched.lock)
                          ⋮ lockWithRank(l, getLockRank(l))  lock_spinbit.go:152
                          ⋮ lock2(l)  lockrank_off.go:24
  2997       .    30ms      mput(gp.m)
  2998       .    30ms      unlock(&sched.lock)
                          ⋮ unlockWithRank(l)  lock_spinbit.go:261
                          ⋮ unlock2(l)  lockrank_off.go:35
  2999       .   1.04s      mPark()
                          ⋮ notesleep(&gp.m.park)  proc.go:1960
  3000    40ms   100ms      acquirep(gp.m.nextp.ptr())
  3001       .       .      gp.m.nextp = 0
  3002       .       .  }
  3003       .       .
  3004       .       .  func mspinning() {
  3005       .       .      // startm's caller incremented nmspinning. Set the new M's spinning.
runtime.startm
/usr/lib/go/src/runtime/proc.go
Total: 20ms 30ms (flat, cum) 0.08%
  3040       .       .      // context, otherwise such preemption could occur on function entry to
  3041       .       .      // startm. Callers passing a nil P may be preemptible, so we must
  3042       .       .      // disable preemption before acquiring a P from pidleget below.
  3043       .       .      mp := acquirem()
  3044       .       .      if !lockheld {
  3045       .    10ms          lock(&sched.lock)
                          ⋮ lockWithRank(l, getLockRank(l))  lock_spinbit.go:152
                          ⋮ lock2(l)  lockrank_off.go:24
  3046       .       .      }
  3047       .       .      if pp == nil {
  3048       .       .          if spinning {
  3049       .       .              // TODO(prattmic): All remaining calls to this function
  3050       .       .              // with _p_ == nil could be cleaned up to find a P
  3051       .       .              // before calling startm.
  3052       .       .              throw("startm: P required for spinning=true")
  3053       .       .          }
  3054       .       .          pp, _ = pidleget(0)
  3055       .       .          if pp == nil {
  3056       .       .              if !lockheld {
  3057       .       .                  unlock(&sched.lock)
  3058       .       .              }
  3059       .       .              releasem(mp)
  3060       .       .              return
  3061       .       .          }
  3062       .       .      }
  3063    20ms    20ms      nmp := mget()
                          ⋮ sched.midle = mp.schedlink  proc.go:6830
  3064       .       .      if nmp == nil {
  3065       .       .          // No M is available, we must drop sched.lock and call newm.
  3066       .       .          // However, we already own a P to assign to the M.
  3067       .       .          //
  3068       .       .          // Once sched.lock is released, another G (e.g., in a syscall),
runtime.startm
/usr/lib/go/src/runtime/proc.go
Total: 10ms 2.29s (flat, cum) 6.10%
  3095       .       .          return
  3096       .       .      }
  3097       .       .      if !lockheld {
  3098       .       .          unlock(&sched.lock)
  3099       .       .      }
  3100    10ms    10ms      if nmp.spinning {
  3101       .       .          throw("startm: m is spinning")
  3102       .       .      }
  3103       .       .      if nmp.nextp != 0 {
  3104       .       .          throw("startm: m has p")
  3105       .       .      }
  3106       .       .      if spinning && !runqempty(pp) {
  3107       .       .          throw("startm: p has runnable gs")
  3108       .       .      }
  3109       .       .      // The caller incremented nmspinning, so set m.spinning in the new M.
  3110       .       .      nmp.spinning = spinning
  3111       .       .      nmp.nextp.set(pp)
  3112       .   2.28s      notewakeup(&nmp.park)
  3113       .       .      // Ownership transfer of pp committed by wakeup. Preemption is now
  3114       .       .      // safe.
  3115       .       .      releasem(mp)
  3116       .       .  }
  3117       .       .
runtime.wakep
/usr/lib/go/src/runtime/proc.go
Total: 90ms 2.54s (flat, cum) 6.77%
  3201       .       .  //
  3202       .       .  //go:linkname wakep
  3203       .       .  func wakep() {
  3204       .       .      // Be conservative about spinning threads, only start one if none exist
  3205       .       .      // already.
  3206    70ms    70ms      if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) {
                          ⋮ return Loadint32(&i.value)  types.go:21
                          ⋮ return Casint32(&i.value, old, new)  types.go:37
  3207       .       .          return
  3208       .       .      }
  3209       .       .
  3210       .       .      // Disable preemption until ownership of pp transfers to the next M in
  3211       .       .      // startm. Otherwise preemption here would leave pp stuck waiting to
  3212       .       .      // enter _Pgcstop.
  3213       .       .      //
  3214       .       .      // See preemption comment on acquirem in startm for more details.
  3215       .       .      mp := acquirem()
  3216       .       .
  3217       .       .      var pp *p
  3218       .    20ms      lock(&sched.lock)
                          ⋮ lockWithRank(l, getLockRank(l))  lock_spinbit.go:152
                          ⋮ lock2(l)  lockrank_off.go:24
  3219       .    80ms      pp, _ = pidlegetSpinning(0)
  3220       .       .      if pp == nil {
  3221       .       .          if sched.nmspinning.Add(-1) < 0 {
  3222       .       .              throw("wakep: negative nmspinning")
  3223       .       .          }
  3224       .       .          unlock(&sched.lock)
  3225       .       .          releasem(mp)
  3226       .       .          return
  3227       .       .      }
  3228       .       .      // Since we always have a P, the race in the "No M is available"
  3229       .       .      // comment in startm doesn't apply during the small window between the
  3230       .       .      // unlock here and lock in startm. A checkdead in between will always
  3231       .       .      // see at least one running M (ours).
  3232       .    30ms      unlock(&sched.lock)
                          ⋮ unlockWithRank(l)  lock_spinbit.go:261
                          ⋮ unlock2(l)  lockrank_off.go:35
  3233       .       .
  3234       .   2.32s      startm(pp, true, false)
  3235       .       .
  3236    10ms    10ms      releasem(mp)
                          ⋮ mp.locks--  runtime1.go:638
  3237    10ms    10ms  }
  3238       .       .
  3239       .       .  // Stops execution of the current m that is locked to a g until the g is runnable again.
  3240       .       .  // Returns with acquired P.
  3241       .       .  func stoplockedm() {
  3242       .       .      gp := getg()
runtime.execute
/usr/lib/go/src/runtime/proc.go
Total: 60ms 230ms (flat, cum) 0.61%
  3331       .       .
  3332       .       .      // Assign gp.m before entering _Grunning so running Gs have an M.
  3333       .       .      mp.curg = gp
  3334       .       .      gp.m = mp
  3335       .       .      gp.syncSafePoint = false // Clear the flag, which may have been set by morestack.
  3336       .   150ms      casgstatus(gp, _Grunnable, _Grunning)
  3337       .       .      gp.waitsince = 0
  3338       .       .      gp.preempt = false
  3339    10ms    10ms      gp.stackguard0 = gp.stack.lo + stackGuard
  3340       .       .      if !inheritTime {
  3341    10ms    10ms          mp.p.ptr().schedtick++
  3342       .       .      }
  3343       .       .
  3344       .       .      // Check whether the profiler needs to be turned on or off.
  3345       .       .      hz := sched.profilehz
  3346       .       .      if mp.profilehz != hz {
  3347       .       .          setThreadCPUProfiler(hz)
  3348       .       .      }
  3349       .       .
  3350    20ms    20ms      trace := traceAcquire()
  3351       .       .      if trace.ok() {
  3352       .       .          trace.GoStart()
  3353       .       .          traceRelease(trace)
  3354       .       .      }
  3355       .       .
  3356    20ms    40ms      gogo(&gp.sched)
  3357       .       .  }
  3358       .       .
  3359       .       .  // Finds a runnable goroutine to execute.
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 70ms 140ms (flat, cum) 0.37%
  3360       .       .  // Tries to steal from other P's, get g from local or global queue, poll network.
  3361       .       .  // tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
  3362       .       .  // reader) so the caller should try to wake a P.
  3363    10ms    10ms  func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
  3364       .       .      mp := getg().m
  3365       .       .
  3366       .       .      // The conditions here and in handoffp must agree: if
  3367       .       .      // findrunnable would return a G to run, handoffp must start
  3368       .       .      // an M.
  3369       .       .
  3370       .       .  top:
  3371       .       .      // We may have collected an allp snapshot below. The snapshot is only
  3372       .       .      // required in each loop iteration. Clear it to all GC to collect the
  3373       .       .      // slice.
  3374    20ms    40ms      mp.clearAllpSnapshot()
  3375       .       .
  3376       .       .      pp := mp.p.ptr()
  3377       .       .      if sched.gcwaiting.Load() {
  3378       .       .          gcstopm()
  3379       .       .          goto top
  3380       .       .      }
  3381    10ms    10ms      if pp.runSafePointFn != 0 {
  3382       .       .          runSafePointFn()
  3383       .       .      }
  3384       .       .
  3385       .       .      // now and pollUntil are saved for work stealing later,
  3386       .       .      // which may steal timers. It's important that between now
  3387       .       .      // and then, nothing blocks, so these numbers remain mostly
  3388       .       .      // relevant.
  3389       .    50ms      now, pollUntil, _ := pp.timers.check(0, nil)
  3390       .       .
  3391       .       .      // Try to schedule the trace reader.
  3392    10ms    10ms      if traceEnabled() || traceShuttingDown() {
  3393       .       .          gp := traceReader()
  3394       .       .          if gp != nil {
  3395       .       .              trace := traceAcquire()
  3396       .       .              casgstatus(gp, _Gwaiting, _Grunnable)
  3397       .       .              if trace.ok() {
  3398       .       .                  trace.GoUnpark(gp, 0)
  3399       .       .                  traceRelease(trace)
  3400       .       .              }
  3401       .       .              return gp, false, true
  3402       .       .          }
  3403       .       .      }
  3404       .       .
  3405       .       .      // Try to schedule a GC worker.
  3406    20ms    20ms      if gcBlackenEnabled != 0 {
  3407       .       .          gp, tnow := gcController.findRunnableGCWorker(pp, now)
  3408       .       .          if gp != nil {
  3409       .       .              return gp, false, true
  3410       .       .          }
  3411       .       .          now = tnow
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 120ms 140ms (flat, cum) 0.37%
  3422       .       .              return gp, false, false
  3423       .       .          }
  3424       .       .      }
  3425       .       .
  3426       .       .      // Wake up the finalizer G.
  3427    10ms    10ms      if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
  3428       .       .          if gp := wakefing(); gp != nil {
  3429       .       .              ready(gp, 0, true)
  3430       .       .          }
  3431       .       .      }
  3432       .       .
  3433       .       .      // Wake up one or more cleanup Gs.
  3434       .       .      if gcCleanups.needsWake() {
  3435       .       .          gcCleanups.wake()
  3436       .       .      }
  3437       .       .
  3438    10ms    10ms      if *cgo_yield != nil {
  3439       .       .          asmcgocall(*cgo_yield, nil)
  3440       .       .      }
  3441       .       .
  3442       .       .      // local runq
  3443    70ms    70ms      if gp, inheritTime := runqget(pp); gp != nil {
                          ⋮ if next != 0 && pp.runnext.cas(next, 0) {  proc.go:7170
                          ⋮ return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new))  runtime2.go:246
                          ⋮ t := pp.runqtail  proc.go:7176
                          ⋮ if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume  proc.go:7181
  3444       .       .          return gp, inheritTime, false
  3445       .       .      }
  3446       .       .
  3447       .       .      // global runq
  3448    20ms    20ms      if !sched.runq.empty() {
  3449       .    10ms          lock(&sched.lock)
                          ⋮ lockWithRank(l, getLockRank(l))  lock_spinbit.go:152
                          ⋮ lock2(l)  lockrank_off.go:24
  3450       .       .          gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
  3451       .    10ms          unlock(&sched.lock)
                          ⋮ unlockWithRank(l)  lock_spinbit.go:261
                          ⋮ unlock2(l)  lockrank_off.go:35
  3452       .       .          if gp != nil {
  3453       .       .              if runqputbatch(pp, &q); !q.empty() {
  3454       .       .                  throw("Couldn't put Gs into empty local runq")
  3455       .       .              }
  3456       .       .              return gp, false, false
  3457       .       .          }
  3458       .       .      }
  3459       .       .
  3460       .       .      // Poll network.
  3461       .       .      // This netpoll is only an optimization before we resort to stealing.
  3462       .       .      // We can safely skip it if there are no waiters or a thread is blocked
  3463       .       .      // in netpoll already. If there is any kind of logical race with that
  3464       .       .      // blocked thread (e.g. it has already returned from netpoll, but does
  3465       .       .      // not set lastpoll yet), this thread will do blocking netpoll below
  3466       .       .      // anyway.
  3467       .       .      // We only poll from one thread at a time to avoid kernel contention
  3468       .       .      // on machines with many cores.
  3469    10ms    10ms      if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 && sched.pollingNet.Swap(1) == 0 {
                          ⋮ return netpollWaiters.Load() > 0  netpoll.go:678
  3470       .       .          list, delta := netpoll(0)
  3471       .       .          sched.pollingNet.Store(0)
  3472       .       .          if !list.empty() { // non-blocking
  3473       .       .              gp := list.pop()
  3474       .       .              injectglist(&list)
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 20ms 940ms (flat, cum) 2.50%
  3488       .       .      // Limit the number of spinning Ms to half the number of busy Ps.
  3489       .       .      // This is necessary to prevent excessive CPU consumption when
  3490       .       .      // GOMAXPROCS>>1 but the program parallelism is low.
  3491       .       .      if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
  3492       .       .          if !mp.spinning {
  3493    20ms    20ms              mp.becomeSpinning()
                          ⋮ sched.nmspinning.Add(1)  proc.go:1058
                          ⋮ return Xaddint32(&i.value, delta)  types.go:56
  3494       .       .          }
  3495       .       .
  3496       .   920ms          gp, inheritTime, tnow, w, newWork := stealWork(now)
  3497       .       .          if gp != nil {
  3498       .       .              // Successfully stole.
  3499       .       .              return gp, inheritTime, false
  3500       .       .          }
  3501       .       .          if newWork {
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 10ms 80ms (flat, cum) 0.21%
  3556       .       .      // everything up to cap(allp) is immutable.
  3557       .       .      //
  3558       .       .      // We clear the snapshot from the M after return via
  3559       .       .      // mp.clearAllpSnapshop (in schedule) and on each iteration of the top
  3560       .       .      // loop.
  3561       .    10ms      allpSnapshot := mp.snapshotAllp()
  3562       .       .      // Also snapshot masks. Value changes are OK, but we can't allow
  3563       .       .      // len to change out from under us.
  3564       .       .      idlepMaskSnapshot := idlepMask
  3565       .       .      timerpMaskSnapshot := timerpMask
  3566       .       .
  3567       .       .      // return P and block
  3568       .    60ms      lock(&sched.lock)
                          ⋮ lockWithRank(l, getLockRank(l))  lock_spinbit.go:152
                          ⋮ lock2(l)  lockrank_off.go:24
  3569    10ms    10ms      if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
                          ⋮ return b.u.Load() != 0  types.go:168
  3570       .       .          unlock(&sched.lock)
  3571       .       .          goto top
  3572       .       .      }
  3573       .       .      if !sched.runq.empty() {
  3574       .       .          gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 0 140ms (flat, cum) 0.37%
  3585       .       .          // See "Delicate dance" comment below.
  3586       .       .          mp.becomeSpinning()
  3587       .       .          unlock(&sched.lock)
  3588       .       .          goto top
  3589       .       .      }
  3590       .    20ms      if releasep() != pp {
  3591       .       .          throw("findrunnable: wrong p")
  3592       .       .      }
  3593       .   100ms      now = pidleput(pp, now)
  3594       .    20ms      unlock(&sched.lock)
                          ⋮ unlockWithRank(l)  lock_spinbit.go:261
                          ⋮ unlock2(l)  lockrank_off.go:35
  3595       .       .
  3596       .       .      // Delicate dance: thread transitions from spinning to non-spinning
  3597       .       .      // state, potentially concurrently with submission of new work. We must
  3598       .       .      // drop nmspinning first and then check all sources again (with
  3599       .       .      // #StoreLoad memory barrier in between). If we do it the other way
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 30ms 140ms (flat, cum) 0.37%
  3630       .       .      // Also see "Worker thread parking/unparking" comment at the top of the
  3631       .       .      // file.
  3632       .       .      wasSpinning := mp.spinning
  3633       .       .      if mp.spinning {
  3634       .       .          mp.spinning = false
  3635    20ms    20ms          if sched.nmspinning.Add(-1) < 0 {
                          ⋮ return Xaddint32(&i.value, delta)  types.go:56
  3636       .       .              throw("findrunnable: negative nmspinning")
  3637       .       .          }
  3638       .       .
  3639       .       .          // Note the for correctness, only the last M transitioning from
  3640       .       .          // spinning to non-spinning must perform these rechecks to
  3641       .       .          // ensure no missed work. However, the runtime has some cases
  3642       .       .          // of transient increments of nmspinning that are decremented
  3643       .       .          // without going through this path, so we must be conservative
  3644       .       .          // and perform the check on all spinning Ms.
  3645       .       .          //
  3646       .       .          // See https://go.dev/issue/43997.
  3647       .       .
  3648       .       .          // Check global and P runqueues again.
  3649       .       .
  3650       .       .          lock(&sched.lock)
  3651    10ms    10ms          if !sched.runq.empty() {
  3652       .       .              pp, _ := pidlegetSpinning(0)
  3653       .       .              if pp != nil {
  3654       .       .                  gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
  3655       .       .                  unlock(&sched.lock)
  3656       .       .                  if gp == nil {
  3657       .       .                      throw("global runq empty with non-zero runqsize")
  3658       .       .                  }
  3659       .       .                  if runqputbatch(pp, &q); !q.empty() {
  3660       .       .                      throw("Couldn't put Gs into empty local runq")
  3661       .       .                  }
  3662       .       .                  acquirep(pp)
  3663       .       .                  mp.becomeSpinning()
  3664       .       .                  return gp, false, false
  3665       .       .              }
  3666       .       .          }
  3667       .    50ms          unlock(&sched.lock)
                          ⋮ unlockWithRank(l)  lock_spinbit.go:261
                          ⋮ unlock2(l)  lockrank_off.go:35
  3668       .       .
  3669       .    20ms          pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
  3670       .       .          if pp != nil {
  3671       .       .              acquirep(pp)
  3672       .       .              mp.becomeSpinning()
  3673       .       .              goto top
  3674       .       .          }
  3675       .       .
  3676       .       .          // Check for idle-priority GC work again.
  3677       .    40ms          pp, gp := checkIdleGCNoP()
  3678       .       .          if pp != nil {
  3679       .       .              acquirep(pp)
  3680       .       .              mp.becomeSpinning()
  3681       .       .
  3682       .       .              // Run the idle worker.
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 10ms 30ms (flat, cum) 0.08%
  3694       .       .          // transitioning from spinning to non-spinning.
  3695       .       .          //
  3696       .       .          // Note that we cannot use checkTimers here because it calls
  3697       .       .          // adjusttimers which may need to allocate memory, and that isn't
  3698       .       .          // allowed when we don't have an active P.
  3699       .    20ms          pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
  3700       .       .      }
  3701       .       .
  3702       .       .      // We don't need allp anymore at this pointer, but can't clear the
  3703       .       .      // snapshot without a P for the write barrier..
  3704       .       .
  3705       .       .      // Poll network until next timer.
  3706    10ms    10ms      if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
  3707       .       .          sched.pollUntil.Store(pollUntil)
  3708       .       .          if mp.p != 0 {
  3709       .       .              throw("findrunnable: netpoll with p")
  3710       .       .          }
  3711       .       .          if mp.spinning {
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 0 10ms (flat, cum) 0.027%
  3723       .       .          }
  3724       .       .          if faketime != 0 {
  3725       .       .              // When using fake time, just poll.
  3726       .       .              delay = 0
  3727       .       .          }
  3728       .    10ms          list, delta := netpoll(delay) // block until new work is available
  3729       .       .          // Refresh now again, after potentially blocking.
  3730       .       .          now = nanotime()
  3731       .       .          sched.pollUntil.Store(0)
  3732       .       .          sched.lastpoll.Store(now)
  3733       .       .          if faketime != 0 && list.empty() {
runtime.findRunnable
/usr/lib/go/src/runtime/proc.go
Total: 0 1.23s (flat, cum) 3.28%
  3765       .       .              pollerPollUntil := sched.pollUntil.Load()
  3766       .       .              if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
  3767       .       .                  netpollBreak()
  3768       .       .              }
  3769       .       .          }
  3770       .   1.23s      stopm()
  3771       .       .      goto top
  3772       .       .  }
  3773       .       .
  3774       .       .  // pollWork reports whether there is non-background work this P could
  3775       .       .  // be doing. This is a fairly lightweight check to be used for
runtime.stealWork
/usr/lib/go/src/runtime/proc.go
Total: 290ms 360ms (flat, cum) 0.96%
  3804       .       .
  3805       .       .      ranTimer := false
  3806       .       .
  3807       .       .      const stealTries = 4
  3808       .       .      for i := 0; i < stealTries; i++ {
  3809    90ms    90ms          stealTimersOrRunNextG := i == stealTries-1
  3810       .       .
  3811    80ms    80ms          for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
                          ⋮ mp := getg().m  rand.go:228
                          ⋮ mp.cheaprand += 0xa0761d6478bd642f  rand.go:235
                          ⋮ enum.pos = (enum.pos + enum.inc) % enum.count  proc.go:7588
                          ⋮ return enum.i == enum.count  proc.go:7583
  3812       .       .              if sched.gcwaiting.Load() {
  3813       .       .                  // GC work may be available.
  3814       .       .                  return nil, false, now, pollUntil, true
  3815       .       .              }
  3816    60ms    60ms              p2 := allp[enum.position()]
                          ⋮ return enum.pos  proc.go:7592
  3817    20ms    20ms              if pp == p2 {
  3818    10ms    10ms                  continue
  3819       .       .              }
  3820       .       .
  3821       .       .              // Steal timers from p2. This call to checkTimers is the only place
  3822       .       .              // where we might hold a lock on a different P's timers. We do this
  3823       .       .              // once on the last pass before checking runnext because stealing
  3824       .       .              // from the other P's runnext should be the last resort, so if there
  3825       .       .              // are timers to steal do that first.
  3826       .       .              //
  3827       .       .              // We only check timers on one of the stealing iterations because
  3828       .       .              // the time stored in now doesn't change in this loop and checking
  3829       .       .              // the timers for each P more than once with the same value of now
  3830       .       .              // is probably a waste of time.
  3831       .       .              //
  3832       .       .              // timerpMask tells us whether the P may have timers at all. If it
  3833       .       .              // can't, no need to check at all.
  3834    10ms    10ms              if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
  3835    20ms    90ms                  tnow, w, ran := p2.timers.check(now, nil)
  3836       .       .                  now = tnow
  3837       .       .                  if w != 0 && (pollUntil == 0 || w < pollUntil) {
  3838       .       .                      pollUntil = w
  3839       .       .                  }
  3840       .       .                  if ran {
runtime.stealWork
/usr/lib/go/src/runtime/proc.go
Total: 60ms 560ms (flat, cum) 1.49%
  3852       .       .                  ranTimer = true
  3853       .       .              }
  3854       .       .          }
  3855       .       .
  3856       .       .          // Don't bother to attempt to steal if p2 is idle.
  3857    60ms    60ms          if !idlepMask.read(enum.position()) {
                          ⋮ return (atomic.Load(&p[word]) & mask) != 0  proc.go:6911
                          ⋮ return enum.pos  proc.go:7592
  3858       .   500ms              if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil {
  3859       .       .                  return gp, false, now, pollUntil, ranTimer
  3860       .       .              }
  3861       .       .          }
  3862       .       .      }
  3863       .       .  }
runtime.checkRunqsNoP
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
  3873       .       .  // On entry we have no P. If a G is available to steal and a P is available,
  3874       .       .  // the P is returned which the caller should acquire and attempt to steal the
  3875       .       .  // work to.
  3876       .       .  func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p {
  3877       .       .      for id, p2 := range allpSnapshot {
  3878    20ms    20ms          if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
                          ⋮ head := atomic.Load(&pp.runqhead)  proc.go:7019
  3879       .       .              lock(&sched.lock)
  3880       .       .              pp, _ := pidlegetSpinning(0)
  3881       .       .              if pp == nil {
  3882       .       .                  // Can't get a P, don't bother checking remaining Ps.
  3883       .       .                  unlock(&sched.lock)
runtime.checkTimersNoP
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053%
  3895       .       .  // Check all Ps for a timer expiring sooner than pollUntil.
  3896       .       .  //
  3897       .       .  // Returns updated pollUntil value.
  3898       .       .  func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 {
  3899       .       .      for id, p2 := range allpSnapshot {
  3900    10ms    10ms          if timerpMaskSnapshot.read(uint32(id)) {
                          ⋮ mask := uint32(1) << (id % 32)  proc.go:6910
  3901    10ms    10ms              w := p2.timers.wakeTime()
                          ⋮ nextWhen := ts.minWhenModified.Load()  time.go:988
                          ⋮ return Loadint64(&i.value)  types.go:74
  3902       .       .              if w != 0 && (pollUntil == 0 || w < pollUntil) {
  3903       .       .                  pollUntil = w
  3904       .       .              }
  3905       .       .          }
  3906       .       .      }
runtime.checkIdleGCNoP
/usr/lib/go/src/runtime/proc.go
Total: 40ms 40ms (flat, cum) 0.11%
  3910       .       .
  3911       .       .  // Check for idle-priority GC, without a P on entry.
  3912       .       .  //
  3913       .       .  // If some GC work, a P, and a worker G are all available, the P and G will be
  3914       .       .  // returned. The returned P has not been wired yet.
  3915    30ms    30ms  func checkIdleGCNoP() (*p, *g) {
  3916       .       .      // N.B. Since we have no P, gcBlackenEnabled may change at any time; we
  3917       .       .      // must check again after acquiring a P. As an optimization, we also check
  3918       .       .      // if an idle mark worker is needed at all. This is OK here, because if we
  3919       .       .      // observe that one isn't needed, at least one is currently running. Even if
  3920       .       .      // it stops running, its own journey into the scheduler should schedule it
  3921       .       .      // again, if need be (at which point, this check will pass, if relevant).
  3922    10ms    10ms      if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() {
  3923       .       .          return nil, nil
  3924       .       .      }
  3925       .       .      if !gcMarkWorkAvailable(nil) {
  3926       .       .          return nil, nil
  3927       .       .      }
runtime.resetspinning
/usr/lib/go/src/runtime/proc.go
Total: 0 1.78s (flat, cum) 4.74% 4003 . . throw("findrunnable: negative nmspinning") 4004 . . } 4005 . . // M wakeup policy is deliberately somewhat conservative, so check if we 4006 . . // need to wakeup another P here. See "Worker thread parking/unparking" 4007 . . // comment at the top of the file for details. 4008 . 1.78s wakep() 4009 . . } 4010 . . 4011 . . // injectglist adds each runnable G on the list to some run queue, 4012 . . // and clears glist. If there is no current P, they are added to the 4013 . . // global queue, and up to npidle M's are started to run them.
runtime.schedule
/usr/lib/go/src/runtime/proc.go
Total: 40ms 2.89s (flat, cum) 7.70% 4104 . . wakep() 4105 . . } 4106 . . 4107 . . // One round of scheduler: find a runnable goroutine and execute it. 4108 . . // Never returns. 4109 10ms 10ms func schedule() { 4110 . . mp := getg().m 4111 . . 4112 . . if mp.locks != 0 { 4113 . . throw("schedule: holding locks") 4114 . . } 4115 . . 4116 . . if mp.lockedg != 0 { 4117 . . stoplockedm() 4118 . . execute(mp.lockedg.ptr(), false) // Never returns. 4119 . . } 4120 . . 4121 . . // We should not schedule away from a g that is executing a cgo call, 4122 . . // since the cgo call is using the m's g0 stack. 4123 20ms 20ms if mp.incgo { 4124 . . throw("schedule: in cgo") 4125 . . } 4126 . . 4127 . . top: 4128 . . pp := mp.p.ptr() 4129 10ms 10ms pp.preempt = false 4130 . . 4131 . . // Safety check: if we are spinning, the run queue should be empty. 4132 . . // Check this before calling checkTimers, as that might call 4133 . . // goready to put a ready goroutine on the local run queue. 4134 . . if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) { 4135 . . throw("schedule: spinning with local work") 4136 . . } 4137 . . 4138 . 2.85s gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available 4139 . . 4140 . . // findRunnable may have collected an allp snapshot. The snapshot is 4141 . . // only required within findRunnable. Clear it to all GC to collect the 4142 . . // slice. 4143 . . mp.clearAllpSnapshot()
runtime.schedule
/usr/lib/go/src/runtime/proc.go
Total: 20ms 1.80s (flat, cum) 4.79% 4156 . . 4157 . . // This thread is going to run a goroutine and is not spinning anymore, 4158 . . // so if it was marked as spinning we need to reset it now and potentially 4159 . . // start a new spinning M. 4160 . . if mp.spinning { 4161 . 1.78s resetspinning() 4162 . . } 4163 . . 4164 20ms 20ms if sched.disable.user && !schedEnabled(gp) { 4165 . . // Scheduling of this goroutine is disabled. Put it on 4166 . . // the list of pending runnable goroutines for when we 4167 . . // re-enable user scheduling and look again. 4168 . . lock(&sched.lock) 4169 . . if schedEnabled(gp) {
runtime.schedule
/usr/lib/go/src/runtime/proc.go
Total: 80ms 310ms (flat, cum) 0.83% 4180 . . // If about to schedule a not-normal goroutine (a GCworker or tracereader), 4181 . . // wake a P if there is one. 4182 . . if tryWakeP { 4183 . . wakep() 4184 . . } 4185 80ms 80ms if gp.lockedm != 0 { 4186 . . // Hands off own p to the locked m, 4187 . . // then blocks waiting for a new p. 4188 . . startlockedm(gp) 4189 . . goto top 4190 . . } 4191 . . 4192 . 230ms execute(gp, inheritTime) 4193 . . } 4194 . . 4195 . . // dropg removes the association between m and the current goroutine m->curg (gp for short). 4196 . . // Typically a caller sets gp's status away from Grunning and then 4197 . . // immediately calls dropg to finish the job. The caller is also responsible
runtime.park_m
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053% 4210 . . unlock((*mutex)(lock)) 4211 . . return true 4212 . . } 4213 . . 4214 . . // park continuation on g0. 4215 20ms 20ms func park_m(gp *g) { 4216 . . mp := getg().m 4217 . . 4218 . . trace := traceAcquire() 4219 . . 4220 . . // If g is in a synctest group, we don't want to let the group
runtime.park_m
/usr/lib/go/src/runtime/proc.go
Total: 20ms 1.09s (flat, cum) 2.90% 4239 . . traceRelease(trace) 4240 . . } 4241 . . 4242 . . dropg() 4243 . . 4244 10ms 10ms if fn := mp.waitunlockf; fn != nil { 4245 . 70ms ok := fn(gp, mp.waitlock) 4246 10ms 10ms mp.waitunlockf = nil 4247 . . mp.waitlock = nil 4248 . . if !ok { 4249 . . trace := traceAcquire() 4250 . . casgstatus(gp, _Gwaiting, _Grunnable) 4251 . . if bubble != nil { 4252 . . bubble.decActive() 4253 . . } 4254 . . if trace.ok() { 4255 . . trace.GoUnpark(gp, 2) 4256 . . traceRelease(trace) 4257 . . } 4258 . . execute(gp, true) // Schedule it back, never returns. 4259 . . } 4260 . . } 4261 . . 4262 . . if bubble != nil { 4263 . . bubble.decActive() 4264 . . } 4265 . . 4266 . 1s schedule() 4267 . . } 4268 . . 4269 . . func goschedImpl(gp *g, preempted bool) { 4270 . . trace := traceAcquire() 4271 . . status := readgstatus(gp)
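park_m is the g0 continuation of gopark: it drops the current G, runs the optional unlock callback, and falls into schedule (the 1s of cumulative time above). Any blocking operation funnels through here; a minimal sketch of user code that parks, assuming nothing beyond the standard library:

// A channel receive with no sender ready parks the receiving goroutine via
// gopark/park_m; schedule then looks for the next runnable G on this M.
func blockOnChannel() int {
	ch := make(chan int)
	go func() { ch <- 42 }()
	return <-ch // parks here until the send completes, then is rescheduled
}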
runtime.goschedImpl
/usr/lib/go/src/runtime/proc.go
Total: 10ms 260ms (flat, cum) 0.69% 4287 . . if trace.ok() { 4288 . . traceRelease(trace) 4289 . . } 4290 . . 4291 . . dropg() 4292 . 10ms lock(&sched.lock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 4293 . . globrunqput(gp) 4294 . . unlock(&sched.lock) 4295 . . 4296 10ms 10ms if mainStarted { 4297 . 130ms wakep() 4298 . . } 4299 . . 4300 . 110ms schedule() 4301 . . } 4302 . .
runtime.gosched_m
/usr/lib/go/src/runtime/proc.go
Total: 0 260ms (flat, cum) 0.69% 4303 . . // Gosched continuation on g0. 4304 . . func gosched_m(gp *g) { 4305 . 260ms goschedImpl(gp, false) 4306 . . } 4307 . . 4308 . . // goschedguarded is a forbidden-states-avoided version of gosched_m. 4309 . . func goschedguarded_m(gp *g) { 4310 . . if !canPreemptM(gp.m) {
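gosched_m is the g0 continuation of runtime.Gosched: goschedImpl puts the G on the global run queue, possibly wakes a P (wakep), and schedule picks the next G. A minimal sketch of code that takes this path; workUnit is hypothetical:

// imports: runtime
func cooperativeLoop(n int) {
	for i := 0; i < n; i++ {
		workUnit(i)       // hypothetical CPU-bound step
		runtime.Gosched() // yield the P; resumes later via goschedImpl/schedule
	}
}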
runtime.goexit0
/usr/lib/go/src/runtime/proc.go
Total: 10ms 4.21s (flat, cum) 11.21% 4428 . . } 4429 . . mcall(goexit0) 4430 . . } 4431 . . 4432 . . // goexit continuation on g0. 4433 10ms 10ms func goexit0(gp *g) { 4434 . 310ms gdestroy(gp) 4435 . 3.89s schedule() 4436 . . } 4437 . . 4438 . . func gdestroy(gp *g) {
runtime.gdestroy
/usr/lib/go/src/runtime/proc.go
Total: 20ms 220ms (flat, cum) 0.59% 4439 . . mp := getg().m 4440 . . pp := mp.p.ptr() 4441 . . 4442 . 20ms casgstatus(gp, _Grunning, _Gdead) 4443 20ms 20ms gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo)) c.maxStackScan.Add(pp.maxStackScanDelta) mgcpacer.go:924 return Xadd64(&u.value, delta) types.go:344 4444 . 180ms if isSystemGoroutine(gp, false) { 4445 . . sched.ngsys.Add(-1) 4446 . . } 4447 . . gp.m = nil 4448 . . locked := gp.lockedm != 0 4449 . . gp.lockedm = 0
runtime.gdestroy
/usr/lib/go/src/runtime/proc.go
Total: 0 90ms (flat, cum) 0.24% 4481 . . if mp.isextra { 4482 . . throw("runtime.Goexit called in a thread that was not created by the Go runtime") 4483 . . } 4484 . . throw("exited a goroutine internally locked to the OS thread") 4485 . . } 4486 . 90ms gfput(pp, gp) 4487 . . if locked { 4488 . . // The goroutine may have locked this thread because 4489 . . // it put it in an unusual kernel state. Kill it 4490 . . // rather than returning it to the thread pool. 4491 . .
runtime.save
/usr/lib/go/src/runtime/proc.go
Total: 30ms 30ms (flat, cum) 0.08% 4507 . . // save must not have write barriers because invoking a write barrier 4508 . . // can clobber getg().sched. 4509 . . // 4510 . . //go:nosplit 4511 . . //go:nowritebarrierrec 4512 10ms 10ms func save(pc, sp, bp uintptr) { 4513 . . gp := getg() 4514 . . 4515 10ms 10ms if gp == gp.m.g0 || gp == gp.m.gsignal { 4516 . . // m.g0.sched is special and must describe the context 4517 . . // for exiting the thread. mstart1 writes to it directly. 4518 . . // m.gsignal.sched should not be used at all. 4519 . . // This check makes sure save calls do not accidentally 4520 . . // run in contexts where they'd write to system g's. 4521 . . throw("save on system g not allowed") 4522 . . } 4523 . . 4524 10ms 10ms gp.sched.pc = pc 4525 . . gp.sched.sp = sp 4526 . . gp.sched.lr = 0 4527 . . gp.sched.bp = bp 4528 . . // We need to ensure ctxt is zero, but can't have a write 4529 . . // barrier here. However, it should always already be zero.
runtime.reentersyscall
/usr/lib/go/src/runtime/proc.go
Total: 150ms 270ms (flat, cum) 0.72% 4556 . . // must always point to a valid stack frame. entersyscall below is the normal 4557 . . // entry point for syscalls, which obtains the SP and PC from the caller. 4558 . . // 4559 . . //go:nosplit 4560 . . func reentersyscall(pc, sp, bp uintptr) { 4561 20ms 20ms trace := traceAcquire() if !traceEnabled() { traceruntime.go:188 ⋮ 4562 . . gp := getg() 4563 . . 4564 . . // Disable preemption because during this function g is in Gsyscall status, 4565 . . // but can have inconsistent g->sched, do not let GC observe it. 4566 10ms 10ms gp.m.locks++ 4567 . . 4568 . . // Entersyscall must not call any function that might split/grow the stack. 4569 . . // (See details in comment above.) 4570 . . // Catch calls that might, by replacing the stack guard with something that 4571 . . // will trip any stack check and leaving a flag to tell newstack to die. 4572 . . gp.stackguard0 = stackPreempt 4573 . . gp.throwsplit = true 4574 . . 4575 . . // Leave SP around for GC and traceback. 4576 . 30ms save(pc, sp, bp) 4577 . . gp.syscallsp = sp 4578 20ms 20ms gp.syscallpc = pc 4579 . . gp.syscallbp = bp 4580 . 90ms casgstatus(gp, _Grunning, _Gsyscall) 4581 . . if staticLockRanking { 4582 . . // When doing static lock ranking casgstatus can call 4583 . . // systemstack which clobbers g.sched. 4584 . . save(pc, sp, bp) 4585 . . } 4586 20ms 20ms if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 4587 . . systemstack(func() { 4588 . . print("entersyscall inconsistent sp ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 4589 . . throw("entersyscall") 4590 . . }) 4591 . . } 4592 . . if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp { 4593 . . systemstack(func() { 4594 . . print("entersyscall inconsistent bp ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 4595 . . throw("entersyscall") 4596 . . }) 4597 . . } 4598 . . 4599 10ms 10ms if trace.ok() { 4600 . . systemstack(func() { 4601 . . trace.GoSysCall() 4602 . . traceRelease(trace) 4603 . . }) 4604 . . // systemstack itself clobbers g.sched.{pc,sp} and we might 4605 . . // need them later when the G is genuinely blocked in a 4606 . . // syscall 4607 . . save(pc, sp, bp) 4608 . . } 4609 . . 4610 10ms 10ms if sched.sysmonwait.Load() { return b.u.Load() != 0 types.go:168 4611 . . systemstack(entersyscall_sysmon) 4612 . . save(pc, sp, bp) 4613 . . } 4614 . . 4615 . . if gp.m.p.ptr().runSafePointFn != 0 { 4616 . . // runSafePointFn may stack split if run on this stack 4617 . . systemstack(runSafePointFn) 4618 . . save(pc, sp, bp) 4619 . . } 4620 . . 4621 . . gp.m.syscalltick = gp.m.p.ptr().syscalltick 4622 . . pp := gp.m.p.ptr() 4623 . . pp.m = 0 4624 . . gp.m.oldp.set(pp) 4625 . . gp.m.p = 0 4626 . . atomic.Store(&pp.status, _Psyscall) 4627 50ms 50ms if sched.gcwaiting.Load() { return b.u.Load() != 0 types.go:168 4628 . . systemstack(entersyscall_gcwait) 4629 . . save(pc, sp, bp) 4630 . . } 4631 . . 4632 . . gp.m.locks-- 4633 10ms 10ms } 4634 . . 4635 . . // Standard syscall entry used by the go syscall library and normal cgo calls. 4636 . . // 4637 . . // This is exported via linkname to assembly in the syscall package and x/sys. 4638 . . //
runtime.entersyscall
/usr/lib/go/src/runtime/proc.go
Total: 0 280ms (flat, cum) 0.75% 4649 . . func entersyscall() { 4650 . . // N.B. getcallerfp cannot be written directly as argument in the call 4651 . . // to reentersyscall because it forces spilling the other arguments to 4652 . . // the stack. This results in exceeding the nosplit stack requirements 4653 . . // on some platforms. 4654 . 10ms fp := getcallerfp() 4655 . 270ms reentersyscall(sys.GetCallerPC(), sys.GetCallerSP(), fp) 4656 . . } 4657 . . 4658 . . func entersyscall_sysmon() { 4659 . . lock(&sched.lock) 4660 . . if sched.sysmonwait.Load() {
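entersyscall brackets the start of a blocking syscall made through the syscall package, saving the G's state and moving its P to _Psyscall so it can be retaken if the call blocks. A minimal sketch of the kind of call that is wrapped this way; fd is assumed to be an open descriptor:

// imports: syscall (Linux)
func readSome(fd int) ([]byte, error) {
	buf := make([]byte, 4096)
	n, err := syscall.Read(fd, buf) // entersyscall -> kernel -> exitsyscall
	if err != nil {
		return nil, err
	}
	return buf[:n], nil
}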
runtime.exitsyscall
/usr/lib/go/src/runtime/proc.go
Total: 70ms 280ms (flat, cum) 0.75% 4782 . . // See go.dev/issue/67401. 4783 . . // 4784 . . //go:nosplit 4785 . . //go:nowritebarrierrec 4786 . . //go:linkname exitsyscall 4787 10ms 10ms func exitsyscall() { 4788 . . gp := getg() 4789 . . 4790 10ms 10ms gp.m.locks++ // see comment in entersyscall 4791 . . if sys.GetCallerSP() > gp.syscallsp { 4792 . . throw("exitsyscall: syscall frame is no longer valid") 4793 . . } 4794 . . 4795 . . gp.waitsince = 0 4796 . . oldp := gp.m.oldp.ptr() 4797 . . gp.m.oldp = 0 4798 . 120ms if exitsyscallfast(oldp) { 4799 . . // When exitsyscallfast returns success, we have a P so can now use 4800 . . // write barriers 4801 . . if goroutineProfile.active { 4802 . . // Make sure that gp has had its stack written out to the goroutine 4803 . . // profile, exactly as it was when the goroutine profiler first 4804 . . // stopped the world. 4805 . . systemstack(func() { 4806 . . tryRecordGoroutineProfileWB(gp) 4807 . . }) 4808 . . } 4809 20ms 20ms trace := traceAcquire() 4810 . . if trace.ok() { 4811 . . lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick 4812 . . systemstack(func() { 4813 . . // Write out syscall exit eagerly. 4814 . . // 4815 . . // It's important that we write this *after* we know whether we 4816 . . // lost our P or not (determined by exitsyscallfast). 4817 . . trace.GoSysExit(lostP) 4818 . . if lostP { 4819 . . // We lost the P at some point, even though we got it back here. 4820 . . // Trace that we're starting again, because there was a tracev2.GoSysBlock 4821 . . // call somewhere in exitsyscallfast (indicating that this goroutine 4822 . . // had blocked) and we're about to start running again. 4823 . . trace.GoStart() 4824 . . } 4825 . . }) 4826 . . } 4827 . . // There's a cpu for us, so we can run. 4828 10ms 10ms gp.m.p.ptr().syscalltick++ 4829 . . // We need to cas the status and scan before resuming... 4830 . 90ms casgstatus(gp, _Gsyscall, _Grunning) 4831 . . if trace.ok() { 4832 . . traceRelease(trace) 4833 . . } 4834 . . 4835 . . // Garbage collector isn't running (since we are), 4836 . . // so okay to clear syscallsp. 4837 . . gp.syscallsp = 0 4838 10ms 10ms gp.m.locks-- 4839 . . if gp.preempt { 4840 . . // restore the preemption request in case we've cleared it in newstack 4841 . . gp.stackguard0 = stackPreempt 4842 . . } else { 4843 . . // otherwise restore the real stackGuard, we've spoiled it in entersyscall/entersyscallblock 4844 10ms 10ms gp.stackguard0 = gp.stack.lo + stackGuard 4845 . . } 4846 . . gp.throwsplit = false 4847 . . 4848 . . if sched.disable.user && !schedEnabled(gp) { 4849 . . // Scheduling of this goroutine is disabled.
runtime.exitsyscallfast
/usr/lib/go/src/runtime/proc.go
Total: 80ms 120ms (flat, cum) 0.32% 4875 . . if sched.stopwait == freezeStopWait { 4876 . . return false 4877 . . } 4878 . . 4879 . . // Try to re-acquire the last P. 4880 10ms 10ms trace := traceAcquire() 4881 70ms 70ms if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) { 4882 . . // There's a cpu for us, so we can run. 4883 . 10ms wirep(oldp) 4884 . 30ms exitsyscallfast_reacquired(trace) 4885 . . if trace.ok() { 4886 . . traceRelease(trace) 4887 . . } 4888 . . return true 4889 . . }
runtime.exitsyscallfast_reacquired
/usr/lib/go/src/runtime/proc.go
Total: 30ms 30ms (flat, cum) 0.08% 4907 . . // exitsyscallfast_reacquired is the exitsyscall path on which this G 4908 . . // has successfully reacquired the P it was running on before the 4909 . . // syscall. 4910 . . // 4911 . . //go:nosplit 4912 20ms 20ms func exitsyscallfast_reacquired(trace traceLocker) { 4913 . . gp := getg() 4914 10ms 10ms if gp.m.syscalltick != gp.m.p.ptr().syscalltick { 4915 . . if trace.ok() { 4916 . . // The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed). 4917 . . // tracev2.GoSysBlock for this syscall was already emitted, 4918 . . // but here we effectively retake the p from the new syscall running on the same p. 4919 . . systemstack(func() {
runtime.newproc
/usr/lib/go/src/runtime/proc.go
Total: 0 810ms (flat, cum) 2.16% 5142 . . // Put it on the queue of g's waiting to run. 5143 . . // The compiler turns a go statement into a call to this. 5144 . . func newproc(fn *funcval) { 5145 . . gp := getg() 5146 . . pc := sys.GetCallerPC() 5147 . 810ms systemstack(func() {
runtime.newproc.func1
/usr/lib/go/src/runtime/proc.go
Total: 10ms 790ms (flat, cum) 2.10% 5148 . 480ms newg := newproc1(fn, gp, pc, false, waitReasonZero) 5149 . . 5150 . . pp := getg().m.p.ptr() 5151 . 20ms runqput(pp, newg, true) 5152 . . 5153 . . if mainStarted { 5154 10ms 290ms wakep() 5155 . . }
runtime.newproc
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 5156 . . }) 5157 10ms 10ms } 5158 . . 5159 . . // Create a new g in state _Grunnable (or _Gwaiting if parked is true), starting at fn.
runtime.newproc1
/usr/lib/go/src/runtime/proc.go
Total: 40ms 250ms (flat, cum) 0.67% 5161 . . // for adding the new g to the scheduler. If parked is true, waitreason must be non-zero. 5162 . . func newproc1(fn *funcval, callergp *g, callerpc uintptr, parked bool, waitreason waitReason) *g { 5163 10ms 10ms if fn == nil { 5164 . . fatal("go of nil func value") 5165 . . } 5166 . . 5167 10ms 10ms mp := acquirem() // disable preemption because we hold M and P in local vars. return gp.m runtime1.go:632 5168 10ms 10ms pp := mp.p.ptr() func (pp puintptr) ptr() *p { return (*p)(unsafe.Pointer(pp)) } runtime2.go:266 5169 . 210ms newg := gfget(pp) 5170 . . if newg == nil { 5171 . . newg = malg(stackMin) 5172 . . casgstatus(newg, _Gidle, _Gdead) 5173 . . allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. 5174 . . } 5175 . . if newg.stack.hi == 0 { 5176 . . throw("newproc1: newg missing stack") 5177 . . } 5178 . . 5179 10ms 10ms if readgstatus(newg) != _Gdead { return gp.atomicstatus.Load() proc.go:1191 return Load(&u.value) types.go:194 5180 . . throw("newproc1: new g is not Gdead") 5181 . . } 5182 . . 5183 . . totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame 5184 . . totalSize = alignUp(totalSize, sys.StackAlign)
runtime.newproc1
/usr/lib/go/src/runtime/proc.go
Total: 10ms 150ms (flat, cum) 0.4% 5196 . . memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched)) 5197 . . newg.sched.sp = sp 5198 . . newg.stktopsp = sp 5199 . . newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function 5200 . . newg.sched.g = guintptr(unsafe.Pointer(newg)) 5201 . 20ms gostartcallfn(&newg.sched, fn) 5202 . . newg.parentGoid = callergp.goid 5203 . . newg.gopc = callerpc 5204 . . newg.ancestors = saveAncestors(callergp) 5205 10ms 10ms newg.startpc = fn.fn 5206 . . newg.runningCleanups.Store(false) 5207 . 120ms if isSystemGoroutine(newg, false) { 5208 . . sched.ngsys.Add(1) 5209 . . } else { 5210 . . // Only user goroutines inherit synctest groups and pprof labels. 5211 . . newg.bubble = callergp.bubble 5212 . . if mp.curg != nil {
runtime.newproc1
/usr/lib/go/src/runtime/proc.go
Total: 20ms 80ms (flat, cum) 0.21% 5224 . . // Track initial transition? 5225 . . newg.trackingSeq = uint8(cheaprand()) 5226 . . if newg.trackingSeq%gTrackingPeriod == 0 { 5227 . . newg.tracking = true 5228 . . } 5229 10ms 10ms gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo)) c.maxStackScan.Add(pp.maxStackScanDelta) mgcpacer.go:924 return Xadd64(&u.value, delta) types.go:344 5230 . . 5231 . . // Get a goid and switch to runnable. Make all this atomic to the tracer. 5232 . . trace := traceAcquire() 5233 . . var status uint32 = _Grunnable 5234 . . if parked { 5235 . . status = _Gwaiting 5236 . . newg.waitreason = waitreason 5237 . . } 5238 . . if pp.goidcache == pp.goidcacheend { 5239 . . // Sched.goidgen is the last allocated id, 5240 . . // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch]. 5241 . . // At startup sched.goidgen=0, so main goroutine receives goid=1. 5242 10ms 10ms pp.goidcache = sched.goidgen.Add(_GoidCacheBatch) return Xadd64(&u.value, delta) types.go:344 5243 . . pp.goidcache -= _GoidCacheBatch - 1 5244 . . pp.goidcacheend = pp.goidcache + _GoidCacheBatch 5245 . . } 5246 . . newg.goid = pp.goidcache 5247 . 60ms casgstatus(newg, _Gdead, status) 5248 . . pp.goidcache++ 5249 . . newg.trace.reset() 5250 . . if trace.ok() { 5251 . . trace.GoCreate(newg, newg.startpc, parked) 5252 . . traceRelease(trace)
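newproc1 is what a go statement compiles down to: take a G from the free list (gfget), set up its stack and scheduling context, publish it as _Grunnable, and, back in newproc, enqueue it and possibly wake a P. A minimal sketch of code whose profile would be dominated by this path:

// imports: sync
func spawnMany(n int) {
	var wg sync.WaitGroup
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func() { // compiled into a call to runtime.newproc
			wg.Done()
		}()
	}
	wg.Wait()
}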
runtime.gfput
/usr/lib/go/src/runtime/proc.go
Total: 50ms 90ms (flat, cum) 0.24% 5301 . . return ancestorsp 5302 . . } 5303 . . 5304 . . // Put on gfree list. 5305 . . // If local list is too long, transfer a batch to the global list. 5306 10ms 10ms func gfput(pp *p, gp *g) { 5307 10ms 10ms if readgstatus(gp) != _Gdead { 5308 . . throw("gfput: bad status (not Gdead)") 5309 . . } 5310 . . 5311 . . stksize := gp.stack.hi - gp.stack.lo 5312 . . 5313 . . if stksize != uintptr(startingStackSize) { 5314 . . // non-standard stack size - free it. 5315 . 40ms stackfree(gp.stack) 5316 . . gp.stack.lo = 0 5317 . . gp.stack.hi = 0 5318 . . gp.stackguard0 = 0 5319 . . if valgrindenabled { 5320 . . valgrindDeregisterStack(gp.valgrindStackID) 5321 . . gp.valgrindStackID = 0 5322 . . } 5323 . . } 5324 . . 5325 10ms 10ms pp.gFree.push(gp) gp.schedlink = l.head proc.go:7388 5326 . . if pp.gFree.size >= 64 { 5327 . . var ( 5328 . . stackQ gQueue 5329 . . noStackQ gQueue 5330 . . ) 5331 . . for pp.gFree.size >= 32 { 5332 20ms 20ms gp := pp.gFree.pop() l.head = gp.schedlink proc.go:7406 5333 . . if gp.stack.lo == 0 { 5334 . . noStackQ.push(gp) 5335 . . } else { 5336 . . stackQ.push(gp) 5337 . . }
runtime.gfget
/usr/lib/go/src/runtime/proc.go
Total: 140ms 210ms (flat, cum) 0.56% 5343 . . } 5344 . . } 5345 . . 5346 . . // Get from gfree list. 5347 . . // If local list is empty, grab a batch from global list. 5348 20ms 20ms func gfget(pp *p) *g { 5349 . . retry: 5350 10ms 10ms if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) { 5351 . . lock(&sched.gFree.lock) 5352 . . // Move a batch of free Gs to the P. 5353 . . for pp.gFree.size < 32 { 5354 . . // Prefer Gs with stacks. 5355 . . gp := sched.gFree.stack.pop() 5356 . . if gp == nil { 5357 10ms 10ms gp = sched.gFree.noStack.pop() l.head = gp.schedlink proc.go:7406 5358 . . if gp == nil { 5359 . . break 5360 . . } 5361 . . } 5362 10ms 10ms pp.gFree.push(gp) gp.schedlink = l.head proc.go:7388 5363 . . } 5364 . . unlock(&sched.gFree.lock) 5365 . . goto retry 5366 . . } 5367 70ms 70ms gp := pp.gFree.pop() l.head = gp.schedlink proc.go:7406 5368 . . if gp == nil { 5369 . . return nil 5370 . . } 5371 10ms 10ms if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) { 5372 . . // Deallocate old stack. We kept it in gfput because it was the 5373 . . // right size when the goroutine was put on the free list, but 5374 . . // the right size has changed since then. 5375 . . systemstack(func() { 5376 . . stackfree(gp.stack) 5377 . . gp.stack.lo = 0 5378 . . gp.stack.hi = 0 5379 . . gp.stackguard0 = 0 5380 . . if valgrindenabled { 5381 . . valgrindDeregisterStack(gp.valgrindStackID) 5382 . . gp.valgrindStackID = 0 5383 . . } 5384 . . }) 5385 . . } 5386 10ms 10ms if gp.stack.lo == 0 { 5387 . . // Stack was deallocated in gfput or just above. Allocate a new one. 5388 . 70ms systemstack(func() {
runtime.gfget.func2
/usr/lib/go/src/runtime/proc.go
Total: 0 70ms (flat, cum) 0.19% 5389 . 70ms gp.stack = stackalloc(startingStackSize) 5390 . . if valgrindenabled { 5391 . . gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(gp.stack.lo), unsafe.Pointer(gp.stack.hi)) 5392 . . } 5393 . . }) 5394 . . gp.stackguard0 = gp.stack.lo + stackGuard
runtime.acquirep
/usr/lib/go/src/runtime/proc.go
Total: 30ms 60ms (flat, cum) 0.16% 6004 . . 6005 . . // Have p; write barriers now allowed. 6006 . . 6007 . . // Perform deferred mcache flush before this P can allocate 6008 . . // from a potentially stale mcache. 6009 10ms 40ms pp.mcache.prepareForSweep() 6010 . . 6011 20ms 20ms trace := traceAcquire() if !traceEnabled() { traceruntime.go:188 6012 . . if trace.ok() { 6013 . . trace.ProcStart() 6014 . . traceRelease(trace) 6015 . . } 6016 . . }
runtime.wirep
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 6019 . . // current M to pp. This is broken out so we can disallow write 6020 . . // barriers for this part, since we don't yet have a P. 6021 . . // 6022 . . //go:nowritebarrierrec 6023 . . //go:nosplit 6024 10ms 10ms func wirep(pp *p) { 6025 . . gp := getg() 6026 . . 6027 . . if gp.m.p != 0 { 6028 . . // Call on the systemstack to avoid a nosplit overflow build failure 6029 . . // on some platforms when built with -N -l. See #64113.
runtime.releasep
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053% 6047 . . pp.m.set(gp.m) 6048 . . pp.status = _Prunning 6049 . . } 6050 . . 6051 . . // Disassociate p and the current m. 6052 20ms 20ms func releasep() *p { 6053 . . trace := traceAcquire() 6054 . . if trace.ok() { 6055 . . trace.ProcStop(getg().m.p.ptr()) 6056 . . traceRelease(trace) 6057 . . }
runtime.checkdead
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 6115 . . var run0 int32 6116 . . if !iscgo && cgoHasExtraM && extraMLength.Load() > 0 { 6117 . . run0 = 1 6118 . . } 6119 . . 6120 10ms 10ms run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys 6121 . . if run > run0 { 6122 . . return 6123 . . } 6124 . . if run < 0 { 6125 . . print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
runtime.mput
/usr/lib/go/src/runtime/proc.go
Total: 20ms 30ms (flat, cum) 0.08% 6806 . . // Put mp on midle list. 6807 . . // sched.lock must be held. 6808 . . // May run during STW, so write barriers are not allowed. 6809 . . // 6810 . . //go:nowritebarrierrec 6811 10ms 10ms func mput(mp *m) { 6812 . . assertLockHeld(&sched.lock) 6813 . . 6814 . . mp.schedlink = sched.midle 6815 . . sched.midle.set(mp) 6816 10ms 10ms sched.nmidle++ 6817 . 10ms checkdead() 6818 . . } 6819 . . 6820 . . // Try to get an m from midle list. 6821 . . // sched.lock must be held. 6822 . . // May run during STW, so write barriers are not allowed.
runtime.mget
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053% 6825 . . func mget() *m { 6826 . . assertLockHeld(&sched.lock) 6827 . . 6828 . . mp := sched.midle.ptr() 6829 . . if mp != nil { 6830 20ms 20ms sched.midle = mp.schedlink 6831 . . sched.nmidle-- 6832 . . } 6833 . . return mp 6834 . . } 6835 . .
runtime.pMask.read
/usr/lib/go/src/runtime/proc.go
Total: 50ms 50ms (flat, cum) 0.13% 6905 . . type pMask []uint32 6906 . . 6907 . . // read returns true if P id's bit is set. 6908 . . func (p pMask) read(id uint32) bool { 6909 . . word := id / 32 6910 10ms 10ms mask := uint32(1) << (id % 32) 6911 40ms 40ms return (atomic.Load(&p[word]) & mask) != 0 6912 . . } 6913 . .
runtime.pMask.set
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 6915 . . func (p pMask) set(id int32) { 6916 . . word := id / 32 6917 10ms 10ms mask := uint32(1) << (id % 32) 6918 . . atomic.Or(&p[word], mask) 6919 . . } 6920 . . 6921 . . // clear clears P id's bit. 6922 . . func (p pMask) clear(id int32) {
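pMask packs one bit per P into a []uint32, so a read is a single atomic load plus a mask. A user-level sketch of the same idea (assuming Go 1.23+ for atomic.OrUint32/AndUint32):

// imports: sync/atomic
type bitmask []uint32

func (b bitmask) read(id uint32) bool {
	return atomic.LoadUint32(&b[id/32])&(1<<(id%32)) != 0
}

func (b bitmask) set(id uint32) {
	atomic.OrUint32(&b[id/32], 1<<(id%32))
}

func (b bitmask) clear(id uint32) {
	atomic.AndUint32(&b[id/32], ^uint32(1<<(id%32)))
}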
runtime.pidleput
/usr/lib/go/src/runtime/proc.go
Total: 100ms 100ms (flat, cum) 0.27% 6946 . . now = nanotime() 6947 . . } 6948 . . if pp.timers.len.Load() == 0 { 6949 . . timerpMask.clear(pp.id) 6950 . . } 6951 20ms 20ms idlepMask.set(pp.id) 6952 30ms 30ms pp.link = sched.pidle 6953 . . sched.pidle.set(pp) 6954 50ms 50ms sched.npidle.Add(1) return Xaddint32(&i.value, delta) types.go:56 6955 . . if !pp.limiterEvent.start(limiterEventIdle, now) { 6956 . . throw("must be able to track idle limiter event") 6957 . . } 6958 . . return now 6959 . . }
runtime.pidleget
/usr/lib/go/src/runtime/proc.go
Total: 30ms 70ms (flat, cum) 0.19% 6972 . . if pp != nil { 6973 . . // Timer may get added at any time now. 6974 . . if now == 0 { 6975 . . now = nanotime() 6976 . . } 6977 10ms 10ms timerpMask.set(pp.id) mask := uint32(1) << (id % 32) proc.go:6917 6978 20ms 20ms idlepMask.clear(pp.id) 6979 . . sched.pidle = pp.link 6980 . . sched.npidle.Add(-1) 6981 . 40ms pp.limiterEvent.stop(limiterEventIdle, now) 6982 . . } 6983 . . return pp, now 6984 . . } 6985 . . 6986 . . // pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership.
runtime.pidlegetSpinning
/usr/lib/go/src/runtime/proc.go
Total: 10ms 80ms (flat, cum) 0.21% 6991 . . // sched.lock must be held. 6992 . . // 6993 . . // May run during STW, so write barriers are not allowed. 6994 . . // 6995 . . //go:nowritebarrierrec 6996 10ms 10ms func pidlegetSpinning(now int64) (*p, int64) { 6997 . . assertLockHeld(&sched.lock) 6998 . . 6999 . 70ms pp, now := pidleget(now) 7000 . . if pp == nil { 7001 . . // See "Delicate dance" comment in findrunnable. We found work 7002 . . // that we cannot take, we must synchronize with non-spinning 7003 . . // Ms that may be preparing to drop their P. 7004 . . sched.needspinning.Store(1)
runtime.runqempty
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053% 7014 . . // Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail, 7015 . . // 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext. 7016 . . // Simply observing that runqhead == runqtail and then observing that runqnext == nil 7017 . . // does not mean the queue is empty. 7018 . . for { 7019 20ms 20ms head := atomic.Load(&pp.runqhead) 7020 . . tail := atomic.Load(&pp.runqtail) 7021 . . runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext))) 7022 . . if tail == atomic.Load(&pp.runqtail) { 7023 . . return head == tail && runnext == 0 7024 . . }
runtime.runqput
/usr/lib/go/src/runtime/proc.go
Total: 30ms 30ms (flat, cum) 0.08% 7057 . . next = false 7058 . . } 7059 . . 7060 . . if next { 7061 . . retryNext: 7062 10ms 10ms oldnext := pp.runnext 7063 10ms 10ms if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) { return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new)) runtime2.go:246 7064 . . goto retryNext 7065 . . } 7066 . . if oldnext == 0 { 7067 . . return 7068 . . } 7069 . . // Kick the old runnext out to the regular run queue. 7070 . . gp = oldnext.ptr() 7071 . . } 7072 . . 7073 . . retry: 7074 . . h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 7075 10ms 10ms t := pp.runqtail 7076 . . if t-h < uint32(len(pp.runq)) { 7077 . . pp.runq[t%uint32(len(pp.runq))].set(gp) 7078 . . atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption 7079 . . return 7080 . . }
runtime.runqget
/usr/lib/go/src/runtime/proc.go
Total: 70ms 70ms (flat, cum) 0.19% 7165 . . // If there's a runnext, it's the next G to run. 7166 . . next := pp.runnext 7167 . . // If the runnext is non-0 and the CAS fails, it could only have been stolen by another P, 7168 . . // because other Ps can race to set runnext to 0, but only the current P can set it to non-0. 7169 . . // Hence, there's no need to retry this CAS if it fails. 7170 10ms 10ms if next != 0 && pp.runnext.cas(next, 0) { return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new)) runtime2.go:246 7171 . . return next.ptr(), true 7172 . . } 7173 . . 7174 . . for { 7175 . . h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 7176 40ms 40ms t := pp.runqtail 7177 . . if t == h { 7178 . . return nil, false 7179 . . } 7180 . . gp := pp.runq[h%uint32(len(pp.runq))].ptr() 7181 20ms 20ms if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume 7182 . . return gp, false 7183 . . } 7184 . . } 7185 . . } 7186 . .
runtime.runqgrab
/usr/lib/go/src/runtime/proc.go
Total: 250ms 480ms (flat, cum) 1.28% 7223 . . 7224 . . // Grabs a batch of goroutines from pp's runnable queue into batch. 7225 . . // Batch is a ring buffer starting at batchHead. 7226 . . // Returns number of grabbed goroutines. 7227 . . // Can be executed by any P. 7228 20ms 20ms func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { 7229 . . for { 7230 110ms 110ms h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 7231 20ms 20ms t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer 7232 . . n := t - h 7233 . . n = n - n/2 7234 . . if n == 0 { 7235 10ms 10ms if stealRunNextG { 7236 . . // Try to steal from pp.runnext. 7237 20ms 20ms if next := pp.runnext; next != 0 { 7238 . . if pp.status == _Prunning { 7239 . . // Sleep to ensure that pp isn't about to run the g 7240 . . // we are about to steal. 7241 . . // The important use case here is when the g running 7242 . . // on pp ready()s another g and then almost 7243 . . // immediately blocks. Instead of stealing runnext 7244 . . // in this window, back off to give pp a chance to 7245 . . // schedule runnext. This will avoid thrashing gs 7246 . . // between different Ps. 7247 . . // A sync chan send/recv takes ~50ns as of time of 7248 . . // writing, so 3us gives ~50x overshoot. 7249 . . if !osHasLowResTimer { 7250 . 230ms usleep(3) 7251 . . } else { 7252 . . // On some platforms system timer granularity is 7253 . . // 1-15ms, which is way too much for this 7254 . . // optimization. So just yield. 7255 . . osyield() 7256 . . } 7257 . . } 7258 . . if !pp.runnext.cas(next, 0) { 7259 . . continue 7260 . . } 7261 . . batch[batchHead%uint32(len(batch))] = next 7262 . . return 1 7263 . . } 7264 . . } 7265 10ms 10ms return 0 7266 . . } 7267 . . if n > uint32(len(pp.runq)/2) { // read inconsistent h and t 7268 . . continue 7269 . . } 7270 10ms 10ms for i := uint32(0); i < n; i++ { 7271 . . g := pp.runq[(h+i)%uint32(len(pp.runq))] 7272 20ms 20ms batch[(batchHead+i)%uint32(len(batch))] = g 7273 . . } 7274 30ms 30ms if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 7275 . . return n 7276 . . } 7277 . . } 7278 . . } 7279 . .
runtime.runqsteal
/usr/lib/go/src/runtime/proc.go
Total: 20ms 500ms (flat, cum) 1.33% 7280 . . // Steal half of elements from local runnable queue of p2 7281 . . // and put onto local runnable queue of p. 7282 . . // Returns one of the stolen elements (or nil if failed). 7283 . . func runqsteal(pp, p2 *p, stealRunNextG bool) *g { 7284 . . t := pp.runqtail 7285 10ms 490ms n := runqgrab(p2, &pp.runq, t, stealRunNextG) 7286 . . if n == 0 { 7287 . . return nil 7288 . . } 7289 . . n-- 7290 . . gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr() 7291 . . if n == 0 { 7292 . . return gp 7293 . . } 7294 10ms 10ms h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 7295 . . if t-h+n >= uint32(len(pp.runq)) { 7296 . . throw("runqsteal: runq overflow") 7297 . . } 7298 . . atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption 7299 . . return gp
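runqput, runqget, runqgrab and runqsteal together implement a fixed-size per-P ring: only the owning P appends at the tail (store-release), and any P may advance the head with a CAS (cas-release) to consume, which is why the listings above are dominated by the acquire loads and CAS on runqhead/runqtail. A user-level sketch of the same protocol, not the runtime's code:

// imports: sync/atomic
type workRing struct {
	head, tail atomic.Uint32 // head advanced by consumers, tail by the owner
	buf        [256]func()
}

// push is only ever called by the owning worker (single producer).
func (r *workRing) push(f func()) bool {
	h := r.head.Load()
	t := r.tail.Load()
	if t-h >= uint32(len(r.buf)) {
		return false // full; the runtime spills half to the global queue instead
	}
	r.buf[t%uint32(len(r.buf))] = f
	r.tail.Store(t + 1) // publish the new item to stealers
	return true
}

// steal may be called from any worker (multiple consumers).
func (r *workRing) steal() (func(), bool) {
	for {
		h := r.head.Load()
		t := r.tail.Load()
		if t == h {
			return nil, false // empty
		}
		f := r.buf[h%uint32(len(r.buf))]
		if r.head.CompareAndSwap(h, h+1) { // commit the consume
			return f, true
		}
		// lost a race with another stealer; reload and retry
	}
}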
runtime.(*gList).push
/usr/lib/go/src/runtime/proc.go
Total: 20ms 20ms (flat, cum) 0.053% 7383 . . return l.head == 0 7384 . . } 7385 . . 7386 . . // push adds gp to the head of l. 7387 . . func (l *gList) push(gp *g) { 7388 20ms 20ms gp.schedlink = l.head 7389 . . l.head.set(gp) 7390 . . l.size++ 7391 . . } 7392 . . 7393 . . // pushAll prepends all Gs in q to l. After this q must not be used.
runtime.(*gList).pop
/usr/lib/go/src/runtime/proc.go
Total: 100ms 100ms (flat, cum) 0.27% 7401 . . 7402 . . // pop removes and returns the head of l. If l is empty, it returns nil. 7403 . . func (l *gList) pop() *g { 7404 . . gp := l.head.ptr() 7405 . . if gp != nil { 7406 100ms 100ms l.head = gp.schedlink 7407 . . l.size-- 7408 . . } 7409 . . return gp 7410 . . } 7411 . .
runtime.(*randomEnum).done
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 7578 . . inc: ord.coprimes[i/ord.count%uint32(len(ord.coprimes))], 7579 . . } 7580 . . } 7581 . . 7582 . . func (enum *randomEnum) done() bool { 7583 10ms 10ms return enum.i == enum.count 7584 . . } 7585 . .
runtime.(*randomEnum).next
/usr/lib/go/src/runtime/proc.go
Total: 10ms 10ms (flat, cum) 0.027% 7586 . . func (enum *randomEnum) next() { 7587 . . enum.i++ 7588 10ms 10ms enum.pos = (enum.pos + enum.inc) % enum.count 7589 . . }
runtime.(*randomEnum).position
/usr/lib/go/src/runtime/proc.go
Total: 30ms 30ms (flat, cum) 0.08% 7591 . . func (enum *randomEnum) position() uint32 { 7592 30ms 30ms return enum.pos 7593 . . } 7594 . . 7595 . . func gcd(a, b uint32) uint32 { 7596 . . for b != 0 { 7597 . . a, b = b, a%b
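randomEnum lets stealWork visit every P exactly once in a pseudo-random order: start at a random position and step by an increment that is coprime to the number of Ps (stealOrder precomputes a table of coprimes for this). A standalone sketch of that enumeration:

// Visits 0..count-1 exactly once provided gcd(inc, count) == 1.
func forEachRandomOrder(count, start, inc uint32, visit func(uint32)) {
	pos := start % count
	for i := uint32(0); i < count; i++ {
		visit(pos)
		pos = (pos + inc) % count
	}
}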
runtime.(*moduledata).textAddr
/usr/lib/go/src/runtime/symtab.go
Total: 230ms 230ms (flat, cum) 0.61% 683 . . // relocated baseaddr to compute the function address. 684 . . // 685 . . // It is nosplit because it is part of the findfunc implementation. 686 . . // 687 . . //go:nosplit 688 20ms 20ms func (md *moduledata) textAddr(off32 uint32) uintptr { 689 20ms 20ms off := uintptr(off32) 690 20ms 20ms res := md.text + off 691 40ms 40ms if len(md.textsectmap) > 1 { 692 . . for i, sect := range md.textsectmap { 693 . . // For the last section, include the end address (etext), as it is included in the functab. 694 . . if off >= sect.vaddr && off < sect.end || (i == len(md.textsectmap)-1 && off == sect.end) { 695 . . res = sect.baseaddr + off - sect.vaddr 696 . . break 697 . . } 698 . . } 699 . . if res > md.etext && GOARCH != "wasm" { // on wasm, functions do not live in the same address space as the linear memory 700 . . println("runtime: textAddr", hex(res), "out of range", hex(md.text), "-", hex(md.etext)) 701 . . throw("runtime: text offset out of range") 702 . . } 703 . . } 704 . . if GOARCH == "wasm" { 705 . . // On Wasm, a text offset (e.g. in the method table) is function index, whereas 706 . . // the "PC" is function index << 16 + block index. 707 . . res <<= 16 708 . . } 709 130ms 130ms return res 710 . . } 711 . . 712 . . // textOff is the opposite of textAddr. It converts a PC to a (virtual) offset 713 . . // to md.text, and returns if the PC is in any Go text section. 714 . . //
runtime.(*moduledata).funcName
/usr/lib/go/src/runtime/symtab.go
Total: 20ms 170ms (flat, cum) 0.45% 749 . . // funcName returns the string at nameOff in the function name table. 750 . . func (md *moduledata) funcName(nameOff int32) string { 751 . . if nameOff == 0 { 752 . . return "" 753 . . } 754 20ms 170ms return gostringnocopy(&md.funcnametab[nameOff]) ⋮ ⋮ ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)} string.go:538 755 . . } 756 . . 757 . . // Despite being an exported symbol, 758 . . // FuncForPC is linknamed by widely used packages. 759 . . // Notable members of the hall of shame include:
runtime.findmoduledatap
/usr/lib/go/src/runtime/symtab.go
Total: 60ms 60ms (flat, cum) 0.16% 854 . . // implementation. 855 . . // 856 . . //go:nosplit 857 . . func findmoduledatap(pc uintptr) *moduledata { 858 . . for datap := &firstmoduledata; datap != nil; datap = datap.next { 859 60ms 60ms if datap.minpc <= pc && pc < datap.maxpc { 860 . . return datap 861 . . } 862 . . } 863 . . return nil 864 . . }
runtime.funcInfo.entry
/usr/lib/go/src/runtime/symtab.go
Total: 110ms 340ms (flat, cum) 0.91% 889 . . // - github.com/phuslu/log 890 . . // 891 . . // Do not remove or change the type signature. 892 . . // See go.dev/issue/67401. 893 . . func (f funcInfo) entry() uintptr { 894 110ms 340ms return f.datap.textAddr(f.entryOff) 895 . . } 896 . . 897 . . //go:linkname badFuncInfoEntry runtime.funcInfo.entry 898 . . func badFuncInfoEntry(funcInfo) uintptr 899 . .
runtime.findfunc
/usr/lib/go/src/runtime/symtab.go
Total: 380ms 380ms (flat, cum) 1.01% 910 . . // Do not remove or change the type signature. 911 . . // See go.dev/issue/67401. 912 . . // 913 . . //go:nosplit 914 . . //go:linkname findfunc 915 10ms 10ms func findfunc(pc uintptr) funcInfo { 916 60ms 60ms datap := findmoduledatap(pc) if datap.minpc <= pc && pc < datap.maxpc { symtab.go:859 ⋮ ⋮ 917 . . if datap == nil { 918 . . return funcInfo{} 919 . . } 920 . . const nsub = uintptr(len(findfuncbucket{}.subbuckets)) 921 . . 922 . . pcOff, ok := datap.textOff(pc) 923 . . if !ok { 924 . . return funcInfo{} 925 . . } 926 . . 927 10ms 10ms x := uintptr(pcOff) + datap.text - datap.minpc // TODO: are datap.text and datap.minpc always equal? 928 . . if GOARCH == "wasm" { 929 . . // On Wasm, pcOff is the function index, whereas 930 . . // the "PC" is function index << 16 + block index. 931 . . x = uintptr(pcOff)<<16 + datap.text - datap.minpc 932 . . } 933 . . b := x / abi.FuncTabBucketSize 934 . . i := x % abi.FuncTabBucketSize / (abi.FuncTabBucketSize / nsub) 935 . . 936 . . ffb := (*findfuncbucket)(add(unsafe.Pointer(datap.findfunctab), b*unsafe.Sizeof(findfuncbucket{}))) 937 130ms 130ms idx := ffb.idx + uint32(ffb.subbuckets[i]) 938 . . 939 . . // Find the ftab entry. 940 140ms 140ms for datap.ftab[idx+1].entryoff <= pcOff { 941 . . idx++ 942 . . } 943 . . 944 10ms 10ms funcoff := datap.ftab[idx].funcoff 945 20ms 20ms return funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[funcoff])), datap} 946 . . } 947 . . 948 . . // A srcFunc represents a logical function in the source code. This may 949 . . // correspond to an actual symbol in the binary text, or it may correspond to a 950 . . // source function that has been inlined.
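findfunc maps a PC to its function metadata by indexing the findfunctab buckets and then scanning a short run of ftab entries; the time here is typically symbolization and stack-unwinding work (tracebacks, the profiler's own stack walks). The same PC-to-function mapping is reachable from user code; a small sketch:

// imports: fmt, runtime
func whoAmI() {
	pc, file, line, ok := runtime.Caller(0)
	if !ok {
		return
	}
	fn := runtime.FuncForPC(pc) // resolved through the same module tables
	fmt.Printf("%s (entry %#x) at %s:%d\n", fn.Name(), fn.Entry(), file, line)
}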
runtime.pcvalueCacheKey
/usr/lib/go/src/runtime/symtab.go
Total: 10ms 10ms (flat, cum) 0.027% 996 . . // pcvalueCacheKey returns the outermost index in a pcvalueCache to use for targetpc. 997 . . // It must be very cheap to calculate. 998 . . // For now, align to goarch.PtrSize and reduce mod the number of entries. 999 . . // In practice, this appears to be fairly randomly and evenly distributed. 1000 . . func pcvalueCacheKey(targetpc uintptr) uintptr { 1001 10ms 10ms return (targetpc / goarch.PtrSize) % uintptr(len(pcvalueCache{}.entries)) 1002 . . }
runtime.pcvalue
/usr/lib/go/src/runtime/symtab.go
Total: 820ms 1.78s (flat, cum) 4.74% 1004 . . // Returns the PCData value, and the PC where this value starts. 1005 10ms 10ms func pcvalue(f funcInfo, off uint32, targetpc uintptr, strict bool) (int32, uintptr) { 1006 . . // If true, when we get a cache hit, still look up the data and make sure it 1007 . . // matches the cached contents. 1008 . . const debugCheckCache = false 1009 . . 1010 . . // If true, skip checking the cache entirely. 1011 . . const skipCache = false 1012 . . 1013 20ms 20ms if off == 0 { 1014 . . return -1, 0 1015 . . } 1016 . . 1017 . . // Check the cache. This speeds up walks of deep stacks, which 1018 . . // tend to have the same recursive functions over and over, 1019 . . // or repetitive stacks between goroutines. 1020 . . var checkVal int32 1021 . . var checkPC uintptr 1022 10ms 10ms ck := pcvalueCacheKey(targetpc) return (targetpc / goarch.PtrSize) % uintptr(len(pcvalueCache{}.entries)) symtab.go:1001 1023 . . if !skipCache { 1024 20ms 20ms mp := acquirem() gp.m.locks++ runtime1.go:631 1025 . . cache := &mp.pcvalueCache 1026 . . // The cache can be used by the signal handler on this M. Avoid 1027 . . // re-entrant use of the cache. The signal handler can also write inUse, 1028 . . // but will always restore its value, so we can use a regular increment 1029 . . // even if we get signaled in the middle of it. 1030 . . cache.inUse++ 1031 . . if cache.inUse == 1 { 1032 110ms 110ms for i := range cache.entries[ck] { 1033 . . // We check off first because we're more 1034 . . // likely to have multiple entries with 1035 . . // different offsets for the same targetpc 1036 . . // than the other way around, so we'll usually 1037 . . // fail in the first clause. 1038 10ms 10ms ent := &cache.entries[ck][i] 1039 100ms 100ms if ent.off == off && ent.targetpc == targetpc { 1040 . . val, pc := ent.val, ent.valPC 1041 . . if debugCheckCache { 1042 . . checkVal, checkPC = ent.val, ent.valPC 1043 . . break 1044 . . } else { 1045 . . cache.inUse-- 1046 10ms 10ms releasem(mp) if mp.locks == 0 && gp.preempt { runtime1.go:639 1047 . . return val, pc 1048 . . } 1049 . . } 1050 . . } 1051 . . } else if debugCheckCache && (cache.inUse < 1 || cache.inUse > 2) { 1052 . . // Catch accounting errors or deeply reentrant use. In principle 1053 . . // "inUse" should never exceed 2. 1054 . . throw("cache.inUse out of range") 1055 . . } 1056 . . cache.inUse-- 1057 20ms 20ms releasem(mp) if mp.locks == 0 && gp.preempt { runtime1.go:639 1058 . . } 1059 . . 1060 10ms 10ms if !f.valid() { 1061 . . if strict && panicking.Load() == 0 { 1062 . . println("runtime: no module data for", hex(f.entry())) 1063 . . throw("no module data") 1064 . . } 1065 . . return -1, 0 1066 . . } 1067 . . datap := f.datap 1068 20ms 20ms p := datap.pctab[off:] 1069 10ms 60ms pc := f.entry() return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ 1070 . . prevpc := pc 1071 . . val := int32(-1) 1072 10ms 10ms for { 1073 . . var ok bool 1074 130ms 1.04s p, ok = step(p, &pc, &val, pc == f.entry()) return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ ⋮ ⋮ ⋮ return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ ⋮ return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ ⋮ ⋮ 1075 10ms 10ms if !ok { 1076 . . break 1077 . . } 1078 130ms 130ms if targetpc < pc { 1079 . . // Replace a random entry in the cache. Random 1080 . . // replacement prevents a performance cliff if 1081 . . // a recursive stack's cycle is slightly 1082 . . // larger than the cache. 1083 . . // Put the new element at the beginning, 1084 . . 
// since it is the most likely to be newly used. 1085 . . if debugCheckCache && checkPC != 0 { 1086 . . if checkVal != val || checkPC != prevpc { 1087 . . print("runtime: table value ", val, "@", prevpc, " != cache value ", checkVal, "@", checkPC, " at PC ", targetpc, " off ", off, "\n") 1088 . . throw("bad pcvalue cache") 1089 . . } 1090 . . } else { 1091 40ms 40ms mp := acquirem() gp.m.locks++ runtime1.go:631 ⋮ ⋮ 1092 . . cache := &mp.pcvalueCache 1093 . . cache.inUse++ 1094 . . if cache.inUse == 1 { 1095 . . e := &cache.entries[ck] 1096 10ms 10ms ci := cheaprandn(uint32(len(cache.entries[ck]))) return uint32((uint64(cheaprand()) * uint64(n)) >> 32) rand.go:293 hi, lo := math.Mul64(mp.cheaprand, mp.cheaprand^0xe7037ed1a0b428db) rand.go:236 1097 100ms 100ms e[ci] = e[0] 1098 . . e[0] = pcvalueCacheEnt{ 1099 . . targetpc: targetpc, 1100 . . off: off, 1101 . . val: val, 1102 . . valPC: prevpc, 1103 . . } 1104 . . } 1105 30ms 30ms cache.inUse-- 1106 . . releasem(mp) 1107 . . } 1108 . . 1109 10ms 10ms return val, prevpc 1110 . . } 1111 . . prevpc = pc 1112 . . } 1113 . . 1114 . . // If there was a table, it should have covered all program counters.
runtime.funcname
/usr/lib/go/src/runtime/symtab.go
Total: 10ms 180ms (flat, cum) 0.48% 1137 . . 1138 . . func funcname(f funcInfo) string { 1139 . . if !f.valid() { 1140 . . return "" 1141 . . } 1142 10ms 180ms return f.datap.funcName(f.nameOff) 1143 . . } 1144 . . 1145 . . func funcpkgpath(f funcInfo) string { 1146 . . name := funcNameForPrint(funcname(f)) 1147 . . i := len(name) - 1
runtime.funcspdelta
/usr/lib/go/src/runtime/symtab.go
Total: 0 1s (flat, cum) 2.66% 1198 . . func funcline(f funcInfo, targetpc uintptr) (file string, line int32) { 1199 . . return funcline1(f, targetpc, true) 1200 . . } 1201 . . 1202 . . func funcspdelta(f funcInfo, targetpc uintptr) int32 { 1203 . 1s x, _ := pcvalue(f, f.pcsp, targetpc, true) 1204 . . if debugPcln && x&(goarch.PtrSize-1) != 0 { 1205 . . print("invalid spdelta ", funcname(f), " ", hex(f.entry()), " ", hex(targetpc), " ", hex(f.pcsp), " ", x, "\n") 1206 . . throw("bad spdelta") 1207 . . } 1208 . . return x
runtime.funcMaxSPDelta
/usr/lib/go/src/runtime/symtab.go
Total: 40ms 100ms (flat, cum) 0.27% 1209 . . } 1210 . . 1211 . . // funcMaxSPDelta returns the maximum spdelta at any point in f. 1212 . . func funcMaxSPDelta(f funcInfo) int32 { 1213 . . datap := f.datap 1214 20ms 20ms p := datap.pctab[f.pcsp:] 1215 . . pc := f.entry() 1216 . . val := int32(-1) 1217 . . most := int32(0) 1218 . . for { 1219 . . var ok bool 1220 20ms 80ms p, ok = step(p, &pc, &val, pc == f.entry()) ⋮ return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ ⋮ return f.datap.textAddr(f.entryOff) symtab.go:894 1221 . . if !ok { 1222 . . return most 1223 . . } 1224 . . most = max(most, val)
runtime.pcdatastart
/usr/lib/go/src/runtime/symtab.go
Total: 10ms 10ms (flat, cum) 0.027% 1225 . . } 1226 . . } 1227 . . 1228 . . func pcdatastart(f funcInfo, table uint32) uint32 { 1229 10ms 10ms return *(*uint32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4)) 1230 . . }
runtime.pcdatavalue
/usr/lib/go/src/runtime/symtab.go
Total: 50ms 830ms (flat, cum) 2.21% 1231 . . 1232 10ms 10ms func pcdatavalue(f funcInfo, table uint32, targetpc uintptr) int32 { 1233 10ms 10ms if table >= f.npcdata { 1234 . . return -1 1235 . . } 1236 30ms 810ms r, _ := pcvalue(f, pcdatastart(f, table), targetpc, true) return *(*uint32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4)) symtab.go:1229 ⋮ ⋮ 1237 . . return r
runtime.pcdatavalue1
/usr/lib/go/src/runtime/symtab.go
Total: 10ms 10ms (flat, cum) 0.027% 1239 . . 1240 10ms 10ms func pcdatavalue1(f funcInfo, table uint32, targetpc uintptr, strict bool) int32 { 1241 . . if table >= f.npcdata { 1242 . . return -1 1243 . . } 1244 . . r, _ := pcvalue(f, pcdatastart(f, table), targetpc, strict) 1245 . . return r
runtime.funcdata
/usr/lib/go/src/runtime/symtab.go
Total: 50ms 50ms (flat, cum) 0.13% 1254 . . } 1255 . . 1256 . . // funcdata returns a pointer to the ith funcdata for f. 1257 . . // funcdata should be kept in sync with cmd/link:writeFuncs. 1258 . . func funcdata(f funcInfo, i uint8) unsafe.Pointer { 1259 10ms 10ms if i < 0 || i >= f.nfuncdata { 1260 . . return nil 1261 . . } 1262 . . base := f.datap.gofunc // load gofunc address early so that we calculate during cache misses 1263 . . p := uintptr(unsafe.Pointer(&f.nfuncdata)) + unsafe.Sizeof(f.nfuncdata) + uintptr(f.npcdata)*4 + uintptr(i)*4 1264 . . off := *(*uint32)(unsafe.Pointer(p)) 1265 . . // Return off == ^uint32(0) ? 0 : f.datap.gofunc + uintptr(off), but without branches. 1266 . . // The compiler calculates mask on most architectures using conditional assignment. 1267 . . var mask uintptr 1268 . . if off == ^uint32(0) { 1269 . . mask = 1 1270 . . } 1271 . . mask-- 1272 30ms 30ms raw := base + uintptr(off) 1273 10ms 10ms return unsafe.Pointer(raw & mask) 1274 . . }
runtime.step
/usr/lib/go/src/runtime/symtab.go
Total: 810ms 810ms (flat, cum) 2.16% 1276 . . // step advances to the next pc, value pair in the encoded table. 1277 30ms 30ms func step(p []byte, pc *uintptr, val *int32, first bool) (newp []byte, ok bool) { 1278 . . // For both uvdelta and pcdelta, the common case (~70%) 1279 . . // is that they are a single byte. If so, avoid calling readvarint. 1280 . . uvdelta := uint32(p[0]) 1281 350ms 350ms if uvdelta == 0 && !first { 1282 . . return nil, false 1283 . . } 1284 . . n := uint32(1) 1285 10ms 10ms if uvdelta&0x80 != 0 { 1286 110ms 110ms n, uvdelta = readvarint(p) for { symtab.go:1304 ⋮ b := p[n] symtab.go:1305 ⋮ v |= uint32(b&0x7F) << (shift & 31) symtab.go:1307 ⋮ ⋮ b := p[n] symtab.go:1305 ⋮ 1287 . . } 1288 50ms 50ms *val += int32(-(uvdelta & 1) ^ (uvdelta >> 1)) 1289 40ms 40ms p = p[n:] 1290 . . 1291 20ms 20ms pcdelta := uint32(p[0]) 1292 . . n = 1 1293 30ms 30ms if pcdelta&0x80 != 0 { 1294 60ms 60ms n, pcdelta = readvarint(p) v |= uint32(b&0x7F) << (shift & 31) symtab.go:1307 ⋮ for { symtab.go:1304 1295 . . } 1296 60ms 60ms p = p[n:] 1297 20ms 20ms *pc += uintptr(pcdelta * sys.PCQuantum) 1298 30ms 30ms return p, true 1299 . . } 1300 . .
runtime.readvarint
/usr/lib/go/src/runtime/symtab.go
Total: 130ms 130ms (flat, cum) 0.35% 1302 . . func readvarint(p []byte) (read uint32, val uint32) { 1303 . . var v, shift, n uint32 1304 70ms 70ms for { 1305 20ms 20ms b := p[n] 1306 . . n++ 1307 40ms 40ms v |= uint32(b&0x7F) << (shift & 31) 1308 . . if b&0x80 == 0 { 1309 . . break 1310 . . } 1311 . . shift += 7 1312 . . }
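step and readvarint decode one entry of a pcdata table: a zig-zag-encoded varint delta for the value, followed by a plain varint delta for the PC (scaled by PCQuantum). A standalone sketch of the two primitives, free of the runtime's nosplit constraints:

func uvarint(p []byte) (n int, v uint32) {
	for shift := uint(0); ; shift += 7 {
		b := p[n]
		n++
		v |= uint32(b&0x7f) << shift
		if b&0x80 == 0 {
			return
		}
	}
}

func zigzag(u uint32) int32 {
	return int32(-(u & 1) ^ (u >> 1)) // 0,1,2,3,... -> 0,-1,1,-2,...
}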
runtime.stackmapdata
/usr/lib/go/src/runtime/symtab.go
Total: 10ms 10ms (flat, cum) 0.027% 1325 . . // The invariant is already checked by many of stackmapdata's callers, 1326 . . // and disabling it by default allows stackmapdata to be inlined. 1327 . . if stackDebug > 0 && (n < 0 || n >= stkmap.n) { 1328 . . throw("stackmapdata: index out of range") 1329 . . } 1330 10ms 10ms return bitvector{stkmap.nbit, addb(&stkmap.bytedata[0], uintptr(n*((stkmap.nbit+7)>>3)))} 1331 . . }
runtime.(*mspan).initHeapBits
/usr/lib/go/src/runtime/mbitmap.go
Total: 20ms 320ms (flat, cum) 0.85% 503 . . p[0] = *srcx 504 . . } 505 . . } 506 . . 507 . . // initHeapBits initializes the heap bitmap for a span. 508 10ms 10ms func (s *mspan) initHeapBits() { 509 . . if goarch.PtrSize == 8 && !s.spanclass.noscan() && s.spanclass.sizeclass() == 1 { 510 . . b := s.heapBits() 511 . . for i := range b { 512 . . b[i] = ^uintptr(0) 513 . . } 514 10ms 10ms } else if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk { 515 . . b := s.heapBits() 516 . 230ms clear(b) 517 . . } 518 . . if goexperiment.GreenTeaGC && gcUsesSpanInlineMarkBits(s.elemsize) { 519 . 70ms s.initInlineMarkBits() s.inlineMarkBits().init(s.spanclass, s.needzero != 0) mgcmark_greenteagc.go:190 520 . . } 521 . . } 522 . . 523 . . // heapBits returns the heap ptr/scalar bits stored at the end of the span for 524 . . // small object spans and heap arena spans.
runtime.spanHeapBitsRange
/usr/lib/go/src/runtime/mbitmap.go
Total: 80ms 80ms (flat, cum) 0.21% 567 . . } 568 . . 569 . . //go:nosplit 570 . . func spanHeapBitsRange(spanBase, spanSize, elemsize uintptr) (base, size uintptr) { 571 . . size = spanSize / goarch.PtrSize / 8 572 20ms 20ms base = spanBase + spanSize - size 573 30ms 30ms if goexperiment.GreenTeaGC && gcUsesSpanInlineMarkBits(elemsize) { return heapBitsInSpan(size) && size >= 16 mgcmark_greenteagc.go:259 ⋮ 574 30ms 30ms base -= unsafe.Sizeof(spanInlineMarkBits{}) 575 . . } 576 . . return 577 . . } 578 . . 579 . . // heapBitsSmallForAddr loads the heap bits for the object stored at addr from span.heapBits.
runtime.(*mspan).writeHeapBitsSmall
/usr/lib/go/src/runtime/mbitmap.go
Total: 830ms 830ms (flat, cum) 2.21% 619 . . // 620 . . // Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span. 621 . . // heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_. 622 . . // 623 . . //go:nosplit 624 20ms 20ms func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize uintptr) { 625 . . // The objects here are always really small, so a single load is sufficient. 626 260ms 260ms src0 := readUintptr(getGCMask(typ)) ⋮ if t.TFlag&abi.TFlagGCMaskOnDemand != 0 { type.go:89 ⋮ ⋮ return t.GCData type.go:93 627 . . 628 . . // Create repetitions of the bitmap if we have a small slice backing store. 629 . . scanSize = typ.PtrBytes 630 . . src := src0 631 60ms 60ms if typ.Size_ == goarch.PtrSize { 632 . . src = (1 << (dataSize / goarch.PtrSize)) - 1 633 . . } else { 634 . . // N.B. We rely on dataSize being an exact multiple of the type size. 635 . . // The alternative is to be defensive and mask out src to the length 636 . . // of dataSize. The purpose is to save on one additional masking operation. 637 . . if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { 638 . . throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") 639 . . } 640 20ms 20ms for i := typ.Size_; i < dataSize; i += typ.Size_ { 641 40ms 40ms src |= src0 << (i / goarch.PtrSize) 642 10ms 10ms scanSize += typ.Size_ 643 . . } 644 . . if asanenabled { 645 . . // Mask src down to dataSize. dataSize is going to be a strange size because of 646 . . // the redzone required for allocations when asan is enabled. 647 . . src &= (1 << (dataSize / goarch.PtrSize)) - 1 648 . . } 649 . . } 650 . . 651 . . // Since we're never writing more than one uintptr's worth of bits, we're either going 652 . . // to do one or two writes. 653 100ms 100ms dstBase, _ := spanHeapBitsRange(span.base(), pageSize, span.elemsize) base -= unsafe.Sizeof(spanInlineMarkBits{}) mbitmap.go:574 ⋮ if goexperiment.GreenTeaGC && gcUsesSpanInlineMarkBits(elemsize) { mbitmap.go:573 return heapBitsInSpan(size) && size >= 16 mgcmark_greenteagc.go:259 ⋮ return s.startAddr mheap.go:523 ⋮ base = spanBase + spanSize - size mbitmap.go:572 ⋮ if goexperiment.GreenTeaGC && gcUsesSpanInlineMarkBits(elemsize) { mbitmap.go:573 654 . . dst := unsafe.Pointer(dstBase) 655 . . o := (x - span.base()) / goarch.PtrSize 656 . . i := o / ptrBits 657 10ms 10ms j := o % ptrBits 658 70ms 70ms bits := span.elemsize / goarch.PtrSize 659 10ms 10ms if j+bits > ptrBits { 660 . . // Two writes. 661 10ms 10ms bits0 := ptrBits - j 662 . . bits1 := bits - bits0 663 . . dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) 664 50ms 50ms dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) return unsafe.Pointer(uintptr(p) + x) stubs.go:25 665 . . *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) 666 . . *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) 667 . . } else { 668 . . // One write. 669 130ms 130ms dst := (*uintptr)(add(dst, i*goarch.PtrSize)) 670 10ms 10ms *dst = (*dst)&^(((1<<bits)-1)<<j) | (src << j) 671 . . } 672 . . 673 . . const doubleCheck = false 674 . . if doubleCheck { 675 . . srcRead := span.heapBitsSmallForAddr(x) 676 . . if srcRead != src { 677 . . print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n") 678 . . print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n") 679 . . print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n") 680 . . 
throw("bad pointer bits written for small object") 681 . . } 682 . . } 683 30ms 30ms return 684 . . } 685 . . 686 . . // heapSetType* functions record that the new allocation [x, x+size) 687 . . // holds in [x, x+dataSize) one or more values of type typ. 688 . . // (The number of values is given by dataSize / typ.Size.)
runtime.heapSetTypeNoHeader
/usr/lib/go/src/runtime/mbitmap.go
Total: 110ms 940ms (flat, cum) 2.50% 704 . . 705 . . func heapSetTypeNoHeader(x, dataSize uintptr, typ *_type, span *mspan) uintptr { 706 . . if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) { 707 . . throw("tried to write heap bits, but no heap bits in span") 708 . . } 709 110ms 940ms scanSize := span.writeHeapBitsSmall(x, dataSize, typ) 710 . . if doubleCheckHeapSetType { 711 . . doubleCheckHeapType(x, dataSize, typ, nil, span) 712 . . }
runtime.heapSetTypeSmallHeader
/usr/lib/go/src/runtime/mbitmap.go
Total: 10ms 10ms (flat, cum) 0.027% 714 . . } 715 . . 716 . . func heapSetTypeSmallHeader(x, dataSize uintptr, typ *_type, header **_type, span *mspan) uintptr { 717 10ms 10ms *header = typ 718 . . if doubleCheckHeapSetType { 719 . . doubleCheckHeapType(x, dataSize, typ, header, span) 720 . . } 721 . . return span.elemsize 722 . . }
runtime.addb
/usr/lib/go/src/runtime/mbitmap.go
Total: 10ms 10ms (flat, cum) 0.027% 1007 . . //go:nosplit 1008 . . func addb(p *byte, n uintptr) *byte { 1009 . . // Note: wrote out full expression instead of calling add(p, n) 1010 . . // to reduce the number of temporaries generated by the 1011 . . // compiler for this trivial expression during inlining. 1012 10ms 10ms return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) 1013 . . } 1014 . . 1015 . . // subtractb returns the byte pointer p-n. 1016 . . // 1017 . . //go:nowritebarrier
runtime.(*mspan).refillAllocCache
/usr/lib/go/src/runtime/mbitmap.go
Total: 120ms 120ms (flat, cum) 0.32% 1071 . . // refillAllocCache takes 8 bytes s.allocBits starting at whichByte 1072 . . // and negates them so that ctz (count trailing zeros) instructions 1073 . . // can be used. It then places these 8 bytes into the cached 64 bit 1074 . . // s.allocCache. 1075 . . func (s *mspan) refillAllocCache(whichByte uint16) { 1076 120ms 120ms bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(uintptr(whichByte)))) 1077 . . aCache := uint64(0) 1078 . . aCache |= uint64(bytes[0]) 1079 . . aCache |= uint64(bytes[1]) << (1 * 8) 1080 . . aCache |= uint64(bytes[2]) << (2 * 8) 1081 . . aCache |= uint64(bytes[3]) << (3 * 8)
runtime.(*mspan).nextFreeIndex
/usr/lib/go/src/runtime/mbitmap.go
Total: 10ms 50ms (flat, cum) 0.13% 1088 . . 1089 . . // nextFreeIndex returns the index of the next free object in s at 1090 . . // or after s.freeindex. 1091 . . // There are hardware instructions that can be used to make this 1092 . . // faster if profiling warrants it. 1093 . 40ms func (s *mspan) nextFreeIndex() uint16 { 1094 . . sfreeindex := s.freeindex 1095 . . snelems := s.nelems 1096 10ms 10ms if sfreeindex == snelems { 1097 . . return sfreeindex 1098 . . } 1099 . . if sfreeindex > snelems { 1100 . . throw("s.freeindex > s.nelems") 1101 . . }
runtime.(*mspan).nextFreeIndex
/usr/lib/go/src/runtime/mbitmap.go
Total: 20ms 100ms (flat, cum) 0.27% 1116 . . aCache = s.allocCache 1117 . . bitIndex = sys.TrailingZeros64(aCache) 1118 . . // nothing available in cached bits 1119 . . // grab the next 8 bytes and try again. 1120 . . } 1121 10ms 10ms result := sfreeindex + uint16(bitIndex) 1122 . . if result >= snelems { 1123 . . s.freeindex = snelems 1124 . . return snelems 1125 . . } 1126 . . 1127 . . s.allocCache >>= uint(bitIndex + 1) 1128 . . sfreeindex = result + 1 1129 . . 1130 . . if sfreeindex%64 == 0 && sfreeindex != snelems { 1131 . . // We just incremented s.freeindex so it isn't 0. 1132 . . // As each 1 in s.allocCache was encountered and used for allocation 1133 . . // it was shifted away. At this point s.allocCache contains all 0s. 1134 . . // Refill s.allocCache so that it corresponds 1135 . . // to the bits at s.allocBits starting at s.freeindex. 1136 . . whichByte := sfreeindex / 8 1137 . 80ms s.refillAllocCache(whichByte) 1138 . . } 1139 . . s.freeindex = sfreeindex 1140 10ms 10ms return result 1141 . . } 1142 . . 1143 . . // isFree reports whether the index'th object in s is unallocated. 1144 . . // 1145 . . // The caller must ensure s.state is mSpanInUse, and there must have
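Both nextFreeFast (see above) and nextFreeIndex work off s.allocCache, a 64-object window of inverted allocation bits, so finding the next free slot is a single count-trailing-zeros plus an occasional refill from s.allocBits. A minimal sketch of the idea, using a toy cache type instead of the runtime's span fields:

package main

import (
	"fmt"
	"math/bits"
)

// allocCache is a toy 64-object window in which a 1 bit means "free",
// mirroring how s.allocCache holds the negated allocBits.
type allocCache struct {
	cache uint64
	base  uint16 // object index corresponding to bit 0
}

// nextFree returns the index of the next free object and consumes it, or
// reports false when the window is exhausted (the runtime would refill the
// cache from s.allocBits at that point).
func (c *allocCache) nextFree() (uint16, bool) {
	tz := bits.TrailingZeros64(c.cache)
	if tz == 64 {
		return 0, false
	}
	idx := c.base + uint16(tz)
	c.cache >>= uint(tz + 1) // drop the bit we just used and everything before it
	c.base = idx + 1
	return idx, true
}

func main() {
	c := allocCache{cache: 0b1011_0100} // objects 2, 4, 5 and 7 are free
	for {
		idx, ok := c.nextFree()
		if !ok {
			break
		}
		fmt.Println("allocate object", idx)
	}
}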
runtime.findObject
/usr/lib/go/src/runtime/mbitmap.go
Total: 10ms 10ms (flat, cum) 0.027% 1331 . . } 1332 . . // If p is a bad pointer, it may not be in s's bounds. 1333 . . // 1334 . . // Check s.state to synchronize with span initialization 1335 . . // before checking other fields. See also spanOfHeap. 1336 10ms 10ms if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit { 1337 . . // Pointers into stacks are also ok, the runtime manages these explicitly. 1338 . . if state == mSpanManual { 1339 . . return 1340 . . } 1341 . . // The following ensures that we are rigorous about what data
runtime.(*mspan).countAlloc
/usr/lib/go/src/runtime/mbitmap.go
Total: 10ms 10ms (flat, cum) 0.027% 1455 . . bytes := divRoundUp(uintptr(s.nelems), 8) 1456 . . // Iterate over each 8-byte chunk and count allocations 1457 . . // with an intrinsic. Note that newMarkBits guarantees that 1458 . . // gcmarkBits will be 8-byte aligned, so we don't have to 1459 . . // worry about edge cases, irrelevant bits will simply be zero. 1460 10ms 10ms for i := uintptr(0); i < bytes; i += 8 { 1461 . . // Extract 64 bits from the byte pointer and get a OnesCount. 1462 . . // Note that the unsafe cast here doesn't preserve endianness, 1463 . . // but that's OK. We only care about how many bits are 1, not 1464 . . // about the order we discover them in. 1465 . . mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
internal/runtime/atomic.(*Int32).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 10ms 10ms (flat, cum) 0.027% 16 . . 17 . . // Load accesses and returns the value atomically. 18 . . // 19 . . //go:nosplit 20 . . func (i *Int32) Load() int32 { 21 10ms 10ms return Loadint32(&i.value) 22 . . } 23 . . 24 . . // Store updates the value atomically. 25 . . // 26 . . //go:nosplit
internal/runtime/atomic.(*Int32).CompareAndSwap
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 10ms 10ms (flat, cum) 0.027% 32 . . // and if they're equal, swaps i's value with new. 33 . . // It reports whether the swap ran. 34 . . // 35 . . //go:nosplit 36 . . func (i *Int32) CompareAndSwap(old, new int32) bool { 37 10ms 10ms return Casint32(&i.value, old, new) 38 . . } 39 . . 40 . . // Swap replaces i's value with new, returning 41 . . // i's value before the replacement. 42 . . //
internal/runtime/atomic.(*Int32).Add
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 90ms 90ms (flat, cum) 0.24% 51 . . // This operation wraps around in the usual 52 . . // two's-complement way. 53 . . // 54 . . //go:nosplit 55 . . func (i *Int32) Add(delta int32) int32 { 56 90ms 90ms return Xaddint32(&i.value, delta) 57 . . } 58 . . 59 . . // Int64 is an atomically accessed int64 value. 60 . . // 61 . . // 8-byte aligned on all platforms, unlike a regular int64.
internal/runtime/atomic.(*Int64).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 70ms 70ms (flat, cum) 0.19% 69 . . 70 . . // Load accesses and returns the value atomically. 71 . . // 72 . . //go:nosplit 73 . . func (i *Int64) Load() int64 { 74 70ms 70ms return Loadint64(&i.value) 75 . . } 76 . . 77 . . // Store updates the value atomically. 78 . . // 79 . . //go:nosplit
internal/runtime/atomic.(*Int64).Add
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 10ms 10ms (flat, cum) 0.027% 104 . . // This operation wraps around in the usual 105 . . // two's-complement way. 106 . . // 107 . . //go:nosplit 108 . . func (i *Int64) Add(delta int64) int64 { 109 10ms 10ms return Xaddint64(&i.value, delta) 110 . . } 111 . . 112 . . // Uint8 is an atomically accessed uint8 value. 113 . . // 114 . . // A Uint8 must not be copied.
internal/runtime/atomic.(*Uint8).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 10ms 10ms (flat, cum) 0.027% 119 . . 120 . . // Load accesses and returns the value atomically. 121 . . // 122 . . //go:nosplit 123 . . func (u *Uint8) Load() uint8 { 124 10ms 10ms return Load8(&u.value) 125 . . } 126 . . 127 . . // Store updates the value atomically. 128 . . // 129 . . //go:nosplit
internal/runtime/atomic.(*Bool).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 70ms 70ms (flat, cum) 0.19% 163 . . 164 . . // Load accesses and returns the value atomically. 165 . . // 166 . . //go:nosplit 167 . . func (b *Bool) Load() bool { 168 70ms 70ms return b.u.Load() != 0 169 . . } 170 . . 171 . . // Store updates the value atomically. 172 . . // 173 . . //go:nosplit
internal/runtime/atomic.(*Uint32).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 30ms 30ms (flat, cum) 0.08% 189 . . 190 . . // Load accesses and returns the value atomically. 191 . . // 192 . . //go:nosplit 193 . . func (u *Uint32) Load() uint32 { 194 30ms 30ms return Load(&u.value) 195 . . } 196 . . 197 . . // LoadAcquire is a partially unsynchronized version 198 . . // of Load that relaxes ordering constraints. Other threads 199 . . // may observe operations that precede this operation to
internal/runtime/atomic.(*Uint32).CompareAndSwap
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 330ms 330ms (flat, cum) 0.88% 231 . . // and if they're equal, swaps u's value with new. 232 . . // It reports whether the swap ran. 233 . . // 234 . . //go:nosplit 235 . . func (u *Uint32) CompareAndSwap(old, new uint32) bool { 236 330ms 330ms return Cas(&u.value, old, new) 237 . . } 238 . . 239 . . // CompareAndSwapRelease is a partially unsynchronized version 240 . . // of Cas that relaxes ordering constraints. Other threads 241 . . // may observe operations that occur after this operation to
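The 330ms here is the CAS instruction itself; internal/runtime/atomic is not importable, but the exported sync/atomic types expose the same operations. A small example of the usual compare-and-swap retry loop that this primitive enables (setMax is an illustrative helper, not a runtime function):

package main

import (
	"fmt"
	"sync/atomic"
)

// setMax atomically raises u to v if v is larger, using the same
// compare-and-swap retry pattern the runtime relies on above.
func setMax(u *atomic.Uint32, v uint32) {
	for {
		cur := u.Load()
		if v <= cur || u.CompareAndSwap(cur, v) {
			return
		}
		// Another writer won the race; reload and retry.
	}
}

func main() {
	var hi atomic.Uint32
	for _, v := range []uint32{3, 9, 4, 12, 7} {
		setMax(&hi, v)
	}
	fmt.Println(hi.Load()) // 12
}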
internal/runtime/atomic.(*Uint32).Add
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 90ms 90ms (flat, cum) 0.24% 286 . . // This operation wraps around in the usual 287 . . // two's-complement way. 288 . . // 289 . . //go:nosplit 290 . . func (u *Uint32) Add(delta int32) uint32 { 291 90ms 90ms return Xadd(&u.value, delta) 292 . . } 293 . . 294 . . // Uint64 is an atomically accessed uint64 value. 295 . . // 296 . . // 8-byte aligned on all platforms, unlike a regular uint64.
internal/runtime/atomic.(*Uint64).Load
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 20ms 20ms (flat, cum) 0.053% 304 . . 305 . . // Load accesses and returns the value atomically. 306 . . // 307 . . //go:nosplit 308 . . func (u *Uint64) Load() uint64 { 309 20ms 20ms return Load64(&u.value) 310 . . } 311 . . 312 . . // Store updates the value atomically. 313 . . // 314 . . //go:nosplit
internal/runtime/atomic.(*Uint64).CompareAndSwap
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 50ms 50ms (flat, cum) 0.13% 320 . . // and if they're equal, swaps u's value with new. 321 . . // It reports whether the swap ran. 322 . . // 323 . . //go:nosplit 324 . . func (u *Uint64) CompareAndSwap(old, new uint64) bool { 325 50ms 50ms return Cas64(&u.value, old, new) 326 . . } 327 . . 328 . . // Swap replaces u's value with new, returning 329 . . // u's value before the replacement. 330 . . //
internal/runtime/atomic.(*Uint64).Add
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 250ms 250ms (flat, cum) 0.67% 339 . . // This operation wraps around in the usual 340 . . // two's-complement way. 341 . . // 342 . . //go:nosplit 343 . . func (u *Uint64) Add(delta int64) uint64 { 344 250ms 250ms return Xadd64(&u.value, delta) 345 . . } 346 . . 347 . . // Uintptr is an atomically accessed uintptr value. 348 . . // 349 . . // A Uintptr must not be copied.
internal/runtime/atomic.(*Uintptr).Add
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 30ms 30ms (flat, cum) 0.08% 415 . . // This operation wraps around in the usual 416 . . // two's-complement way. 417 . . // 418 . . //go:nosplit 419 . . func (u *Uintptr) Add(delta uintptr) uintptr { 420 30ms 30ms return Xadduintptr(&u.value, delta) 421 . . } 422 . . 423 . . // Float64 is an atomically accessed float64 value. 424 . . // 425 . . // 8-byte aligned on all platforms, unlike a regular float64.
internal/runtime/atomic.(*UnsafePointer).StoreNoWB
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 40ms 40ms (flat, cum) 0.11% 474 . . // It is safe to use with values not found in the Go heap. 475 . . // Prefer Store instead. 476 . . // 477 . . //go:nosplit 478 . . func (u *UnsafePointer) StoreNoWB(value unsafe.Pointer) { 479 40ms 40ms StorepNoWB(unsafe.Pointer(&u.value), value) 480 . . } 481 . . 482 . . // Store updates the value atomically. 483 . . func (u *UnsafePointer) Store(value unsafe.Pointer) { 484 . . storePointer(&u.value, value)
internal/runtime/atomic.(*Pointer[go.shape.struct { runtime.spanSetBlockHeader2; runtime.spans [512]runtime.atomicMSpanPointer }]).StoreNoWB
/usr/lib/go/src/internal/runtime/atomic/types.go
Total: 30ms 30ms (flat, cum) 0.08% 534 . . // It is safe to use with values not found in the Go heap. 535 . . // Prefer Store instead. 536 . . // 537 . . //go:nosplit 538 . . func (p *Pointer[T]) StoreNoWB(value *T) { 539 30ms 30ms p.u.StoreNoWB(unsafe.Pointer(value)) StorepNoWB(unsafe.Pointer(&u.value), value) types.go:479 540 . . } 541 . . 542 . . // Store updates the value atomically. 543 . . // 544 . . //go:nosplit
runtime.lock
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 10ms 720ms (flat, cum) 1.92% 147 . . func mutexContended(l *mutex) bool { 148 . . return atomic.Loaduintptr(&l.key)&^mutexMMask != 0 149 . . } 150 . . 151 . . func lock(l *mutex) { 152 10ms 720ms lockWithRank(l, getLockRank(l)) lock2(l) lockrank_off.go:24 ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ lock2(l) lockrank_off.go:24 ⋮ 153 . . } 154 . .
runtime.lock2
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 440ms 440ms (flat, cum) 1.17% 155 . . func lock2(l *mutex) { 156 . . gp := getg() 157 20ms 20ms if gp.m.locks < 0 { 158 . . throw("runtime·lock: lock count") 159 . . } 160 . . gp.m.locks++ 161 . . 162 . . k8 := key8(&l.key) 163 . . 164 . . // Speculative grab for lock. 165 10ms 10ms v8 := atomic.Xchg8(k8, mutexLocked) 166 380ms 380ms if v8&mutexLocked == 0 { 167 10ms 10ms if v8&mutexSleeping != 0 { 168 . . atomic.Or8(k8, mutexSleeping) 169 . . } 170 20ms 20ms return 171 . . } 172 . . semacreate(gp.m) 173 . . 174 . . var startTime int64 175 . . // On uniprocessors, no point spinning.
runtime.lock2
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 40ms 170ms (flat, cum) 0.45% 189 . . if next&^mutexMMask == 0 { 190 . . // The fast-path Xchg8 may have cleared mutexSleeping. Fix 191 . . // the hint so unlock2 knows when to use its slow path. 192 . . next = next &^ mutexSleeping 193 . . } 194 20ms 20ms if atomic.Casuintptr(&l.key, v, next) { 195 . . gp.m.mLockProfile.end(startTime) 196 . . return 197 . . } 198 . . } else { 199 . . prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping) 200 . . if prev8&mutexLocked == 0 { 201 . . gp.m.mLockProfile.end(startTime) 202 . . return 203 . . } 204 . . } 205 . . v = atomic.Loaduintptr(&l.key) 206 . . continue tryAcquire 207 . . } 208 . . 209 10ms 10ms if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) { 210 . . v |= mutexSpinning 211 . . weSpin = true 212 . . } 213 . . 214 . . if weSpin || atTail || mutexPreferLowLatency(l) { 215 10ms 10ms if i < spin { 216 . . procyield(mutexActiveSpinSize) 217 . . v = atomic.Loaduintptr(&l.key) 218 . . continue tryAcquire 219 . . } else if i < spin+mutexPassiveSpinCount { 220 . 130ms osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268. 221 . . v = atomic.Loaduintptr(&l.key) 222 . . continue tryAcquire 223 . . } 224 . . } 225 . .
runtime.lock2
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 10ms 100ms (flat, cum) 0.27% 245 . . } 246 . . 247 . . if atomic.Casuintptr(&l.key, v, next) { 248 . . weSpin = false 249 . . // We've pushed ourselves onto the stack of waiters. Wait. 250 . 90ms semasleep(-1) 251 . . atTail = gp.m.mWaitList.next == 0 // we were at risk of starving 252 . . i = 0 253 . . } 254 . . 255 . . gp.m.mWaitList.next = 0 256 10ms 10ms v = atomic.Loaduintptr(&l.key) 257 . . } 258 . . }
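lock2's fast path is a speculative Xchg8 of the locked bit; only on failure does it spin (procyield/osyield) and eventually park on a semaphore, which is where the futex time in the syscall listing above comes from. A toy analogue of that spin-then-yield shape, assuming sync/atomic in place of the runtime's internal atomics and Gosched in place of real parking:

package main

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
)

// spinMutex is a simplified stand-in for the pattern in lock2: grab the lock
// speculatively, spin briefly on failure, then back off. The real runtime
// parks the M on a futex-backed semaphore instead of yielding forever.
type spinMutex struct{ state atomic.Uint32 }

func (m *spinMutex) Lock() {
	const activeSpins = 30
	for i := 0; ; i++ {
		if m.state.CompareAndSwap(0, 1) { // speculative grab, like the Xchg8 fast path
			return
		}
		if i < activeSpins {
			continue // active spinning; lock2 uses procyield here
		}
		runtime.Gosched() // passive backoff; lock2 would osyield and then semasleep
	}
}

func (m *spinMutex) Unlock() { m.state.Store(0) }

func main() {
	var (
		mu      spinMutex
		wg      sync.WaitGroup
		counter int
	)
	for g := 0; g < 8; g++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for n := 0; n < 1000; n++ {
				mu.Lock()
				counter++
				mu.Unlock()
			}
		}()
	}
	wg.Wait()
	fmt.Println(counter) // 8000
}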
runtime.unlock
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 10ms 550ms (flat, cum) 1.47% 259 . . 260 . . func unlock(l *mutex) { 261 10ms 550ms unlockWithRank(l) unlock2(l) lockrank_off.go:35 ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ 262 . . } 263 . .
runtime.unlock2
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 60ms 60ms (flat, cum) 0.16% 265 . . // 266 . . //go:nowritebarrier 267 10ms 10ms func unlock2(l *mutex) { 268 . . gp := getg() 269 . . 270 . . var prev8 uint8 271 . . var haveStackLock bool 272 . . var endTicks int64 273 40ms 40ms if !mutexSampleContention() { if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 { lock_spinbit.go:330 274 . . // Not collecting a sample for the contention profile, do the quick release 275 10ms 10ms prev8 = atomic.Xchg8(key8(&l.key), 0) 276 . . } else { 277 . . // If there's contention, we'll sample it. Don't allow another 278 . . // lock2/unlock2 pair to finish before us and take our blame. Prevent 279 . . // that by trading for the stack lock with a CAS. 280 . . v := atomic.Loaduintptr(&l.key)
runtime.unlock2
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 390ms 480ms (flat, cum) 1.28% 309 . . if prev8&mutexLocked == 0 { 310 . . throw("unlock of unlocked lock") 311 . . } 312 . . 313 . . if prev8&mutexSleeping != 0 { 314 280ms 370ms unlock2Wake(l, haveStackLock, endTicks) 315 . . } 316 . . 317 40ms 40ms gp.m.mLockProfile.store() if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.haveStack { mprof.go:756 ⋮ ⋮ 318 10ms 10ms gp.m.locks-- 319 40ms 40ms if gp.m.locks < 0 { 320 . . throw("runtime·unlock: lock count") 321 . . } 322 20ms 20ms if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack 323 . . gp.stackguard0 = stackPreempt 324 . . } 325 . . }
runtime.mutexSampleContention
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 40ms 40ms (flat, cum) 0.11% 327 . . // mutexSampleContention returns whether the current mutex operation should 328 . . // report any contention it discovers. 329 . . func mutexSampleContention() bool { 330 40ms 40ms if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 { 331 . . return false 332 . . } else { 333 . . // TODO: have SetMutexProfileFraction do the clamping 334 . . rate32 := uint32(rate) 335 . . if int64(rate32) != rate {
runtime.unlock2Wake
/usr/lib/go/src/runtime/lock_spinbit.go
Total: 0 90ms (flat, cum) 0.24% 456 . . 457 . . next := headM | flags 458 . . if atomic.Casuintptr(&l.key, v, next) { 459 . . if wakem != nil { 460 . . // Claimed an M. Wake it. 461 . 90ms semawakeup(wakem) futexwakeup(&mp.waitsema, 1) lock_futex.go:161 462 . . } 463 . . return 464 . . } 465 . . 466 . . v = atomic.Loaduintptr(&l.key)
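unlock2Wake hands the lock to one waiting M via semawakeup/futexwakeup. If contention like the futex totals above needs attribution, the mutex profile can be enabled from user code; in recent Go releases it also covers runtime-internal locks. A minimal setup, assuming the standard net/http/pprof endpoint is acceptable:

package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers /debug/pprof/, including the mutex profile
	"runtime"
)

func main() {
	// Sample roughly 1 in 5 contended lock events. mutexSampleContention in the
	// listing above is the runtime-side check that a nonzero rate switches on.
	runtime.SetMutexProfileFraction(5)

	// Inspect with: go tool pprof http://localhost:6060/debug/pprof/mutex
	log.Fatal(http.ListenAndServe("localhost:6060", nil))
}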
runtime.stackpoolalloc
/usr/lib/go/src/runtime/stack.go
Total: 40ms 40ms (flat, cum) 0.11% 222 . . s.manualFreeList = x 223 . . } 224 . . list.insert(s) 225 . . } 226 . . x := s.manualFreeList 227 10ms 10ms if x.ptr() == nil { return (*gclink)(unsafe.Pointer(p)) mcache.go:76 228 . . throw("span has no free stacks") 229 . . } 230 20ms 20ms s.manualFreeList = x.ptr().next 231 . . s.allocCount++ 232 . . if s.manualFreeList.ptr() == nil { 233 . . // all stacks in s are allocated. 234 10ms 10ms list.remove(s) 235 . . } 236 . . return x 237 . . }
runtime.stackpoolfree
/usr/lib/go/src/runtime/stack.go
Total: 20ms 20ms (flat, cum) 0.053% 239 . . // Adds stack x to the free pool. Must be called with stackpool[order].item.mu held. 240 . . func stackpoolfree(x gclinkptr, order uint8) { 241 . . s := spanOfUnchecked(uintptr(x)) 242 20ms 20ms if s.state.get() != mSpanManual { ⋮ return mSpanState(b.s.Load()) mheap.go:417 return Load8(&u.value) types.go:124 243 . . throw("freeing stack not in a stack span") 244 . . } 245 . . if s.manualFreeList.ptr() == nil { 246 . . // s will now have a free stack 247 . . stackpool[order].item.span.insert(s)
runtime.stackcacherefill
/usr/lib/go/src/runtime/stack.go
Total: 0 40ms (flat, cum) 0.11% 285 . . // Grab half of the allowed capacity (to prevent thrashing). 286 . . var list gclinkptr 287 . . var size uintptr 288 . . lock(&stackpool[order].item.mu) 289 . . for size < _StackCacheSize/2 { 290 . 40ms x := stackpoolalloc(order) 291 . . x.ptr().next = list 292 . . list = x 293 . . size += fixedStack << order 294 . . } 295 . . unlock(&stackpool[order].item.mu)
runtime.stackcacherelease
/usr/lib/go/src/runtime/stack.go
Total: 0 40ms (flat, cum) 0.11% 302 . . if stackDebug >= 1 { 303 . . print("stackcacherelease order=", order, "\n") 304 . . } 305 . . x := c.stackcache[order].list 306 . . size := c.stackcache[order].size 307 . 10ms lock(&stackpool[order].item.mu) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 308 . . for size > _StackCacheSize/2 { 309 . . y := x.ptr().next 310 . 20ms stackpoolfree(x, order) 311 . . x = y 312 . . size -= fixedStack << order 313 . . } 314 . 10ms unlock(&stackpool[order].item.mu) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 315 . . c.stackcache[order].list = x 316 . . c.stackcache[order].size = size 317 . . } 318 . . 319 . . //go:systemstack
runtime.stackalloc
/usr/lib/go/src/runtime/stack.go
Total: 80ms 120ms (flat, cum) 0.32% 354 . . } 355 . . if stackDebug >= 1 { 356 . . print("stackalloc ", n, "\n") 357 . . } 358 . . 359 10ms 10ms if debug.efence != 0 || stackFromSystem != 0 { 360 . . n = uint32(alignUp(uintptr(n), physPageSize)) 361 . . v := sysAlloc(uintptr(n), &memstats.stacks_sys, "goroutine stack (system)") 362 . . if v == nil { 363 . . throw("out of memory (stackalloc)") 364 . . } 365 . . return stack{uintptr(v), uintptr(v) + uintptr(n)} 366 . . } 367 . . 368 . . // Small stacks are allocated with a fixed-size free-list allocator. 369 . . // If we need a stack of a bigger size, we fall back on allocating 370 . . // a dedicated span. 371 . . var v unsafe.Pointer 372 . . if n < fixedStack<<_NumStackOrders && n < _StackCacheSize { 373 . . order := uint8(0) 374 . . n2 := n 375 . . for n2 > fixedStack { 376 10ms 10ms order++ 377 . . n2 >>= 1 378 . . } 379 . . var x gclinkptr 380 . . if stackNoCache != 0 || thisg.m.p == 0 || thisg.m.preemptoff != "" { 381 . . // thisg.m.p == 0 can happen in the guts of exitsyscall 382 . . // or procresize. Just get a stack from the global pool. 383 . . // Also don't touch stackcache during gc 384 . . // as it's flushed concurrently. 385 . . lock(&stackpool[order].item.mu) 386 . . x = stackpoolalloc(order) 387 . . unlock(&stackpool[order].item.mu) 388 . . } else { 389 . . c := thisg.m.p.ptr().mcache 390 10ms 10ms x = c.stackcache[order].list 391 10ms 10ms if x.ptr() == nil { return (*gclink)(unsafe.Pointer(p)) mcache.go:76 392 . 40ms stackcacherefill(c, order) 393 . . x = c.stackcache[order].list 394 . . } 395 40ms 40ms c.stackcache[order].list = x.ptr().next 396 . . c.stackcache[order].size -= uintptr(n) 397 . . } 398 . . if valgrindenabled { 399 . . // We're about to allocate the stack region starting at x.ptr(). 400 . . // To prevent valgrind from complaining about overlapping allocations,
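Small stacks come from per-P free lists indexed by an order, computed by doubling fixedStack until it covers n. A tiny reproduction of that sizing loop, with fixedStack assumed to be 2 KiB as on this platform:

package main

import "fmt"

const fixedStack = 2048 // smallest stack allocation, assumed 2 KiB on linux/arm64

// stackOrder mirrors the loop in stackalloc/stackfree: the order is how many
// doublings of fixedStack are needed to reach n.
func stackOrder(n uint32) uint8 {
	order := uint8(0)
	for n2 := n; n2 > fixedStack; n2 >>= 1 {
		order++
	}
	return order
}

func main() {
	for _, n := range []uint32{2048, 4096, 8192, 16384} {
		fmt.Printf("stack %5d bytes -> order %d\n", n, stackOrder(n))
	}
}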
runtime.stackfree
/usr/lib/go/src/runtime/stack.go
Total: 10ms 10ms (flat, cum) 0.027% 458 . . // 459 . . // stackfree must run on the system stack because it uses per-P 460 . . // resources and must not split the stack. 461 . . // 462 . . //go:systemstack 463 10ms 10ms func stackfree(stk stack) { 464 . . gp := getg() 465 . . v := unsafe.Pointer(stk.lo) 466 . . n := stk.hi - stk.lo 467 . . if n&(n-1) != 0 { 468 . . throw("stack not a power of 2")
runtime.stackfree
/usr/lib/go/src/runtime/stack.go
Total: 30ms 70ms (flat, cum) 0.19% 499 . . valgrindFree(v) 500 . . } 501 . . if n < fixedStack<<_NumStackOrders && n < _StackCacheSize { 502 . . order := uint8(0) 503 . . n2 := n 504 10ms 10ms for n2 > fixedStack { 505 . . order++ 506 . . n2 >>= 1 507 . . } 508 . . x := gclinkptr(v) 509 . . if stackNoCache != 0 || gp.m.p == 0 || gp.m.preemptoff != "" { 510 . . lock(&stackpool[order].item.mu) 511 . . if valgrindenabled { 512 . . // x.ptr() is the head of the list of free stacks, and will be used 513 . . // when allocating a new stack, so it has to be marked allocated. 514 . . valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr())) 515 . . } 516 . . stackpoolfree(x, order) 517 . . unlock(&stackpool[order].item.mu) 518 . . } else { 519 . . c := gp.m.p.ptr().mcache 520 10ms 10ms if c.stackcache[order].size >= _StackCacheSize { 521 . 40ms stackcacherelease(c, order) 522 . . } 523 . . if valgrindenabled { 524 . . // x.ptr() is the head of the list of free stacks, and will 525 . . // be used when allocating a new stack, so it has to be 526 . . // marked allocated. 527 . . valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr())) 528 . . } 529 . . x.ptr().next = c.stackcache[order].list 530 . . c.stackcache[order].list = x 531 10ms 10ms c.stackcache[order].size += n 532 . . } 533 . . } else { 534 . . s := spanOfUnchecked(uintptr(v)) 535 . . if s.state.get() != mSpanManual { 536 . . println(hex(s.base()), v)
runtime.adjustpointer
/usr/lib/go/src/runtime/stack.go
Total: 50ms 50ms (flat, cum) 0.13% 607 . . 608 . . // adjustpointer checks whether *vpp is in the old stack described by adjinfo. 609 . . // If so, it rewrites *vpp to point into the new stack. 610 . . func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) { 611 . . pp := (*uintptr)(vpp) 612 10ms 10ms p := *pp 613 . . if stackDebug >= 4 { 614 . . print(" ", pp, ":", hex(p), "\n") 615 . . } 616 . . if valgrindenabled { 617 . . // p is a pointer on a stack, it is inherently initialized, as 618 . . // everything on the stack is, but valgrind for _some unknown reason_ 619 . . // sometimes thinks it's uninitialized, and flags operations on p below 620 . . // as uninitialized. We just initialize it if valgrind thinks its 621 . . // uninitialized. 622 . . // 623 . . // See go.dev/issues/73801. 624 . . valgrindMakeMemDefined(unsafe.Pointer(&p), unsafe.Sizeof(&p)) 625 . . } 626 40ms 40ms if adjinfo.old.lo <= p && p < adjinfo.old.hi { 627 . . *pp = p + adjinfo.delta 628 . . if stackDebug >= 3 { 629 . . print(" adjust ptr ", pp, ":", hex(p), " -> ", hex(*pp), "\n") 630 . . } 631 . . }
runtime.adjustpointers
/usr/lib/go/src/runtime/stack.go
Total: 250ms 250ms (flat, cum) 0.67% 647 . . return (b >> (i % 8)) & 1 648 . . } 649 . . 650 . . // bv describes the memory starting at address scanp. 651 . . // Adjust any pointers contained therein. 652 10ms 10ms func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) { 653 . . minp := adjinfo.old.lo 654 . . maxp := adjinfo.old.hi 655 . . delta := adjinfo.delta 656 30ms 30ms num := uintptr(bv.n) 657 . . // If this frame might contain channel receive slots, use CAS 658 . . // to adjust pointers. If the slot hasn't been received into 659 . . // yet, it may contain stack pointers and a concurrent send 660 . . // could race with adjusting those pointers. (The sent value 661 . . // itself can never contain stack pointers.) 662 . . useCAS := uintptr(scanp) < adjinfo.sghi 663 20ms 20ms for i := uintptr(0); i < num; i += 8 { 664 . . if stackDebug >= 4 { 665 . . for j := uintptr(0); j < 8; j++ { 666 . . print(" ", add(scanp, (i+j)*goarch.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*goarch.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n") 667 . . } 668 . . } 669 10ms 10ms b := *(addb(bv.bytedata, i/8)) return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) mbitmap.go:1012 670 50ms 50ms for b != 0 { 671 50ms 50ms j := uintptr(sys.TrailingZeros8(b)) 672 . . b &= b - 1 673 . . pp := (*uintptr)(add(scanp, (i+j)*goarch.PtrSize)) 674 . . retry: 675 10ms 10ms p := *pp 676 40ms 40ms if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 { 677 . . // Looks like a junk value in a pointer slot. 678 . . // Live analysis wrong? 679 . . getg().m.traceback = 2 680 . . print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n") 681 . . throw("invalid pointer found on stack") 682 . . } 683 10ms 10ms if minp <= p && p < maxp { 684 . . if stackDebug >= 3 { 685 . . print("adjust ptr ", hex(p), " ", funcname(f), "\n") 686 . . } 687 10ms 10ms if useCAS { 688 . . ppu := (*unsafe.Pointer)(unsafe.Pointer(pp)) 689 . . if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) { 690 . . goto retry 691 . . } 692 . . } else { 693 . . *pp = p + delta 694 . . } 695 . . } 696 . . } 697 . . } 698 10ms 10ms } 699 . .
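The inner loop pulls one byte of the pointer bitmap at a time and visits each set bit with TrailingZeros8 plus b &= b-1, so only slots that actually hold pointers are touched. The same walk in isolation, with an illustrative callback in place of the pointer rewrite:

package main

import (
	"fmt"
	"math/bits"
)

// forEachSetBit visits the positions of the 1 bits in a pointer bitmap,
// lowest first, the way adjustpointers walks each byte of bv.bytedata:
// count trailing zeros to find the next bit, then clear it with b &= b-1.
func forEachSetBit(bitmap []byte, visit func(i int)) {
	for byteIdx, b := range bitmap {
		for b != 0 {
			j := bits.TrailingZeros8(b)
			b &= b - 1
			visit(byteIdx*8 + j)
		}
	}
}

func main() {
	// Slots 0, 3 and 9 hold pointers.
	forEachSetBit([]byte{0b0000_1001, 0b0000_0010}, func(i int) {
		fmt.Println("pointer slot", i)
	})
}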
runtime.adjustframe
/usr/lib/go/src/runtime/stack.go
Total: 270ms 1.90s (flat, cum) 5.06% 700 . . // Note: the argument/return area is adjusted by the callee. 701 10ms 10ms func adjustframe(frame *stkframe, adjinfo *adjustinfo) { 702 10ms 10ms if frame.continpc == 0 { 703 . . // Frame is dead. 704 . . return 705 . . } 706 . . f := frame.fn 707 . . if stackDebug >= 2 { 708 . . print(" adjusting ", funcname(f), " frame=[", hex(frame.sp), ",", hex(frame.fp), "] pc=", hex(frame.pc), " continpc=", hex(frame.continpc), "\n") 709 . . } 710 . . 711 . . // Adjust saved frame pointer if there is one. 712 10ms 10ms if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize { 713 . . if stackDebug >= 3 { 714 . . print(" saved bp\n") 715 . . } 716 . . if debugCheckBP { 717 . . // Frame pointers should always point to the next higher frame on 718 . . // the Go stack (or be nil, for the top frame on the stack). 719 . . bp := *(*uintptr)(unsafe.Pointer(frame.varp)) 720 . . if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) { 721 . . println("runtime: found invalid frame pointer") 722 . . print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n") 723 . . throw("bad frame pointer") 724 . . } 725 . . } 726 . . // On AMD64, this is the caller's frame pointer saved in the current 727 . . // frame. 728 . . // On ARM64, this is the frame pointer of the caller's caller saved 729 . . // by the caller in its frame (one word below its SP). 730 40ms 40ms adjustpointer(adjinfo, unsafe.Pointer(frame.varp)) if adjinfo.old.lo <= p && p < adjinfo.old.hi { stack.go:626 ⋮ p := *pp stack.go:612 ⋮ if adjinfo.old.lo <= p && p < adjinfo.old.hi { stack.go:626 731 . . } 732 . . 733 . 1.38s locals, args, objs := frame.getStackMap(true) 734 . . 735 . . // Adjust local variables if stack frame has been allocated. 736 . . if locals.n > 0 { 737 . . size := uintptr(locals.n) * goarch.PtrSize 738 10ms 130ms adjustpointers(unsafe.Pointer(frame.varp-size), &locals, adjinfo, f) 739 . . } 740 . . 741 . . // Adjust arguments. 742 . . if args.n > 0 { 743 . . if stackDebug >= 3 { 744 . . print(" args\n") 745 . . } 746 20ms 150ms adjustpointers(unsafe.Pointer(frame.argp), &args, adjinfo, funcInfo{}) 747 . . } 748 . . 749 . . // Adjust pointers in all stack objects (whether they are live or not). 750 . . // See comments in mgcmark.go:scanframeworker. 751 . . if frame.varp != 0 { 752 . . for i := range objs { 753 10ms 10ms obj := &objs[i] 754 . . off := obj.off 755 . . base := frame.varp // locals base pointer 756 10ms 10ms if off >= 0 { 757 . . base = frame.argp // arguments and return values base pointer 758 . . } 759 . . p := base + uintptr(off) 760 10ms 10ms if p < frame.sp { 761 . . // Object hasn't been allocated in the frame yet. 762 . . // (Happens when the stack bounds check fails and 763 . . // we call into morestack.) 764 . . continue 765 . . } 766 20ms 20ms ptrBytes, gcData := obj.gcdata() ptr := uintptr(unsafe.Pointer(r)) stack.go:1341 ⋮ if datap.gofunc <= ptr && ptr < datap.end { stack.go:1344 767 20ms 20ms for i := uintptr(0); i < ptrBytes; i += goarch.PtrSize { 768 70ms 70ms if *addb(gcData, i/(8*goarch.PtrSize))>>(i/goarch.PtrSize&7)&1 != 0 { 769 20ms 20ms adjustpointer(adjinfo, unsafe.Pointer(p+i)) ⋮ if adjinfo.old.lo <= p && p < adjinfo.old.hi { stack.go:626 770 . . } 771 . . } 772 . . } 773 . . } 774 10ms 10ms } 775 . .
runtime.adjustctxt
/usr/lib/go/src/runtime/stack.go
Total: 30ms 30ms (flat, cum) 0.08% 776 . . func adjustctxt(gp *g, adjinfo *adjustinfo) { 777 10ms 10ms adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.ctxt)) 778 . . if !framepointer_enabled { 779 . . return 780 . . } 781 . . if debugCheckBP { 782 . . bp := gp.sched.bp 783 . . if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) { 784 . . println("runtime: found invalid top frame pointer") 785 . . print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n") 786 . . throw("bad top frame pointer") 787 . . } 788 . . } 789 10ms 10ms oldfp := gp.sched.bp 790 . . adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.bp)) 791 . . if GOARCH == "arm64" { 792 . . // On ARM64, the frame pointer is saved one word *below* the SP, 793 . . // which is not copied or adjusted in any frame. Do it explicitly 794 . . // here. 795 10ms 10ms if oldfp == gp.sched.sp-goarch.PtrSize { 796 . . memmove(unsafe.Pointer(gp.sched.bp), unsafe.Pointer(oldfp), goarch.PtrSize) 797 . . adjustpointer(adjinfo, unsafe.Pointer(gp.sched.bp)) 798 . . } 799 . . } 800 . . }
runtime.adjustsudogs
/usr/lib/go/src/runtime/stack.go
Total: 20ms 20ms (flat, cum) 0.053% 818 . . } 819 . . 820 . . func adjustsudogs(gp *g, adjinfo *adjustinfo) { 821 . . // the data elements pointed to by a SudoG structure 822 . . // might be in the stack. 823 20ms 20ms for s := gp.waiting; s != nil; s = s.waitlink { 824 . . adjustpointer(adjinfo, unsafe.Pointer(&s.elem)) 825 . . } 826 . . } 827 . . 828 . . func fillstack(stk stack, b byte) {
runtime.copystack
/usr/lib/go/src/runtime/stack.go
Total: 0 50ms (flat, cum) 0.13% 910 . . // It's also fine if we have no P, addScannableStack can deal with 911 . . // that case. 912 . . gcController.addScannableStack(getg().m.p.ptr(), int64(newsize)-int64(old.hi-old.lo)) 913 . . 914 . . // allocate new stack 915 . 50ms new := stackalloc(uint32(newsize)) 916 . . if stackPoisonCopy != 0 { 917 . . fillstack(new, 0xfd) 918 . . } 919 . . if stackDebug >= 1 { 920 . . print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]", " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
runtime.copystack
/usr/lib/go/src/runtime/stack.go
Total: 40ms 3.89s (flat, cum) 10.36% 933 . . // parking on a channel, but it is safe to grow since we do that 934 . . // ourselves and explicitly don't want to synchronize with channels 935 . . // since we could self-deadlock. 936 . . throw("racy sudog adjustment due to parking on channel") 937 . . } 938 20ms 20ms adjustsudogs(gp, &adjinfo) for s := gp.waiting; s != nil; s = s.waitlink { stack.go:823 939 . . } else { 940 . . // sudogs may be pointing in to the stack and gp has 941 . . // released channel locks, so other goroutines could 942 . . // be writing to gp's stack. Find the highest such 943 . . // pointer so we can handle everything there and below 944 . . // carefully. (This shouldn't be far from the bottom 945 . . // of the stack, so there's little cost in handling 946 . . // everything below it carefully.) 947 . . adjinfo.sghi = findsghi(gp, old) 948 . . 949 . . // Synchronize with channel ops and copy the part of 950 . . // the stack they may interact with. 951 . . ncopy -= syncadjustsudogs(gp, used, &adjinfo) 952 . . } 953 . . 954 . . // Copy the stack (or the rest of it) to the new location 955 . 70ms memmove(unsafe.Pointer(new.hi-ncopy), unsafe.Pointer(old.hi-ncopy), ncopy) 956 . . 957 . . // Adjust remaining structures that have pointers into stacks. 958 . . // We have to do most of these before we traceback the new 959 . . // stack because gentraceback uses them. 960 . 30ms adjustctxt(gp, &adjinfo) 961 . . adjustdefers(gp, &adjinfo) 962 . . adjustpanics(gp, &adjinfo) 963 . . if adjinfo.sghi != 0 { 964 . . adjinfo.sghi += adjinfo.delta 965 . . } 966 . . 967 . . // Swap out old stack for new one 968 . . gp.stack = new 969 . . gp.stackguard0 = new.lo + stackGuard // NOTE: might clobber a preempt request 970 . . gp.sched.sp = new.hi - used 971 . . gp.stktopsp += adjinfo.delta 972 . . 973 . . // Adjust pointers in the new stack. 974 . 20ms var u unwinder 975 10ms 1.80s for u.init(gp, 0); u.valid(); u.next() { ⋮ u.initAt(^uintptr(0), ^uintptr(0), ^uintptr(0), gp, flags) traceback.go:129 ⋮ 976 10ms 1.91s adjustframe(&u.frame, &adjinfo) 977 . . } 978 . . 979 . . if valgrindenabled { 980 . . if gp.valgrindStackID == 0 { 981 . . gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(new.lo), unsafe.Pointer(new.hi)) 982 . . } else { 983 . . valgrindChangeStack(gp.valgrindStackID, unsafe.Pointer(new.lo), unsafe.Pointer(new.hi)) 984 . . } 985 . . } 986 . . 987 . . // free old stack 988 . . if stackPoisonCopy != 0 { 989 . . fillstack(old, 0xfc) 990 . . } 991 . 40ms stackfree(old) 992 . . } 993 . . 994 . . // round x up to a power of 2. 995 . . func round2(x int32) int32 { 996 . . s := uint(0)
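copystack dominates this part of the profile (roughly 10% of samples, most of it under adjustframe and getStackMap while rewriting pointers into the new stack). A plausible way to check whether the cost comes from goroutines that repeatedly outgrow their initial 2 KiB stack is to benchmark fresh versus reused goroutines; the sketch below is a hypothesis-testing harness with made-up names, not code from this profile's program:

// stackgrowth_test.go
package stackgrowth

import (
	"sync"
	"testing"
)

//go:noinline
func deep(n int) int {
	var pad [256]byte // a fat frame so the 2 KiB starting stack is outgrown quickly
	pad[0] = byte(n)
	if n == 0 {
		return int(pad[0])
	}
	return deep(n-1) + 1
}

// Every iteration starts a fresh goroutine, so each one pays newstack/copystack
// to grow a new 2 KiB stack to the depth deep needs.
func BenchmarkFreshGoroutine(b *testing.B) {
	var wg sync.WaitGroup
	for i := 0; i < b.N; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			deep(100)
		}()
	}
	wg.Wait()
}

// A long-lived worker grows its stack once and then reuses it (until the GC
// shrinks it again), so the growth cost is amortized across iterations.
func BenchmarkReusedGoroutine(b *testing.B) {
	work := make(chan struct{})
	done := make(chan struct{})
	go func() {
		for range work {
			deep(100)
			done <- struct{}{}
		}
	}()
	for i := 0; i < b.N; i++ {
		work <- struct{}{}
		<-done
	}
	close(work)
}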
runtime.newstack
/usr/lib/go/src/runtime/stack.go
Total: 10ms 10ms (flat, cum) 0.027% 1013 . . // 1014 . . //go:nowritebarrierrec 1015 . . func newstack() { 1016 . . thisg := getg() 1017 . . // TODO: double check all gp. shouldn't be getg(). 1018 10ms 10ms if thisg.m.morebuf.g.ptr().stackguard0 == stackFork { 1019 . . throw("stack growth after fork") 1020 . . } 1021 . . if thisg.m.morebuf.g.ptr() != thisg.m.curg { 1022 . . print("runtime: newstack called from g=", hex(thisg.m.morebuf.g), "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n") 1023 . . morebuf := thisg.m.morebuf
runtime.newstack
/usr/lib/go/src/runtime/stack.go
Total: 20ms 20ms (flat, cum) 0.053% 1055 . . thisg.m.morebuf.g = 0 1056 . . 1057 . . // NOTE: stackguard0 may change underfoot, if another thread 1058 . . // is about to try to preempt gp. Read it just once and use that same 1059 . . // value now and below. 1060 20ms 20ms stackguard0 := atomic.Loaduintptr(&gp.stackguard0) 1061 . . 1062 . . // Be conservative about where we preempt. 1063 . . // We are interested in preempting user Go code, not runtime code. 1064 . . // If we're holding locks, mallocing, or preemption is disabled, don't 1065 . . // preempt.
runtime.newstack
/usr/lib/go/src/runtime/stack.go
Total: 10ms 10ms (flat, cum) 0.027% 1079 . . gp.stackguard0 = gp.stack.lo + stackGuard 1080 . . gogo(&gp.sched) // never return 1081 . . } 1082 . . } 1083 . . 1084 10ms 10ms if gp.stack.lo == 0 { 1085 . . throw("missing stack in newstack") 1086 . . } 1087 . . sp := gp.sched.sp 1088 . . if goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.I386 || goarch.ArchFamily == goarch.WASM { 1089 . . // The call to morestack cost a word.
runtime.newstack
/usr/lib/go/src/runtime/stack.go
Total: 40ms 4.15s (flat, cum) 11.05% 1131 . . newsize := oldsize * 2 1132 . . 1133 . . // Make sure we grow at least as much as needed to fit the new frame. 1134 . . // (This is just an optimization - the caller of morestack will 1135 . . // recheck the bounds on return.) 1136 . 10ms if f := findfunc(gp.sched.pc); f.valid() { 1137 . 100ms max := uintptr(funcMaxSPDelta(f)) 1138 . . needed := max + stackGuard 1139 . . used := gp.stack.hi - gp.sched.sp 1140 20ms 20ms for newsize-used < needed { 1141 . . newsize *= 2 1142 . . } 1143 . . } 1144 . . 1145 10ms 10ms if stackguard0 == stackForceMove { 1146 . . // Forced stack movement used for debugging. 1147 . . // Don't double the stack (or we may quickly run out 1148 . . // if this is done repeatedly). 1149 . . newsize = oldsize 1150 . . } 1151 . . 1152 10ms 10ms if newsize > maxstacksize || newsize > maxstackceiling { 1153 . . if maxstacksize < maxstackceiling { 1154 . . print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n") 1155 . . } else { 1156 . . print("runtime: goroutine stack exceeds ", maxstackceiling, "-byte limit\n") 1157 . . } 1158 . . print("runtime: sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n") 1159 . . throw("stack overflow") 1160 . . } 1161 . . 1162 . . // The goroutine must be executing in order to call newstack, 1163 . . // so it must be Grunning (or Gscanrunning). 1164 . 10ms casgstatus(gp, _Grunning, _Gcopystack) 1165 . . 1166 . . // The concurrent GC will not scan the stack while we are doing the copy since 1167 . . // the gp is in a Gcopystack status. 1168 . 3.94s copystack(gp, newsize) 1169 . . if stackDebug >= 1 { 1170 . . print("stack grow done\n") 1171 . . } 1172 . 50ms casgstatus(gp, _Gcopystack, _Grunning) 1173 . . gogo(&gp.sched) 1174 . . } 1175 . . 1176 . . //go:nosplit 1177 . . func nilfunc() {
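newstack doubles the stack, then keeps doubling until the frame reported by funcMaxSPDelta (plus the guard area) fits in the unused portion, before handing off to copystack. The sizing rule in isolation, with example numbers:

package main

import "fmt"

// growStack mirrors the sizing logic above: double the old stack, then keep
// doubling until the space not already in use can hold the frame the
// goroutine needs.
func growStack(oldsize, used, needed uintptr) uintptr {
	newsize := oldsize * 2
	for newsize-used < needed {
		newsize *= 2
	}
	return newsize
}

func main() {
	// An 8 KiB stack with 7 KiB in use and a frame needing 10 KiB: one doubling
	// (16 KiB) still leaves too little headroom, so the stack jumps to 32 KiB.
	fmt.Println(growStack(8<<10, 7<<10, 10<<10)) // 32768
}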
runtime.gostartcallfn
/usr/lib/go/src/runtime/stack.go
Total: 20ms 20ms (flat, cum) 0.053% 1185 . . if fv != nil { 1186 . . fn = unsafe.Pointer(fv.fn) 1187 . . } else { 1188 . . fn = unsafe.Pointer(abi.FuncPCABIInternal(nilfunc)) 1189 . . } 1190 20ms 20ms gostartcall(gobuf, fn, unsafe.Pointer(fv)) if buf.lr != 0 { sys_arm64.go:12 1191 . . } 1192 . . 1193 . . // isShrinkStackSafe returns whether it's safe to attempt to shrink 1194 . . // gp's stack. Shrinking the stack is only safe when we have precise 1195 . . // pointer maps for all frames on the stack. The caller must hold the
runtime.(*stackObjectRecord).gcdata
/usr/lib/go/src/runtime/stack.go
Total: 20ms 20ms (flat, cum) 0.053% 1336 . . 1337 . . // gcdata returns the number of bytes that contain pointers, and 1338 . . // a ptr/nonptr bitmask covering those bytes. 1339 . . // Note that this bitmask might be larger than internal/abi.MaxPtrmaskBytes. 1340 . . func (r *stackObjectRecord) gcdata() (uintptr, *byte) { 1341 10ms 10ms ptr := uintptr(unsafe.Pointer(r)) 1342 . . var mod *moduledata 1343 . . for datap := &firstmoduledata; datap != nil; datap = datap.next { 1344 10ms 10ms if datap.gofunc <= ptr && ptr < datap.end { 1345 . . mod = datap 1346 . . break 1347 . . } 1348 . . } 1349 . . // If you get a panic here due to a nil mod,
runtime.memclrNoHeapPointers
/usr/lib/go/src/runtime/memclr_arm64.s
Total: 340ms 340ms (flat, cum) 0.91% 7 . . // See memclrNoHeapPointers Go doc for important implementation constraints. 8 . . 9 . . // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) 10 . . // Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention). 11 . . TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16 12 20ms 20ms CMP $16, R1 13 . . // If n is equal to 16 bytes, use zero_exact_16 to zero 14 . . BEQ zero_exact_16 15 . . 16 . . // If n is greater than 16 bytes, use zero_by_16 to zero 17 10ms 10ms BHI zero_by_16 18 . . 19 . . // n is less than 16 bytes 20 10ms 10ms ADD R1, R0, R7 21 . . TBZ $3, R1, less_than_8 22 . . MOVD ZR, (R0) 23 . . MOVD ZR, -8(R7) 24 . . RET 25 . . 26 . . less_than_8: 27 . . TBZ $2, R1, less_than_4 28 . . MOVW ZR, (R0) 29 . . MOVW ZR, -4(R7) 30 . . RET 31 . . 32 . . less_than_4: 33 . . CBZ R1, ending 34 10ms 10ms MOVB ZR, (R0) 35 . . TBZ $1, R1, ending 36 . . MOVH ZR, -2(R7) 37 . . 38 . . ending: 39 . . RET 40 . . 41 . . zero_exact_16: 42 . . // n is exactly 16 bytes 43 . . STP (ZR, ZR), (R0) 44 30ms 30ms RET 45 . . 46 . . zero_by_16: 47 . . // n greater than 16 bytes, check if the start address is aligned 48 10ms 10ms NEG R0, R4 49 . . ANDS $15, R4, R4 50 . . // Try zeroing using zva if the start address is aligned with 16 51 . . BEQ try_zva 52 . . 53 . . // Non-aligned store 54 . . STP (ZR, ZR), (R0) 55 . . // Make the destination aligned 56 . . SUB R4, R1, R1 57 . . ADD R4, R0, R0 58 . . B try_zva 59 . . 60 . . tail_maybe_long: 61 20ms 20ms CMP $64, R1 62 10ms 10ms BHS no_zva 63 . . 64 . . tail63: 65 . . ANDS $48, R1, R3 66 . . BEQ last16 67 10ms 10ms CMPW $32, R3 68 . . BEQ last48 69 . . BLT last32 70 20ms 20ms STP.P (ZR, ZR), 16(R0) 71 . . last48: 72 30ms 30ms STP.P (ZR, ZR), 16(R0) 73 . . last32: 74 50ms 50ms STP.P (ZR, ZR), 16(R0) 75 . . // The last store length is at most 16, so it is safe to use 76 . . // stp to write last 16 bytes 77 . . last16: 78 30ms 30ms ANDS $15, R1, R1 79 . . CBZ R1, last_end 80 . . ADD R1, R0, R0 81 . . STP (ZR, ZR), -16(R0) 82 . . last_end: 83 . . RET 84 . . 85 . . PCALIGN $16 86 . . no_zva: 87 . . SUB $16, R0, R0 88 10ms 10ms SUB $64, R1, R1 89 . . 90 . . loop_64: 91 30ms 30ms STP (ZR, ZR), 16(R0) 92 20ms 20ms STP (ZR, ZR), 32(R0) 93 . . STP (ZR, ZR), 48(R0) 94 10ms 10ms STP.W (ZR, ZR), 64(R0) 95 . . SUBS $64, R1, R1 96 . . BGE loop_64 97 . . ANDS $63, R1, ZR 98 . . ADD $16, R0, R0 99 . . BNE tail63 100 . . RET 101 . . 102 . . PCALIGN $16 103 . . try_zva: 104 . . // Try using the ZVA feature to zero entire cache lines 105 . . // It is not meaningful to use ZVA if the block size is less than 64, 106 . . // so make sure that n is greater than or equal to 64 107 10ms 10ms CMP $63, R1 108 . . BLE tail63 109 . . 110 . . CMP $128, R1 111 . . // Ensure n is at least 128 bytes, so that there is enough to copy after 112 . . // alignment.
runtime.memclrNoHeapPointers
/usr/lib/go/src/runtime/memclr_arm64.s
Total: 470ms 470ms (flat, cum) 1.25% 160 . . // We now have at least 64 bytes to zero, update n 161 . . MOVD R3, R1 162 . . 163 . . loop_zva_prolog: 164 . . STP (ZR, ZR), (R0) 165 20ms 20ms STP (ZR, ZR), 16(R0) 166 . . STP (ZR, ZR), 32(R0) 167 . . SUBS $64, R4, R4 168 . . STP (ZR, ZR), 48(R0) 169 . . ADD $64, R0, R0 170 . . BGE loop_zva_prolog 171 . . 172 . . ADD R4, R0, R0 173 . . 174 . . aligned: 175 10ms 10ms SUB R5, R1, R1 176 . . 177 . . PCALIGN $16 178 . . loop_zva: 179 390ms 390ms WORD $0xd50b7420 // DC ZVA, R0 180 50ms 50ms ADD R5, R0, R0 181 . . SUBS R5, R1, R1 182 . . BHS loop_zva 183 . . ANDS R6, R1, R1 184 . . BNE tail_maybe_long 185 . . RET
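The DC ZVA loop zeroes whole cache lines; it shows up whenever freshly allocated spans or stacks need zeroing. When large scratch buffers are allocated per operation, reusing them (for example through a sync.Pool) pays the memclr cost only on first creation. A hedged sketch, assuming callers do not rely on the buffer being zero on reuse:

package main

import (
	"fmt"
	"sync"
)

// Freshly allocated large byte slices are zeroed by memclrNoHeapPointers.
// Pooling them means only the pool's New call pays that zeroing cost.
var bufPool = sync.Pool{
	New: func() any { return make([]byte, 64<<10) },
}

func process(fill byte) byte {
	buf := bufPool.Get().([]byte)
	defer bufPool.Put(buf)
	for i := range buf {
		buf[i] = fill
	}
	return buf[len(buf)-1]
}

func main() {
	fmt.Println(process(1), process(2))
}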
runtime.(*mSpanStateBox).get
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 10ms (flat, cum) 0.027% 412 . . // It is nosplit because it's called indirectly by typedmemclr, 413 . . // which must not be preempted. 414 . . 415 . . //go:nosplit 416 . . func (b *mSpanStateBox) get() mSpanState { 417 10ms 10ms return mSpanState(b.s.Load()) return Load8(&u.value) types.go:124 418 . . } 419 . . 420 . . type mspan struct { 421 . . _ sys.NotInHeap 422 . . next *mspan // next span in list, or nil if none
runtime.(*mspan).base
/usr/lib/go/src/runtime/mheap.go
Total: 40ms 40ms (flat, cum) 0.11% 518 . . userArenaChunkFree addrRange // interval for managing chunk allocation 519 . . largeType *_type // malloc header for large objects. 520 . . } 521 . . 522 . . func (s *mspan) base() uintptr { 523 40ms 40ms return s.startAddr 524 . . } 525 . . 526 . . func (s *mspan) layout() (size, n, total uintptr) { 527 . . total = s.npages << gc.PageShift 528 . . size = s.elemsize
runtime.makeSpanClass
/usr/lib/go/src/runtime/mheap.go
Total: 90ms 90ms (flat, cum) 0.24% 589 . . numSpanClasses = gc.NumSizeClasses << 1 590 . . tinySpanClass = spanClass(tinySizeClass<<1 | 1) 591 . . ) 592 . . 593 . . func makeSpanClass(sizeclass uint8, noscan bool) spanClass { 594 90ms 90ms return spanClass(sizeclass<<1) | spanClass(bool2int(noscan)) ⋮ return int(*(*uint8)(unsafe.Pointer(&x))) stubs.go:394 ⋮ 595 . . } 596 . . 597 . . //go:nosplit 598 . . func (sc spanClass) sizeclass() int8 { 599 . . return int8(sc >> 1)
runtime.arenaIndex
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 20ms (flat, cum) 0.053% 615 . . // It is nosplit because it's called by spanOf and several other 616 . . // nosplit functions. 617 . . // 618 . . //go:nosplit 619 . . func arenaIndex(p uintptr) arenaIdx { 620 20ms 20ms return arenaIdx((p - arenaBaseOffset) / heapArenaBytes) 621 . . } 622 . . 623 . . // arenaBase returns the low address of the region covered by heap 624 . . // arena i. 625 . . func arenaBase(i arenaIdx) uintptr {
runtime.spanOf
/usr/lib/go/src/runtime/mheap.go
Total: 50ms 50ms (flat, cum) 0.13% 713 . . // If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't. 714 . . if ri.l1() >= uint(len(mheap_.arenas)) { 715 . . return nil 716 . . } 717 . . } 718 10ms 10ms l2 := mheap_.arenas[ri.l1()] 719 . . if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1. 720 . . return nil 721 . . } 722 30ms 30ms ha := l2[ri.l2()] 723 . . if ha == nil { 724 . . return nil 725 . . } 726 10ms 10ms return ha.spans[(p/pageSize)%pagesPerArena] 727 . . } 728 . . 729 . . // spanOfUnchecked is equivalent to spanOf, but the caller must ensure 730 . . // that p points into an allocated heap arena. 731 . . //
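spanOf resolves a pointer with a two-level lookup: arenaIndex picks the heap arena, then a per-arena table maps the page to its mspan. A toy version of the same shape, with made-up sizes and a map standing in for the arena array:

package main

import "fmt"

const (
	pageSize      = 8 << 10
	arenaBytes    = 4 << 20
	pagesPerArena = arenaBytes / pageSize
)

type span struct{ base uintptr }

type arena struct{ spans [pagesPerArena]*span }

type heap struct{ arenas map[uintptr]*arena }

// spanOf maps an address to its span: divide by the arena size to find the
// arena, then index the per-page span table inside it.
func (h *heap) spanOf(p uintptr) *span {
	a := h.arenas[p/arenaBytes]
	if a == nil {
		return nil
	}
	return a.spans[(p/pageSize)%pagesPerArena]
}

func main() {
	h := &heap{arenas: map[uintptr]*arena{}}
	a := &arena{}
	h.arenas[3] = a // this arena covers [3*arenaBytes, 4*arenaBytes)
	s := &span{base: 3*arenaBytes + 16*pageSize}
	a.spans[16] = s

	fmt.Println(h.spanOf(s.base+100) == s) // true
	fmt.Println(h.spanOf(12345))           // <nil>: no arena mapped there
}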
runtime.spanOfHeap
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 20ms (flat, cum) 0.053% 742 . . // 743 . . // Must be nosplit because it has callers that are nosplit. 744 . . // 745 . . //go:nosplit 746 . . func spanOfHeap(p uintptr) *mspan { 747 20ms 20ms s := spanOf(p) return ha.spans[(p/pageSize)%pagesPerArena] mheap.go:726 ⋮ l2 := mheap_.arenas[ri.l1()] mheap.go:718 748 . . // s is nil if it's never been allocated. Otherwise, we check 749 . . // its state first because we don't trust this pointer, so we 750 . . // have to synchronize with span initialization. Then, it's 751 . . // still possible we picked up a stale span pointer, so we 752 . . // have to check the span's bounds.
runtime.(*mheap).alloc
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 10ms (flat, cum) 0.027% 1001 . . // 1002 . . // spanclass indicates the span's size class and scannability. 1003 . . // 1004 . . // Returns a span that has been fully initialized. span.needzero indicates 1005 . . // whether the span has been zeroed. Note that it may not be. 1006 10ms 10ms func (h *mheap) alloc(npages uintptr, spanclass spanClass) *mspan { 1007 . . // Don't do any operations that lock the heap on the G stack. 1008 . . // It might trigger stack growth, and the stack growth code needs
runtime.(*mheap).alloc.func1
/usr/lib/go/src/runtime/mheap.go
Total: 30ms 1.14s (flat, cum) 3.04% 1009 . . // to be able to allocate heap. 1010 . . var s *mspan 1011 20ms 590ms systemstack(func() { 1012 . . // To prevent excessive heap growth, before allocating n pages 1013 . . // we need to sweep and reclaim at least n pages. 1014 10ms 10ms if !isSweepDone() { return sweep.active.isDone() mgcsweep.go:458 return a.state.Load() == sweepDrainedMask mgcsweep.go:214 1015 . . h.reclaim(npages) 1016 . . } 1017 . 540ms s = h.allocSpan(npages, spanAllocHeap, spanclass) 1018 . . }) 1019 . . return s 1020 . . } 1021 . . 1022 . . // allocManual allocates a manually-managed span of npage pages.
runtime.(*mheap).allocNeedsZero
/usr/lib/go/src/runtime/mheap.go
Total: 90ms 90ms (flat, cum) 0.24% 1068 . . // they're fresh from the operating system. It updates heapArena metadata that is 1069 . . // critical for future page allocations. 1070 . . // 1071 . . // There are no locking constraints on this method. 1072 . . func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) { 1073 10ms 10ms for npage > 0 { 1074 20ms 20ms ai := arenaIndex(base) return arenaIdx((p - arenaBaseOffset) / heapArenaBytes) mheap.go:620 1075 . . ha := h.arenas[ai.l1()][ai.l2()] 1076 . . 1077 20ms 20ms zeroedBase := atomic.Loaduintptr(&ha.zeroedBase) 1078 40ms 40ms arenaBase := base % heapArenaBytes 1079 . . if arenaBase < zeroedBase { 1080 . . // We extended into the non-zeroed part of the 1081 . . // arena, so this region needs to be zeroed before use. 1082 . . // 1083 . . // zeroedBase is monotonically increasing, so if we see this now then
runtime.(*mheap).tryAllocMSpan
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 10ms (flat, cum) 0.027% 1136 . . //go:systemstack 1137 . . func (h *mheap) tryAllocMSpan() *mspan { 1138 . . pp := getg().m.p.ptr() 1139 . . // If we don't have a p or the cache is empty, we can't do 1140 . . // anything here. 1141 10ms 10ms if pp == nil || pp.mspancache.len == 0 { 1142 . . return nil 1143 . . } 1144 . . // Pull off the last entry in the cache. 1145 . . s := pp.mspancache.buf[pp.mspancache.len-1] 1146 . . pp.mspancache.len--
runtime.(*mheap).allocMSpanLocked
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 140ms (flat, cum) 0.37% 1167 . . } 1168 . . // Refill the cache if necessary. 1169 . . if pp.mspancache.len == 0 { 1170 . . const refillCount = len(pp.mspancache.buf) / 2 1171 . . for i := 0; i < refillCount; i++ { 1172 10ms 140ms pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc()) 1173 . . } 1174 . . pp.mspancache.len = refillCount 1175 . . } 1176 . . // Pull off the last entry in the cache. 1177 . . s := pp.mspancache.buf[pp.mspancache.len-1]
runtime.(*mheap).freeMSpanLocked
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 10ms (flat, cum) 0.027% 1199 . . pp.mspancache.len++ 1200 . . return 1201 . . } 1202 . . // Failing that (or if we don't have a p), just free it to 1203 . . // the heap. 1204 10ms 10ms h.spanalloc.free(unsafe.Pointer(s)) f.inuse -= f.size mfixalloc.go:105 1205 . . } 1206 . . 1207 . . // allocSpan allocates an mspan which owns npages worth of memory. 1208 . . // 1209 . . // If typ.manual() == false, allocSpan allocates a heap span of class spanclass
runtime.(*mheap).allocSpan
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 40ms (flat, cum) 0.11% 1235 . . // If the allocation is small enough, try the page cache! 1236 . . // The page cache does not support aligned allocations, so we cannot use 1237 . . // it if we need to provide a physical page aligned stack allocation. 1238 . . pp := gp.m.p.ptr() 1239 . . if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 { 1240 10ms 10ms c := &pp.pcache 1241 . . 1242 . . // If the cache is empty, refill it. 1243 . . if c.empty() { 1244 . . lock(&h.lock) 1245 . 10ms *c = h.pages.allocToCache() 1246 . . unlock(&h.lock) 1247 . . } 1248 . . 1249 . . // Try to allocate from the cache. 1250 . . base, scav = c.alloc(npages) 1251 . . if base != 0 { 1252 10ms 10ms s = h.tryAllocMSpan() if pp == nil || pp.mspancache.len == 0 { mheap.go:1141 1253 . . if s != nil { 1254 . . goto HaveSpan 1255 . . } 1256 . . // We have a base but no mspan, so we need 1257 . . // to lock the heap. 1258 . . } 1259 . . } 1260 . . 1261 . . // For one reason or another, we couldn't get the 1262 . . // whole job done without the heap lock. 1263 . 10ms lock(&h.lock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 1264 . . 1265 . . if needPhysPageAlign { 1266 . . // Overallocate by a physical page to allow for later alignment. 1267 . . extraPages := physPageSize / pageSize 1268 . .
runtime.(*mheap).allocSpan
/usr/lib/go/src/runtime/mheap.go
Total: 0 160ms (flat, cum) 0.43% 1291 . . scav = h.pages.allocRange(base, npages) 1292 . . } 1293 . . 1294 . . if base == 0 { 1295 . . // Try to acquire a base address. 1296 . 10ms base, scav = h.pages.alloc(npages) 1297 . . if base == 0 { 1298 . . var ok bool 1299 . 10ms growth, ok = h.grow(npages) 1300 . . if !ok { 1301 . . unlock(&h.lock) 1302 . . return nil 1303 . . } 1304 . . base, scav = h.pages.alloc(npages) 1305 . . if base == 0 { 1306 . . throw("grew heap, but no adequate free space found") 1307 . . } 1308 . . } 1309 . . } 1310 . . if s == nil { 1311 . . // We failed to get an mspan earlier, so grab 1312 . . // one now that we have the heap lock. 1313 . 140ms s = h.allocMSpanLocked() 1314 . . } 1315 . . unlock(&h.lock) 1316 . . 1317 . . HaveSpan: 1318 . . // Decide if we need to scavenge in response to what we just allocated.
runtime.(*mheap).allocSpan
/usr/lib/go/src/runtime/mheap.go
Total: 30ms 340ms (flat, cum) 0.91% 1388 . . } 1389 . . scavenge.assistTime.Add(now - start) 1390 . . } 1391 . . 1392 . . // Initialize the span. 1393 10ms 290ms h.initSpan(s, typ, spanclass, base, npages) 1394 . . 1395 . . if valgrindenabled { 1396 . . valgrindMempoolMalloc(unsafe.Pointer(arenaBase(arenaIndex(base))), unsafe.Pointer(base), npages*pageSize) 1397 . . } 1398 . . 1399 . . // Commit and account for any scavenged memory that the span now owns. 1400 . . nbytes := npages * pageSize 1401 . . if scav != 0 { 1402 . . // sysUsed all the pages that are actually available 1403 . . // in the span since some of them might be scavenged. 1404 . . sysUsed(unsafe.Pointer(base), nbytes, scav) 1405 . . gcController.heapReleased.add(-int64(scav)) 1406 . . } 1407 . . // Update stats. 1408 . . gcController.heapFree.add(-int64(nbytes - scav)) 1409 . . if typ == spanAllocHeap { 1410 . . gcController.heapInUse.add(int64(nbytes)) 1411 . . } 1412 . . // Update consistent stats. 1413 . 10ms stats := memstats.heapStats.acquire() 1414 10ms 10ms atomic.Xaddint64(&stats.committed, int64(scav)) 1415 10ms 10ms atomic.Xaddint64(&stats.released, -int64(scav)) 1416 . . switch typ { 1417 . . case spanAllocHeap: 1418 . . atomic.Xaddint64(&stats.inHeap, int64(nbytes)) 1419 . . case spanAllocStack: 1420 . . atomic.Xaddint64(&stats.inStacks, int64(nbytes)) 1421 . . case spanAllocWorkBuf: 1422 . . atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes)) 1423 . . } 1424 . 20ms memstats.heapStats.release() 1425 . . 1426 . . // Trace the span alloc. 1427 . . if traceAllocFreeEnabled() { 1428 . . trace := traceAcquire() 1429 . . if trace.ok() {
runtime.(*mheap).initSpan
/usr/lib/go/src/runtime/mheap.go
Total: 40ms 240ms (flat, cum) 0.64% 1434 . . return s 1435 . . } 1436 . . 1437 . . // initSpan initializes a blank span s which will represent the range 1438 . . // [base, base+npages*pageSize). typ is the type of span being allocated. 1439 10ms 10ms func (h *mheap) initSpan(s *mspan, typ spanAllocType, spanclass spanClass, base, npages uintptr) { 1440 . . // At this point, both s != nil and base != 0, and the heap 1441 . . // lock is no longer held. Initialize the span. 1442 . 50ms s.init(base, npages) 1443 . 90ms if h.allocNeedsZero(base, npages) { 1444 . . s.needzero = 1 1445 . . } 1446 . . nbytes := npages * pageSize 1447 . . if typ.manual() { 1448 . . s.manualFreeList = 0 1449 . . s.nelems = 0 1450 . . s.state.set(mSpanManual) 1451 . . } else { 1452 . . // We must set span properties before the span is published anywhere 1453 . . // since we're not holding the heap lock. 1454 . . s.spanclass = spanclass 1455 . . if sizeclass := spanclass.sizeclass(); sizeclass == 0 { 1456 . . s.elemsize = nbytes 1457 . . s.nelems = 1 1458 . . s.divMul = 0 1459 . . } else { 1460 20ms 20ms s.elemsize = uintptr(gc.SizeClassToSize[sizeclass]) 1461 . . if goexperiment.GreenTeaGC { 1462 . . var reserve uintptr 1463 . . if gcUsesSpanInlineMarkBits(s.elemsize) { 1464 . . // Reserve space for the inline mark bits. 1465 . . reserve += unsafe.Sizeof(spanInlineMarkBits{}) 1466 . . } 1467 . . if heapBitsInSpan(s.elemsize) && !s.spanclass.noscan() { 1468 . . // Reserve space for the pointer/scan bitmap at the end. 1469 . . reserve += nbytes / goarch.PtrSize / 8 1470 . . } 1471 . . s.nelems = uint16((nbytes - reserve) / s.elemsize) 1472 . . } else { 1473 . . if !s.spanclass.noscan() && heapBitsInSpan(s.elemsize) { 1474 . . // Reserve space for the pointer/scan bitmap at the end. 1475 . . s.nelems = uint16((nbytes - (nbytes / goarch.PtrSize / 8)) / s.elemsize) 1476 . . } else { 1477 . . s.nelems = uint16(nbytes / s.elemsize) 1478 . . } 1479 . . } 1480 10ms 10ms s.divMul = gc.SizeClassToDivMagic[sizeclass] 1481 . . } 1482 . . 1483 . . // Initialize mark and allocation structures. 1484 . . s.freeindex = 0 1485 . . s.freeIndexForScan = 0 1486 . . s.allocCache = ^uint64(0) // all 1s indicating all free. 1487 . 50ms s.gcmarkBits = newMarkBits(uintptr(s.nelems)) 1488 . 10ms s.allocBits = newAllocBits(uintptr(s.nelems)) return newMarkBits(nelems) mheap.go:2978 1489 . . 1490 . . // Adjust s.limit down to the object-containing part of the span. 1491 . . s.limit = s.base() + uintptr(s.elemsize)*uintptr(s.nelems) 1492 . . 1493 . . // It's safe to access h.sweepgen without the heap lock because it's
runtime.(*mheap).initSpan
/usr/lib/go/src/runtime/mheap.go
Total: 40ms 40ms (flat, cum) 0.11% 1525 . . // prior to this line. 1526 . . arena, pageIdx, pageMask := pageIndexOf(s.base()) 1527 . . atomic.Or8(&arena.pageInUse[pageIdx], pageMask) 1528 . . 1529 . . // Mark packed span. 1530 20ms 20ms if gcUsesSpanInlineMarkBits(s.elemsize) { 1531 . . atomic.Or8(&arena.pageUseSpanInlineMarkBits[pageIdx], pageMask) 1532 . . } 1533 . . 1534 . . // Update related page sweeper stats. 1535 20ms 20ms h.pagesInUse.Add(npages) return Xadduintptr(&u.value, delta) types.go:420 1536 . . } 1537 . . 1538 . . // Make sure the newly allocated span will be observed 1539 . . // by the GC before pointers into the span are published. 1540 . . publicationBarrier()
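
initSpan derives the span's object size from its size class and then computes how many objects fit, after subtracting any space reserved for inline mark bits or the pointer/scan bitmap. As a rough worked example (the 8 KiB page size and the 48-byte size class are assumptions chosen for illustration, not values read from this profile): a one-page span of 8192 bytes with 48-byte objects holds 8192 / 48 = 170 objects with 32 bytes left over, before any bitmap reservation is taken off the top.

    package main

    import "fmt"

    func main() {
        const spanBytes = 8192 // assumed: one 8 KiB runtime page
        const elemSize = 48    // assumed: an example small size class
        nelems := spanBytes / elemSize
        fmt.Println(nelems, spanBytes-nelems*elemSize) // 170 objects, 32 bytes of tail waste
    }
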
runtime.(*mheap).grow
/usr/lib/go/src/runtime/mheap.go
Total: 0 10ms (flat, cum) 0.027% 1611 . . // Transition the space we're going to use from Reserved to Prepared. 1612 . . // 1613 . . // The allocation is always aligned to the heap arena 1614 . . // size which is always > physPageSize, so its safe to 1615 . . // just add directly to heapReleased. 1616 . 10ms sysMap(unsafe.Pointer(v), nBase-v, &gcController.heapReleased, "heap") 1617 . . 1618 . . // The memory just allocated counts as both released 1619 . . // and idle, even though it's not yet backed by spans. 1620 . . stats := memstats.heapStats.acquire() 1621 . . atomic.Xaddint64(&stats.released, int64(nBase-v))
runtime.(*sweepLocked).sweep.(*mheap).freeSpan.func2
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 500ms (flat, cum) 1.33% 1628 . . return totalGrowth, true 1629 . . } 1630 . . 1631 . . // Free the span back into the heap. 1632 . . func (h *mheap) freeSpan(s *mspan) { 1633 10ms 260ms systemstack(func() { 1634 . . // Trace the span free. 1635 . . if traceAllocFreeEnabled() { 1636 . . trace := traceAcquire() 1637 . . if trace.ok() { 1638 . . trace.SpanFree(s) 1639 . . traceRelease(trace) 1640 . . } 1641 . . } 1642 . . 1643 . 90ms lock(&h.lock) 1644 . . if msanenabled { 1645 . . // Tell msan that this entire span is no longer in use. 1646 . . base := unsafe.Pointer(s.base()) 1647 . . bytes := s.npages << gc.PageShift 1648 . . msanfree(base, bytes) 1649 . . } 1650 . . if asanenabled { 1651 . . // Tell asan that this entire span is no longer in use. 1652 . . base := unsafe.Pointer(s.base()) 1653 . . bytes := s.npages << gc.PageShift 1654 . . asanpoison(base, bytes) 1655 . . } 1656 . . if valgrindenabled { 1657 . . base := s.base() 1658 . . valgrindMempoolFree(unsafe.Pointer(arenaBase(arenaIndex(base))), unsafe.Pointer(base)) 1659 . . } 1660 . 120ms h.freeSpanLocked(s, spanAllocHeap) 1661 . 30ms unlock(&h.lock) 1662 . . }) 1663 . . } 1664 . . 1665 . . // freeManual frees a manually-managed span returned by allocManual. 1666 . . // typ must be the same as the spanAllocType passed to the allocManual that
runtime.(*mheap).freeSpanLocked
/usr/lib/go/src/runtime/mheap.go
Total: 50ms 120ms (flat, cum) 0.32% 1707 . . } 1708 . . if s.allocCount != 0 || s.sweepgen != h.sweepgen { 1709 . . print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") 1710 . . throw("mheap.freeSpanLocked - invalid free") 1711 . . } 1712 10ms 10ms h.pagesInUse.Add(-s.npages) return Xadduintptr(&u.value, delta) types.go:420 1713 . . 1714 . . // Clear in-use bit in arena page bitmap. 1715 . . arena, pageIdx, pageMask := pageIndexOf(s.base()) 1716 . . atomic.And8(&arena.pageInUse[pageIdx], ^pageMask) 1717 . . 1718 . . // Clear small heap span bit if necessary. 1719 10ms 10ms if gcUsesSpanInlineMarkBits(s.elemsize) { 1720 . . atomic.And8(&arena.pageUseSpanInlineMarkBits[pageIdx], ^pageMask) 1721 . . } 1722 . . default: 1723 . . throw("mheap.freeSpanLocked - invalid span state") 1724 . . } 1725 . . 1726 . . // Update stats. 1727 . . // 1728 . . // Mirrors the code in allocSpan. 1729 . . nbytes := s.npages * pageSize 1730 . . gcController.heapFree.add(int64(nbytes)) 1731 . . if typ == spanAllocHeap { 1732 . 10ms gcController.heapInUse.add(-int64(nbytes)) 1733 . . } 1734 . . // Update consistent stats. 1735 . . stats := memstats.heapStats.acquire() 1736 . . switch typ { 1737 . . case spanAllocHeap: 1738 20ms 20ms atomic.Xaddint64(&stats.inHeap, -int64(nbytes)) 1739 . . case spanAllocStack: 1740 . . atomic.Xaddint64(&stats.inStacks, -int64(nbytes)) 1741 . . case spanAllocWorkBuf: 1742 . . atomic.Xaddint64(&stats.inWorkBufs, -int64(nbytes)) 1743 . . } 1744 . . memstats.heapStats.release() 1745 . . 1746 . . // Mark the space as free. 1747 . 60ms h.pages.free(s.base(), s.npages) 1748 . . 1749 . . // Free the span structure. We no longer have a use for it. 1750 . . s.state.set(mSpanDead) 1751 10ms 10ms h.freeMSpanLocked(s) h.spanalloc.free(unsafe.Pointer(s)) mheap.go:1204 f.inuse -= f.size mfixalloc.go:105 1752 . . } 1753 . . 1754 . . // scavengeAll acquires the heap lock (blocking any additional 1755 . . // manipulation of the page allocator) and iterates over the whole 1756 . . // heap, scavenging every free page available.
runtime.(*mspan).init
/usr/lib/go/src/runtime/mheap.go
Total: 50ms 50ms (flat, cum) 0.13% 1782 . . } 1783 . . 1784 . . // Initialize a new span with the given start and npages. 1785 . . func (span *mspan) init(base uintptr, npages uintptr) { 1786 . . // span is *not* zeroed. 1787 10ms 10ms span.next = nil 1788 40ms 40ms span.prev = nil 1789 . . span.list = nil 1790 . . span.startAddr = base 1791 . . span.npages = npages 1792 . . span.limit = base + npages*gc.PageSize // see go.dev/issue/74288; adjusted later for heap spans 1793 . . span.allocCount = 0
runtime.spanHasNoSpecials
/usr/lib/go/src/runtime/mheap.go
Total: 10ms 10ms (flat, cum) 0.027% 2032 . . // spanHasNoSpecials marks a span as having no specials in the arena bitmap. 2033 . . func spanHasNoSpecials(s *mspan) { 2034 . . arenaPage := (s.base() / pageSize) % pagesPerArena 2035 . . ai := arenaIndex(s.base()) 2036 . . ha := mheap_.arenas[ai.l1()][ai.l2()] 2037 10ms 10ms atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8))) 2038 . . } 2039 . . 2040 . . // addspecial adds the special record s to the list of special records for 2041 . . // the object p. All fields of s should be filled in except for
runtime.addspecial
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 60ms (flat, cum) 0.16% 2042 . . // offset & next, which this routine will fill in. 2043 . . // Returns true if the special was successfully added, false otherwise. 2044 . . // (The add will fail only if a record with the same p and s->kind 2045 . . // already exists unless force is set to true.) 2046 10ms 10ms func addspecial(p unsafe.Pointer, s *special, force bool) bool { 2047 . 10ms span := spanOfHeap(uintptr(p)) 2048 . . if span == nil { 2049 . . throw("addspecial on invalid pointer") 2050 . . } 2051 . . 2052 . . // Ensure that the span is swept. 2053 . . // Sweeping accesses the specials list w/o locks, so we have 2054 . . // to synchronize with it. And it's just much safer. 2055 . . mp := acquirem() 2056 . 10ms span.ensureSwept() 2057 . . 2058 . . offset := uintptr(p) - span.base() 2059 . . kind := s.kind 2060 . . 2061 . 10ms lock(&span.speciallock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 2062 . . 2063 . . // Find splice point, check for existing record. 2064 . . iter, exists := span.specialFindSplicePoint(offset, kind) 2065 . . if !exists || force { 2066 . . // Splice in record, fill in offset. 2067 . . s.offset = offset 2068 . . s.next = *iter 2069 . . *iter = s 2070 . . spanHasSpecials(span) 2071 . . } 2072 . . 2073 10ms 20ms unlock(&span.speciallock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 ⋮ 2074 . . releasem(mp) 2075 . . // We're converting p to a uintptr and looking it up, and we 2076 . . // don't want it to die and get swept while we're doing so. 2077 . . KeepAlive(p) 2078 . . return !exists || force // already exists or addition was forced
runtime.removespecial
/usr/lib/go/src/runtime/mheap.go
Total: 40ms 70ms (flat, cum) 0.19% 2079 . . } 2080 . . 2081 . . // Removes the Special record of the given kind for the object p. 2082 . . // Returns the record if the record existed, nil otherwise. 2083 . . // The caller must FixAlloc_Free the result. 2084 10ms 10ms func removespecial(p unsafe.Pointer, kind uint8) *special { 2085 . 10ms span := spanOfHeap(uintptr(p)) 2086 . . if span == nil { 2087 . . throw("removespecial on invalid pointer") 2088 . . } 2089 . . 2090 . . // Ensure that the span is swept. 2091 . . // Sweeping accesses the specials list w/o locks, so we have 2092 . . // to synchronize with it. And it's just much safer. 2093 . . mp := acquirem() 2094 . 10ms span.ensureSwept() 2095 . . 2096 . . offset := uintptr(p) - span.base() 2097 . . 2098 . . var result *special 2099 . . lock(&span.speciallock) 2100 . . 2101 20ms 20ms iter, exists := span.specialFindSplicePoint(offset, kind) if offset == uintptr(s.offset) && kind == s.kind { mheap.go:2127 ⋮ if s == nil { mheap.go:2124 2102 . . if exists { 2103 . . s := *iter 2104 . . *iter = s.next 2105 . . result = s 2106 . . } 2107 . . if span.specials == nil { 2108 10ms 10ms spanHasNoSpecials(span) atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8))) mheap.go:2037 2109 . . } 2110 . 10ms unlock(&span.speciallock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 2111 . . releasem(mp) 2112 . . return result 2113 . . } 2114 . . 2115 . . // Find a splice point in the sorted list and check for an already existing
runtime.(*mspan).specialFindSplicePoint
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 20ms (flat, cum) 0.053% 2119 . . // Find splice point, check for existing record. 2120 . . iter := &span.specials 2121 . . found := false 2122 . . for { 2123 . . s := *iter 2124 10ms 10ms if s == nil { 2125 . . break 2126 . . } 2127 10ms 10ms if offset == uintptr(s.offset) && kind == s.kind { 2128 . . found = true 2129 . . break 2130 . . } 2131 . . if offset < uintptr(s.offset) || (offset == uintptr(s.offset) && kind < s.kind) { 2132 . . break
runtime.addfinalizer
/usr/lib/go/src/runtime/mheap.go
Total: 20ms 160ms (flat, cum) 0.43% 2148 . . fint *_type // May be a heap pointer, but always live. 2149 . . ot *ptrtype // May be a heap pointer, but always live. 2150 . . } 2151 . . 2152 . . // Adds a finalizer to the object p. Returns true if it succeeded. 2153 20ms 20ms func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool { 2154 . 30ms lock(&mheap_.speciallock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 2155 . 20ms s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc()) 2156 . 30ms unlock(&mheap_.speciallock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 2157 . . s.special.kind = _KindSpecialFinalizer 2158 . . s.fn = f 2159 . . s.nret = nret 2160 . . s.fint = fint 2161 . . s.ot = ot 2162 . 60ms if addspecial(p, &s.special, false) { 2163 . . // This is responsible for maintaining the same 2164 . . // GC-related invariants as markrootSpans in any 2165 . . // situation where it's possible that markrootSpans 2166 . . // has already run but mark termination hasn't yet. 2167 . . if gcphase != _GCoff {
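
addfinalizer is the runtime half of runtime.SetFinalizer: it takes a specialfinalizer record from a fixalloc pool under mheap_.speciallock and attaches it to the object's span via addspecial. A minimal program that exercises this path looks like the following (the buffer type and the timeout are arbitrary demo choices; finalizers are not guaranteed to run before exit, hence the hedged second branch).

    package main

    import (
        "fmt"
        "runtime"
        "time"
    )

    type buffer struct{ data [1 << 16]byte }

    func main() {
        done := make(chan struct{})

        b := &buffer{}
        // SetFinalizer is the public entry point to runtime.addfinalizer:
        // a specialfinalizer record gets attached to b's span.
        runtime.SetFinalizer(b, func(*buffer) { close(done) })

        b = nil      // drop the only reference
        runtime.GC() // finalizers are queued by the GC and run on a dedicated goroutine

        select {
        case <-done:
            fmt.Println("finalizer ran")
        case <-time.After(time.Second):
            fmt.Println("finalizer did not run yet (not guaranteed)")
        }
    }

Calling runtime.SetFinalizer(b, nil) instead clears the record through the removefinalizer/removespecial path listed next.
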
runtime.removefinalizer
/usr/lib/go/src/runtime/mheap.go
Total: 0 120ms (flat, cum) 0.32% 2188 . . return false 2189 . . } 2190 . . 2191 . . // Removes the finalizer (if any) from the object p. 2192 . . func removefinalizer(p unsafe.Pointer) { 2193 . 70ms s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer))) 2194 . . if s == nil { 2195 . . return // there wasn't a finalizer to remove 2196 . . } 2197 . 30ms lock(&mheap_.speciallock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 2198 . . mheap_.specialfinalizeralloc.free(unsafe.Pointer(s)) 2199 . 20ms unlock(&mheap_.speciallock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 2200 . . } 2201 . . 2202 . . // The described object has a cleanup set for it. 2203 . . type specialCleanup struct { 2204 . . _ sys.NotInHeap
runtime.freeSpecial
/usr/lib/go/src/runtime/mheap.go
Total: 0 10ms (flat, cum) 0.027% 2815 . . lock(&mheap_.speciallock) 2816 . . mheap_.specialWeakHandleAlloc.free(unsafe.Pointer(s)) 2817 . . unlock(&mheap_.speciallock) 2818 . . case _KindSpecialProfile: 2819 . . sp := (*specialprofile)(unsafe.Pointer(s)) 2820 . 10ms mProf_Free(sp.b, size) 2821 . . lock(&mheap_.speciallock) 2822 . . mheap_.specialprofilealloc.free(unsafe.Pointer(sp)) 2823 . . unlock(&mheap_.speciallock) 2824 . . case _KindSpecialReachable: 2825 . . sp := (*specialReachable)(unsafe.Pointer(s))
runtime.(*gcBitsArena).tryAlloc
/usr/lib/go/src/runtime/mheap.go
Total: 70ms 70ms (flat, cum) 0.19% 2901 . . } 2902 . . 2903 . . // tryAlloc allocates from b or returns nil if b does not have enough room. 2904 . . // This is safe to call concurrently. 2905 . . func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits { 2906 40ms 40ms if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) { 2907 . . return nil 2908 . . } 2909 . . // Try to allocate from this block. 2910 30ms 30ms end := atomic.Xadduintptr(&b.free, bytes) 2911 . . if end > uintptr(len(b.bits)) { 2912 . . return nil 2913 . . } 2914 . . // There was enough room. 2915 . . start := end - bytes
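
tryAlloc hands out mark-bit storage from the current gcBits arena with a lock-free bump: an optimistic load of b.free, an atomic add, and a re-check against len(b.bits) in case another goroutine raced past the end of the arena. The sketch below shows the same bump-pointer pattern over a plain byte buffer; bumpArena is an invented type for illustration, not the runtime's gcBitsArena.

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type bumpArena struct {
        free atomic.Uintptr // next unused offset into buf
        buf  []byte
    }

    // tryAlloc returns n bytes from the arena, or nil if it is full. It is
    // safe to call concurrently: losers of the race simply see end > len(buf).
    func (a *bumpArena) tryAlloc(n uintptr) []byte {
        if a.free.Load()+n > uintptr(len(a.buf)) {
            return nil // quick check, like the Loaduintptr test above
        }
        end := a.free.Add(n)
        if end > uintptr(len(a.buf)) {
            return nil // another goroutine got there first
        }
        return a.buf[end-n : end]
    }

    func main() {
        a := &bumpArena{buf: make([]byte, 64)}
        fmt.Println(len(a.tryAlloc(16)), a.tryAlloc(100) == nil) // 16 true
    }
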
runtime.newMarkBits
/usr/lib/go/src/runtime/mheap.go
Total: 80ms 80ms (flat, cum) 0.21% 2922 . . blocksNeeded := (nelems + 63) / 64 2923 . . bytesNeeded := blocksNeeded * 8 2924 . . 2925 . . // Try directly allocating from the current head arena. 2926 . . head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next))) 2927 70ms 70ms if p := head.tryAlloc(bytesNeeded); p != nil { if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) { mheap.go:2906 ⋮ ⋮ end := atomic.Xadduintptr(&b.free, bytes) mheap.go:2910 2928 10ms 10ms return p 2929 . . } 2930 . . 2931 . . // There's not enough room in the head arena. We may need to 2932 . . // allocate a new arena. 2933 . . lock(&gcBitsArenas.lock)
runtime.newAllocBits
/usr/lib/go/src/runtime/mheap.go
Total: 0 10ms (flat, cum) 0.027% 2973 . . // newAllocBits is used to provide newly initialized spans 2974 . . // allocation bits. For spans not being initialized the 2975 . . // mark bits are repurposed as allocation bits when 2976 . . // the span is swept. 2977 . . func newAllocBits(nelems uintptr) *gcBits { 2978 . 10ms return newMarkBits(nelems) 2979 . . } 2980 . . 2981 . . // nextMarkBitArenaEpoch establishes a new epoch for the arenas 2982 . . // holding the mark bits. The arenas are named relative to the 2983 . . // current GC cycle which is demarcated by the call to finishweep_m.
runtime.(*unwinder).init
/usr/lib/go/src/runtime/traceback.go
Total: 0 220ms (flat, cum) 0.59% 124 . . // provide a "valid" method. Alternatively, this could start in a "before 125 . . // the first frame" state and "next" could return whether it was able to 126 . . // move to the next frame, but that's both more awkward to use in a "for" 127 . . // loop and is harder to implement because we have to do things differently 128 . . // for the first frame. 129 . 220ms u.initAt(^uintptr(0), ^uintptr(0), ^uintptr(0), gp, flags) 130 . . }
runtime.(*unwinder).initAt
/usr/lib/go/src/runtime/traceback.go
Total: 30ms 30ms (flat, cum) 0.08% 131 . . 132 30ms 30ms func (u *unwinder) initAt(pc0, sp0, lr0 uintptr, gp *g, flags unwindFlags) { 133 . . // Don't call this "g"; it's too easy get "g" and "gp" confused. 134 . . if ourg := getg(); ourg == gp && ourg == ourg.m.curg { 135 . . // The starting sp has been passed in as a uintptr, and the caller may 136 . . // have other uintptr-typed stack references as well. 137 . . // If during one of the calls that got us here or during one of the
runtime.(*unwinder).initAt
/usr/lib/go/src/runtime/traceback.go
Total: 20ms 20ms (flat, cum) 0.053% 162 . . lr0 = gp.sched.lr 163 . . } 164 . . } 165 . . } 166 . . 167 20ms 20ms var frame stkframe 168 . . frame.pc = pc0 169 . . frame.sp = sp0 170 . . if usesLR { 171 . . frame.lr = lr0 172 . . }
runtime.(*unwinder).initAt
/usr/lib/go/src/runtime/traceback.go
Total: 20ms 170ms (flat, cum) 0.45% 195 . . // LR are not touched. 196 . . frame.pc = frame.lr 197 . . frame.lr = 0 198 . . } 199 . . 200 . 20ms f := findfunc(frame.pc) 201 . . if !f.valid() { 202 . . if flags&unwindSilentErrors == 0 { 203 . . print("runtime: g ", gp.goid, " gp=", gp, ": unknown pc ", hex(frame.pc), "\n") 204 . . tracebackHexdump(gp.stack, &frame, 0) 205 . . } 206 . . if flags&(unwindPrintErrors|unwindSilentErrors) == 0 { 207 . . throw("unknown pc") 208 . . } 209 . . *u = unwinder{} 210 . . return 211 . . } 212 . . frame.fn = f 213 . . 214 . . // Populate the unwinder. 215 10ms 10ms *u = unwinder{ 216 . . frame: frame, 217 . . g: gp.guintptr(), 218 10ms 10ms cgoCtxt: len(gp.cgoCtxt) - 1, 219 . . calleeFuncID: abi.FuncIDNormal, 220 . . flags: flags, 221 . . } 222 . . 223 . . isSyscall := frame.pc == pc0 && frame.sp == sp0 && pc0 == gp.syscallpc && sp0 == gp.syscallsp 224 . 130ms u.resolveInternal(true, isSyscall) 225 . . } 226 . . 227 . . func (u *unwinder) valid() bool { 228 . . return u.frame.pc != 0 229 . . }
runtime.(*unwinder).resolveInternal
/usr/lib/go/src/runtime/traceback.go
Total: 200ms 200ms (flat, cum) 0.53% 252 . . func (u *unwinder) resolveInternal(innermost, isSyscall bool) { 253 . . frame := &u.frame 254 . . gp := u.g.ptr() 255 . . 256 . . f := frame.fn 257 180ms 180ms if f.pcsp == 0 { 258 . . // No frame information, must be external function, like race support. 259 . . // See golang.org/issue/13568. 260 . . u.finishInternal() 261 . . return 262 . . } 263 . . 264 . . // Compute function info flags. 265 . . flag := f.flag 266 10ms 10ms if f.funcID == abi.FuncID_cgocallback { 267 . . // cgocallback does write SP to switch from the g0 to the curg stack, 268 . . // but it carefully arranges that during the transition BOTH stacks 269 . . // have cgocallback frame valid for unwinding through. 270 . . // So we don't need to exclude it with the other SP-writing functions. 271 10ms 10ms flag &^= abi.FuncFlagSPWrite 272 . . } 273 . . if isSyscall { 274 . . // Some Syscall functions write to SP, but they do so only after 275 . . // saving the entry PC/SP using entersyscall. 276 . . // Since we are using the entry PC/SP, the later SP write doesn't matter.
runtime.(*unwinder).resolveInternal
/usr/lib/go/src/runtime/traceback.go
Total: 40ms 1.04s (flat, cum) 2.77% 321 . . frame.sp = gp.sched.sp 322 . . u.cgoCtxt = len(gp.cgoCtxt) - 1 323 . . flag &^= abi.FuncFlagSPWrite 324 . . } 325 . . } 326 40ms 1.04s frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc)) ⋮ ⋮ ⋮ x, _ := pcvalue(f, f.pcsp, targetpc, true) symtab.go:1203 327 . . if !usesLR { 328 . . // On x86, call instruction pushes return PC before entering new function. 329 . . frame.fp += goarch.PtrSize 330 . . } 331 . . }
runtime.(*unwinder).resolveInternal
/usr/lib/go/src/runtime/traceback.go
Total: 50ms 50ms (flat, cum) 0.13% 359 . . // T _ _ | frame.lr = 0 360 . . // F T _ | frame.lr = 0 361 . . // F F F | print; panic 362 . . // F F T | ignore SPWrite 363 . . if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && !innermost { 364 10ms 10ms println("traceback: unexpected SPWRITE function", funcname(f)) 365 . . throw("traceback") 366 . . } 367 . . frame.lr = 0 368 . . } else { 369 . . var lrPtr uintptr 370 . . if usesLR { 371 10ms 10ms if innermost && frame.sp < frame.fp || frame.lr == 0 { 372 . . lrPtr = frame.sp 373 10ms 10ms frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr)) 374 . . } 375 . . } else { 376 . . if frame.lr == 0 { 377 . . lrPtr = frame.fp - goarch.PtrSize 378 . . frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr)) 379 . . } 380 . . } 381 . . } 382 . . 383 20ms 20ms frame.varp = frame.fp 384 . . if !usesLR { 385 . . // On x86, call instruction pushes return PC before entering new function. 386 . . frame.varp -= goarch.PtrSize 387 . . } 388 . .
runtime.(*unwinder).resolveInternal
/usr/lib/go/src/runtime/traceback.go
Total: 10ms 10ms (flat, cum) 0.027% 402 . . // (with R29 = RSP - 8 in Go functions). 403 . . // This is technically ABI-compatible but not standard. 404 . . // And it happens to end up mimicking the x86 layout. 405 . . // Other architectures may make different decisions. 406 . . if frame.varp > frame.sp && framepointer_enabled { 407 10ms 10ms frame.varp -= goarch.PtrSize 408 . . } 409 . . 410 . . frame.argp = frame.fp + sys.MinFrameSize 411 . . 412 . . // Determine frame's 'continuation PC', where it can continue.
runtime.(*unwinder).resolveInternal
/usr/lib/go/src/runtime/traceback.go
Total: 10ms 10ms (flat, cum) 0.027% 433 . . // address make sure the pc is in the CALL instruction. 434 . . } else { 435 . . frame.continpc = 0 436 . . } 437 . . } 438 10ms 10ms }
runtime.(*unwinder).next
/usr/lib/go/src/runtime/traceback.go
Total: 60ms 370ms (flat, cum) 0.99% 440 20ms 20ms func (u *unwinder) next() { 441 . . frame := &u.frame 442 . . f := frame.fn 443 . . gp := u.g.ptr() 444 . . 445 . . // Do not unwind past the bottom of the stack. 446 20ms 20ms if frame.lr == 0 { 447 10ms 20ms u.finishInternal() 448 . . return 449 . . } 450 10ms 310ms flr := findfunc(frame.lr) 451 . . if !flr.valid() { 452 . . // This happens if you get a profiling interrupt at just the wrong time. 453 . . // In that context it is okay to stop early. 454 . . // But if no error flags are set, we're doing a garbage collection and must 455 . . // get everything, so crash loudly.
runtime.(*unwinder).next
/usr/lib/go/src/runtime/traceback.go
Total: 30ms 30ms (flat, cum) 0.08% 479 . . print("runtime: traceback stuck. pc=", hex(frame.pc), " sp=", hex(frame.sp), "\n") 480 . . tracebackHexdump(gp.stack, frame, frame.sp) 481 . . throw("traceback stuck") 482 . . } 483 . . 484 20ms 20ms injectedCall := f.funcID == abi.FuncID_sigpanic || f.funcID == abi.FuncID_asyncPreempt || f.funcID == abi.FuncID_debugCallV2 485 . . if injectedCall { 486 . . u.flags |= unwindTrap 487 . . } else { 488 10ms 10ms u.flags &^= unwindTrap 489 . . } 490 . . 491 . . // Unwind to next frame. 492 . . u.calleeFuncID = f.funcID 493 . . frame.fn = flr
runtime.(*unwinder).next
/usr/lib/go/src/runtime/traceback.go
Total: 0 1.18s (flat, cum) 3.14% 508 . . } else if funcspdelta(f, frame.pc) == 0 { 509 . . frame.lr = x 510 . . } 511 . . } 512 . . 513 . 1.18s u.resolveInternal(false, false) 514 . . } 515 . . 516 . . // finishInternal is an unwinder-internal helper called after the stack has been 517 . . // exhausted. It sets the unwinder to an invalid state and checks that it 518 . . // successfully unwound the entire stack.
runtime.(*unwinder).finishInternal
/usr/lib/go/src/runtime/traceback.go
Total: 10ms 10ms (flat, cum) 0.027% 558 . . // callbacks only happen when everything is stopped nicely. 559 . . // At other times, such as when gathering a stack for a profiling signal 560 . . // or when printing a traceback during a crash, everything may not be 561 . . // stopped nicely, and the stack walk may not be able to complete. 562 . . gp := u.g.ptr() 563 10ms 10ms if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && u.frame.sp != gp.stktopsp { 564 . . print("runtime: g", gp.goid, ": frame.sp=", hex(u.frame.sp), " top=", hex(gp.stktopsp), "\n") 565 . . print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "\n") 566 . . throw("traceback did not unwind completely") 567 . . } 568 . . }
runtime.(*unwinder).cgoCallers
/usr/lib/go/src/runtime/traceback.go
Total: 10ms 10ms (flat, cum) 0.027% 588 . . 589 . . // cgoCallers populates pcBuf with the cgo callers of the current frame using 590 . . // the registered cgo unwinder. It returns the number of PCs written to pcBuf. 591 . . // If the current frame is not a cgo frame or if there's no registered cgo 592 . . // unwinder, it returns 0. 593 10ms 10ms func (u *unwinder) cgoCallers(pcBuf []uintptr) int { 594 . . if cgoTraceback == nil || u.frame.fn.funcID != abi.FuncID_cgocallback || u.cgoCtxt < 0 { 595 . . // We don't have a cgo unwinder (typical case), or we do but we're not 596 . . // in a cgo frame or we're out of cgo context. 597 . . return 0 598 . . }
runtime.tracebackPCs
/usr/lib/go/src/runtime/traceback.go
Total: 0 40ms (flat, cum) 0.11% 618 . . // 619 . . // Callers should set the unwindSilentErrors flag on u. 620 . . func tracebackPCs(u *unwinder, skip int, pcBuf []uintptr) int { 621 . . var cgoBuf [32]uintptr 622 . . n := 0 623 . 10ms for ; n < len(pcBuf) && u.valid(); u.next() { 624 . . f := u.frame.fn 625 . 10ms cgoN := u.cgoCallers(cgoBuf[:]) 626 . . 627 . . // TODO: Why does &u.cache cause u to escape? (Same in traceback2) 628 . 20ms for iu, uf := newInlineUnwinder(f, u.symPC()); n < len(pcBuf) && uf.valid(); uf = iu.next(uf) { 629 . . sf := iu.srcFunc(uf) 630 . . if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(u.calleeFuncID) { 631 . . // ignore wrappers 632 . . } else if skip > 0 { 633 . . skip--
runtime.callers
/usr/lib/go/src/runtime/traceback.go
Total: 0 40ms (flat, cum) 0.11% 1092 . . func callers(skip int, pcbuf []uintptr) int { 1093 . . sp := sys.GetCallerSP() 1094 . . pc := sys.GetCallerPC() 1095 . . gp := getg() 1096 . . var n int 1097 . 40ms systemstack(func() { 1098 . . var u unwinder
runtime.callers.func1
/usr/lib/go/src/runtime/traceback.go
Total: 0 40ms (flat, cum) 0.11% 1099 . . u.initAt(pc, sp, 0, gp, unwindSilentErrors) 1100 . 40ms n = tracebackPCs(&u, skip, pcbuf) 1101 . . }) 1102 . . return n 1103 . . } 1104 . . 1105 . . func gcallers(gp *g, skip int, pcbuf []uintptr) int {
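
The unwinder entries above (initAt, next, resolveInternal) are the machinery that runtime.Callers, profiling signal handlers, and the garbage collector use to walk goroutine stacks; the funcspdelta/pcvalue cost inside resolveInternal is table decoding to find each frame's size. From user code the same walk is reachable through runtime.Callers plus runtime.CallersFrames, for example:

    package main

    import (
        "fmt"
        "runtime"
    )

    func where() {
        pc := make([]uintptr, 16)
        n := runtime.Callers(2, pc) // skip Callers and where itself
        frames := runtime.CallersFrames(pc[:n])
        for {
            f, more := frames.Next()
            fmt.Printf("%s (%s:%d)\n", f.Function, f.File, f.Line)
            if !more {
                break
            }
        }
    }

    func main() { where() }
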
runtime.isSystemGoroutine
/usr/lib/go/src/runtime/traceback.go
Total: 80ms 300ms (flat, cum) 0.8% 1362 . . // runtime.runFinalizers/runtime.runCleanups. 1363 . . // 1364 . . // If fixed is true, any goroutine that can vary between user and 1365 . . // system (that is, the finalizer goroutine) is considered a user 1366 . . // goroutine. 1367 10ms 10ms func isSystemGoroutine(gp *g, fixed bool) bool { 1368 . . // Keep this in sync with internal/trace.IsSystemGoroutine. 1369 40ms 90ms f := findfunc(gp.startpc) 1370 . . if !f.valid() { 1371 . . return false 1372 . . } 1373 . . if f.funcID == abi.FuncID_runtime_main || f.funcID == abi.FuncID_corostart || f.funcID == abi.FuncID_handleAsyncEvent { 1374 . . return false 1375 . . } 1376 10ms 10ms if f.funcID == abi.FuncID_runFinalizers { 1377 . . // We include the finalizer goroutine if it's calling 1378 . . // back into user code. 1379 . . if fixed { 1380 . . // This goroutine can vary. In fixed mode, 1381 . . // always consider it a user goroutine. 1382 . . return false 1383 . . } 1384 . . return fingStatus.Load()&fingRunningFinalizer == 0 1385 . . } 1386 . . if f.funcID == abi.FuncID_runCleanups { 1387 . . // We include the cleanup goroutines if they're calling 1388 . . // back into user code. 1389 . . if fixed { 1390 . . // This goroutine can vary. In fixed mode, 1391 . . // always consider it a user goroutine. 1392 . . return false 1393 . . } 1394 . . return !gp.runningCleanups.Load() 1395 . . } 1396 20ms 190ms return stringslite.HasPrefix(funcname(f), "runtime.") ⋮ return f.datap.funcName(f.nameOff) symtab.go:1142 ⋮ 1397 . . } 1398 . . 1399 . . // SetCgoTraceback records three C functions to use to gather 1400 . . // traceback information from C code and to convert that traceback 1401 . . // information into symbolic information. These are used when printing
runtime.makeslicecopy
/usr/lib/go/src/runtime/slice.go
Total: 30ms 270ms (flat, cum) 0.72% 33 . . panic(errorString("makeslice: cap out of range")) 34 . . } 35 . . 36 . . // makeslicecopy allocates a slice of "tolen" elements of type "et", 37 . . // then copies "fromlen" elements of type "et" into that new allocation from "from". 38 20ms 120ms func makeslicecopy(et *_type, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer { 39 . . var tomem, copymem uintptr 40 . . if uintptr(tolen) > uintptr(fromlen) { 41 . . var overflow bool 42 . . tomem, overflow = math.MulUintptr(et.Size_, uintptr(tolen)) 43 . . if overflow || tomem > maxAlloc || tolen < 0 { 44 . . panicmakeslicelen() 45 . . } 46 10ms 10ms copymem = et.Size_ * uintptr(fromlen) 47 . . } else { 48 . . // fromlen is a known good length providing and equal or greater than tolen, 49 . . // thereby making tolen a good slice length too as from and to slices have the 50 . . // same element width. 51 . . tomem = et.Size_ * uintptr(tolen) 52 . . copymem = tomem 53 . . } 54 . . 55 . . var to unsafe.Pointer 56 . . if !et.Pointers() { 57 . 130ms to = mallocgc(tomem, nil, false) 58 . . if copymem < tomem { 59 . 10ms memclrNoHeapPointers(add(to, copymem), tomem-copymem) 60 . . } 61 . . } else { 62 . . // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. 63 . . to = mallocgc(tomem, et, true) 64 . . if copymem > 0 && writeBarrier.enabled {
runtime.makeslicecopy
/usr/lib/go/src/runtime/slice.go
Total: 0 10ms (flat, cum) 0.027% 82 . . } 83 . . if asanenabled { 84 . . asanread(from, copymem) 85 . . } 86 . . 87 . 10ms memmove(to, from, copymem) 88 . . 89 . . return to 90 . . } 91 . . 92 . . // makeslice should be an internal detail,
runtime.makeslice
/usr/lib/go/src/runtime/slice.go
Total: 90ms 2.61s (flat, cum) 6.95% 96 . . // 97 . . // Do not remove or change the type signature. 98 . . // See go.dev/issue/67401. 99 . . // 100 . . //go:linkname makeslice 101 50ms 100ms func makeslice(et *_type, len, cap int) unsafe.Pointer { 102 . . mem, overflow := math.MulUintptr(et.Size_, uintptr(cap)) 103 40ms 40ms if overflow || mem > maxAlloc || len < 0 || len > cap { 104 . . // NOTE: Produce a 'len out of range' error instead of a 105 . . // 'cap out of range' error when someone does make([]T, bignumber). 106 . . // 'cap out of range' is true too, but since the cap is only being 107 . . // supplied implicitly, saying len is clearer. 108 . . // See golang.org/issue/4085. 109 . . mem, overflow := math.MulUintptr(et.Size_, uintptr(len)) 110 . . if overflow || mem > maxAlloc || len < 0 { 111 . . panicmakeslicelen() 112 . . } 113 . . panicmakeslicecap() 114 . . } 115 . . 116 . 2.47s return mallocgc(mem, et, true) 117 . . } 118 . . 119 . . func makeslice64(et *_type, len64, cap64 int64) unsafe.Pointer { 120 . . len := int(len64) 121 . . if int64(len) != len64 {
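
makeslice is the compiler's target for make([]T, len, cap): after checking the element size times the capacity against maxAlloc, it hands the whole request to mallocgc, which is why nearly all of the 2.61s cumulative time above sits on the final mallocgc call. The corresponding user code is ordinary slice construction, for example:

    package main

    import "fmt"

    func main() {
        // Both calls compile to runtime.makeslice, which validates the size
        // and then asks mallocgc for zeroed memory of the element type.
        names := make([]string, 0, 1024)
        buf := make([]byte, 64<<10)

        fmt.Println(cap(names), len(buf))
    }
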
runtime.growslice
/usr/lib/go/src/runtime/slice.go
Total: 310ms 310ms (flat, cum) 0.83% 172 . . // 173 . . // Do not remove or change the type signature. 174 . . // See go.dev/issue/67401. 175 . . // 176 . . //go:linkname growslice 177 40ms 40ms func growslice(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type) slice { 178 . . oldLen := newLen - num 179 . . if raceenabled { 180 . . callerpc := sys.GetCallerPC() 181 . . racereadrangepc(oldPtr, uintptr(oldLen*int(et.Size_)), callerpc, abi.FuncPCABIInternal(growslice)) 182 . . } 183 . . if msanenabled { 184 . . msanread(oldPtr, uintptr(oldLen*int(et.Size_))) 185 . . } 186 . . if asanenabled { 187 . . asanread(oldPtr, uintptr(oldLen*int(et.Size_))) 188 . . } 189 . . 190 20ms 20ms if newLen < 0 { 191 . . panic(errorString("growslice: len out of range")) 192 . . } 193 . . 194 20ms 20ms if et.Size_ == 0 { 195 . . // append should not create a slice with nil pointer but non-zero len. 196 . . // We assume that append doesn't need to preserve oldPtr in this case. 197 . . return slice{unsafe.Pointer(&zerobase), newLen, newLen} 198 . . } 199 . . 200 10ms 10ms newcap := nextslicecap(newLen, oldCap) if oldCap < threshold { slice.go:297 201 . . 202 . . var overflow bool 203 . . var lenmem, newlenmem, capmem uintptr 204 . . // Specialize for common values of et.Size. 205 . . // For 1 we don't need any division/multiplication. 206 . . // For goarch.PtrSize, compiler will optimize division/multiplication into a shift by a constant. 207 . . // For powers of 2, use a variable shift. 208 10ms 10ms noscan := !et.Pointers() 209 . . switch { 210 10ms 10ms case et.Size_ == 1: 211 . . lenmem = uintptr(oldLen) 212 . . newlenmem = uintptr(newLen) 213 50ms 50ms capmem = roundupsize(uintptr(newcap), noscan) if reqSize <= maxSmallSize-gc.MallocHeaderSize { msize.go:18 ⋮ return uintptr(gc.SizeClassToSize[gc.SizeToSizeClass8[divRoundUp(reqSize, gc.SmallSizeDiv)]]) - (reqSize - size) msize.go:26 ⋮ ⋮ 214 . . overflow = uintptr(newcap) > maxAlloc 215 . . newcap = int(capmem) 216 . . case et.Size_ == goarch.PtrSize: 217 . . lenmem = uintptr(oldLen) * goarch.PtrSize 218 . . newlenmem = uintptr(newLen) * goarch.PtrSize 219 20ms 20ms capmem = roundupsize(uintptr(newcap)*goarch.PtrSize, noscan) if !noscan && reqSize > gc.MinSizeForMallocHeader { // !noscan && !heapBitsInSpan(reqSize) msize.go:20 220 10ms 10ms overflow = uintptr(newcap) > maxAlloc/goarch.PtrSize 221 . . newcap = int(capmem / goarch.PtrSize) 222 . . case isPowerOfTwo(et.Size_): 223 . . var shift uintptr 224 . . if goarch.PtrSize == 8 { 225 . . // Mask shift for better code generation. 226 20ms 20ms shift = uintptr(sys.TrailingZeros64(uint64(et.Size_))) & 63 227 . . } else { 228 . . shift = uintptr(sys.TrailingZeros32(uint32(et.Size_))) & 31 229 . . } 230 . . lenmem = uintptr(oldLen) << shift 231 . . newlenmem = uintptr(newLen) << shift 232 80ms 80ms capmem = roundupsize(uintptr(newcap)<<shift, noscan) return uintptr(gc.SizeClassToSize[gc.SizeToSizeClass8[divRoundUp(reqSize, gc.SmallSizeDiv)]]) - (reqSize - size) msize.go:26 ⋮ ⋮ if !noscan && reqSize > gc.MinSizeForMallocHeader { // !noscan && !heapBitsInSpan(reqSize) msize.go:20 ⋮ if reqSize <= gc.SmallSizeMax-8 { msize.go:25 233 20ms 20ms overflow = uintptr(newcap) > (maxAlloc >> shift) 234 . . newcap = int(capmem >> shift) 235 . . capmem = uintptr(newcap) << shift 236 . . default: 237 . . lenmem = uintptr(oldLen) * et.Size_ 238 . . newlenmem = uintptr(newLen) * et.Size_
runtime.growslice
/usr/lib/go/src/runtime/slice.go
Total: 110ms 1.07s (flat, cum) 2.85% 258 . . if overflow || capmem > maxAlloc { 259 . . panic(errorString("growslice: len out of range")) 260 . . } 261 . . 262 . . var p unsafe.Pointer 263 10ms 10ms if !et.Pointers() { 264 . 80ms p = mallocgc(capmem, nil, false) 265 . . // The append() that calls growslice is going to overwrite from oldLen to newLen. 266 . . // Only clear the part that will not be overwritten. 267 . . // The reflect_growslice() that calls growslice will manually clear 268 . . // the region not cleared here. 269 . 10ms memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem) 270 . . } else { 271 . . // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. 272 10ms 880ms p = mallocgc(capmem, et, true) 273 20ms 20ms if lenmem > 0 && writeBarrier.enabled { 274 . . // Only shade the pointers in oldPtr since we know the destination slice p 275 . . // only contains nil pointers because it has been cleared during alloc. 276 . . // 277 . . // It's safe to pass a type to this function as an optimization because 278 . . // from and to only ever refer to memory representing whole values of 279 . . // type et. See the comment on bulkBarrierPreWrite. 280 . . bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(oldPtr), lenmem-et.Size_+et.PtrBytes, et) 281 . . } 282 . . } 283 20ms 20ms memmove(p, oldPtr, lenmem) 284 . . 285 50ms 50ms return slice{p, newLen, newcap} 286 . . } 287 . . 288 . . // nextslicecap computes the next appropriate slice length. 289 . . func nextslicecap(newLen, oldCap int) int { 290 . . newcap := oldCap
runtime.nextslicecap
/usr/lib/go/src/runtime/slice.go
Total: 10ms 10ms (flat, cum) 0.027% 292 . . if newLen > doublecap { 293 . . return newLen 294 . . } 295 . . 296 . . const threshold = 256 297 10ms 10ms if oldCap < threshold { 298 . . return doublecap 299 . . } 300 . . for { 301 . . // Transition from growing 2x for small slices 302 . . // to growing 1.25x for large slices. This formula
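
growslice is reached from append when the backing array is out of capacity; nextslicecap doubles small slices and grows by roughly 1.25x once the capacity passes 256, and roundupsize then rounds the byte size up to a malloc size class. When append-heavy code dominates here, pre-sizing the slice is the usual remedy, as in this illustrative micro-pattern (not code from the profiled program):

    package main

    import "fmt"

    func collect(n int) []int {
        out := make([]int, 0, n) // one makeslice call, no growslice afterwards
        for i := 0; i < n; i++ {
            out = append(out, i*i) // capacity is sufficient; no reallocation or copy
        }
        return out
    }

    func main() {
        // Without the capacity hint, appending n elements would pass through
        // growslice O(log n) times and copy the data on each growth.
        fmt.Println(len(collect(1000)))
    }
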
internal/bytealg.MakeNoZero
/usr/lib/go/src/runtime/slice.go
Total: 0 150ms (flat, cum) 0.4% 395 . . func bytealg_MakeNoZero(len int) []byte { 396 . . if uintptr(len) > maxAlloc { 397 . . panicmakeslicelen() 398 . . } 399 . . cap := roundupsize(uintptr(len), true) 400 . 150ms return unsafe.Slice((*byte)(mallocgc(uintptr(cap), nil, false)), cap)[:len] 401 . . }
runtime.(*stkframe).argMapInternal
/usr/lib/go/src/runtime/stkframe.go
Total: 20ms 20ms (flat, cum) 0.053% 89 . . // this if non-nil, and otherwise fetch the argument map using the 90 . . // current PC. 91 . . // 92 . . // hasReflectStackObj indicates that this frame also has a reflect 93 . . // function stack object, which the caller must synthesize. 94 10ms 10ms func (frame *stkframe) argMapInternal() (argMap bitvector, hasReflectStackObj bool) { 95 . . f := frame.fn 96 . . if f.args != abi.ArgsSizeUnknown { 97 10ms 10ms argMap.n = f.args / goarch.PtrSize 98 . . return 99 . . } 100 . . // Extract argument bitmaps for reflect stubs from the calls they made to reflect. 101 . . switch funcname(f) { 102 . . case "reflect.makeFuncStub", "reflect.methodValueCall":
runtime.(*stkframe).getStackMap
/usr/lib/go/src/runtime/stkframe.go
Total: 510ms 1.38s (flat, cum) 3.68% 152 . . return 153 . . } 154 . . 155 . . // getStackMap returns the locals and arguments live pointer maps, and 156 . . // stack object list for frame. 157 10ms 10ms func (frame *stkframe) getStackMap(debug bool) (locals, args bitvector, objs []stackObjectRecord) { 158 . . targetpc := frame.continpc 159 30ms 30ms if targetpc == 0 { 160 . . // Frame is dead. Return empty bitvectors. 161 . . return 162 . . } 163 . . 164 . . f := frame.fn 165 . . pcdata := int32(-1) 166 20ms 40ms if targetpc != f.entry() { return f.datap.textAddr(f.entryOff) symtab.go:894 ⋮ 167 . . // Back up to the CALL. If we're at the function entry 168 . . // point, we want to use the entry map (-1), even if 169 . . // the first instruction of the function changes the 170 . . // stack map. 171 10ms 10ms targetpc-- 172 . 830ms pcdata = pcdatavalue(f, abi.PCDATA_StackMapIndex, targetpc) 173 . . } 174 10ms 10ms if pcdata == -1 { 175 . . // We do not have a valid pcdata value but there might be a 176 . . // stackmap for this function. It is likely that we are looking 177 . . // at the function prologue, assume so and hope for the best. 178 . . pcdata = 0 179 . . } 180 . . 181 . . // Local variables. 182 . . size := frame.varp - frame.sp 183 . . var minsize uintptr 184 . . switch goarch.ArchFamily { 185 . . case goarch.ARM64: 186 . . minsize = sys.StackAlign 187 . . default: 188 . . minsize = sys.MinFrameSize 189 . . } 190 . . if size > minsize { 191 . . stackid := pcdata 192 20ms 20ms stkmap := (*stackmap)(funcdata(f, abi.FUNCDATA_LocalsPointerMaps)) if i < 0 || i >= f.nfuncdata { symtab.go:1259 ⋮ raw := base + uintptr(off) symtab.go:1272 193 160ms 160ms if stkmap == nil || stkmap.n <= 0 { 194 . . print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n") 195 . . throw("missing stackmap") 196 . . } 197 . . // If nbit == 0, there's no work to do. 198 30ms 30ms if stkmap.nbit > 0 { 199 . . if stackid < 0 || stackid >= stkmap.n { 200 . . // don't know where we are 201 . . print("runtime: pcdata is ", stackid, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n") 202 . . throw("bad symbol table") 203 . . } 204 10ms 10ms locals = stackmapdata(stkmap, stackid) return bitvector{stkmap.nbit, addb(&stkmap.bytedata[0], uintptr(n*((stkmap.nbit+7)>>3)))} symtab.go:1330 205 . . if stackDebug >= 3 && debug { 206 . . print(" locals ", stackid, "/", stkmap.n, " ", locals.n, " words ", locals.bytedata, "\n") 207 . . } 208 . . } else if stackDebug >= 3 && debug { 209 . . print(" no locals to adjust\n") 210 . . } 211 . . } 212 . . 213 . . // Arguments. First fetch frame size and special-case argument maps. 214 . . var isReflect bool 215 . 20ms args, isReflect = frame.argMapInternal() 216 20ms 20ms if args.n > 0 && args.bytedata == nil { 217 . . // Non-empty argument frame, but not a special map. 218 . . // Fetch the argument map at pcdata. 219 10ms 10ms stackmap := (*stackmap)(funcdata(f, abi.FUNCDATA_ArgsPointerMaps)) raw := base + uintptr(off) symtab.go:1272 220 50ms 50ms if stackmap == nil || stackmap.n <= 0 { 221 . . print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(args.n*goarch.PtrSize), "\n") 222 . . throw("missing stackmap") 223 . . } 224 . . if pcdata < 0 || pcdata >= stackmap.n { 225 . . // don't know where we are 226 . . print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " args stack map entries for ", funcname(f), " (targetpc=", hex(targetpc), ")\n") 227 . . 
throw("bad symbol table") 228 . . } 229 20ms 20ms if stackmap.nbit == 0 { 230 . . args.n = 0 231 . . } else { 232 20ms 20ms args = stackmapdata(stackmap, pcdata) 233 . . } 234 . . } 235 . . 236 . . // stack objects. 237 . . if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") && 238 . . unsafe.Sizeof(abi.RegArgs{}) > 0 && isReflect { 239 . . // For reflect.makeFuncStub and reflect.methodValueCall, 240 . . // we need to fake the stack object record. 241 . . // These frames contain an internal/abi.RegArgs at a hard-coded offset. 242 . . // This offset matches the assembly code on amd64 and arm64. 243 . . objs = methodValueCallFrameObjs[:] 244 . . } else { 245 20ms 20ms p := funcdata(f, abi.FUNCDATA_StackObjects) raw := base + uintptr(off) symtab.go:1272 ⋮ return unsafe.Pointer(raw & mask) symtab.go:1273 246 . . if p != nil { 247 . . n := *(*uintptr)(p) 248 10ms 10ms p = add(p, goarch.PtrSize) return unsafe.Pointer(uintptr(p) + x) stubs.go:25 249 . . r0 := (*stackObjectRecord)(noescape(p)) 250 20ms 20ms objs = unsafe.Slice(r0, int(n)) 251 . . // Note: the noescape above is needed to keep 252 . . // getStackMap from "leaking param content: 253 . . // frame". That leak propagates up to getgcmask, then 254 . . // GCMask, then verifyGCInfo, which converts the stack 255 . . // gcinfo tests into heap gcinfo tests :( 256 . . } 257 . . } 258 . . 259 40ms 40ms return 260 . . } 261 . . 262 . . var methodValueCallFrameObjs [1]stackObjectRecord // initialized in stackobjectinit 263 . . 264 . . func stkobjinit() {
internal/runtime/maps.(*Map).getWithoutKeySmallFastStr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 80ms 80ms (flat, cum) 0.21% 12 . . "internal/race" 13 . . "internal/runtime/sys" 14 . . "unsafe" 15 . . ) 16 . . 17 20ms 20ms func (m *Map) getWithoutKeySmallFastStr(typ *abi.SwissMapType, key string) unsafe.Pointer { 18 . . g := groupReference{ 19 . . data: m.dirPtr, 20 . . } 21 . . 22 . . ctrls := *g.ctrls() 23 . . slotKey := g.key(typ, 0) 24 . . slotSize := typ.SlotSize 25 . . 26 . . // The 64 threshold was chosen based on performance of BenchmarkMapStringKeysEight, 27 . . // where there are 8 keys to check, all of which don't quick-match the lookup key. 28 . . // In that case, we can save hashing the lookup key. That savings is worth this extra code 29 . . // for strings that are long enough that hashing is expensive. 30 60ms 60ms if len(key) > 64 { 31 . . // String hashing and equality might be expensive. Do a quick check first. 32 . . j := abi.SwissMapGroupSlots 33 . . for i := range abi.SwissMapGroupSlots { 34 . . if ctrls&(1<<7) == 0 && longStringQuickEqualityTest(key, *(*string)(slotKey)) { 35 . . if j < abi.SwissMapGroupSlots {
internal/runtime/maps.(*Map).getWithoutKeySmallFastStr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 150ms 290ms (flat, cum) 0.77% 54 . . return nil 55 . . } 56 . . 57 . . dohash: 58 . . // This path will cost 1 hash and 1+ε comparisons. 59 10ms 100ms hash := typ.Hasher(abi.NoEscape(unsafe.Pointer(&key)), m.seed) 60 . . h2 := uint8(h2(hash)) 61 . . ctrls = *g.ctrls() 62 10ms 10ms slotKey = g.key(typ, 0) 63 . . 64 30ms 30ms for range abi.SwissMapGroupSlots { 65 60ms 110ms if uint8(ctrls) == h2 && key == *(*string)(slotKey) { 66 10ms 10ms return unsafe.Pointer(uintptr(slotKey) + 2*goarch.PtrSize) 67 . . } 68 20ms 20ms slotKey = unsafe.Pointer(uintptr(slotKey) + slotSize) 69 . . ctrls >>= 8 70 . . } 71 10ms 10ms return nil 72 . . } 73 . . 74 . . // Returns true if a and b might be equal. 75 . . // Returns false if a and b are definitely not equal. 76 . . // Requires len(a)>=8.
runtime.mapaccess1_faststr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 10ms 90ms (flat, cum) 0.24% 115 . . fatal("concurrent map read and map write") 116 . . return nil 117 . . } 118 . . 119 . . if m.dirLen <= 0 { 120 . 80ms elem := m.getWithoutKeySmallFastStr(typ, key) 121 . . if elem == nil { 122 10ms 10ms return unsafe.Pointer(&zeroVal[0]) 123 . . } 124 . . return elem 125 . . } 126 . . 127 . . k := key
runtime.mapaccess2_faststr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 100ms 400ms (flat, cum) 1.07% 157 . . } 158 . . } 159 . . } 160 . . 161 . . //go:linkname runtime_mapaccess2_faststr runtime.mapaccess2_faststr 162 10ms 10ms func runtime_mapaccess2_faststr(typ *abi.SwissMapType, m *Map, key string) (unsafe.Pointer, bool) { 163 . . if race.Enabled && m != nil { 164 . . callerpc := sys.GetCallerPC() 165 . . pc := abi.FuncPCABIInternal(runtime_mapaccess2_faststr) 166 . . race.ReadPC(unsafe.Pointer(m), callerpc, pc) 167 . . } 168 . . 169 50ms 50ms if m == nil || m.Used() == 0 { 170 . . return unsafe.Pointer(&zeroVal[0]), false 171 . . } 172 . . 173 20ms 20ms if m.writing != 0 { 174 . . fatal("concurrent map read and map write") 175 . . return nil, false 176 . . } 177 . . 178 . . if m.dirLen <= 0 { 179 . 290ms elem := m.getWithoutKeySmallFastStr(typ, key) 180 . . if elem == nil { 181 . . return unsafe.Pointer(&zeroVal[0]), false 182 . . } 183 . . return elem, true 184 . . } 185 . . 186 . . k := key 187 . . hash := typ.Hasher(abi.NoEscape(unsafe.Pointer(&k)), m.seed) 188 . . 189 . . // Select table. 190 . . idx := m.directoryIndex(hash) 191 . . t := m.directoryAt(idx) 192 . . 193 . . // Probe table. 194 . . seq := makeProbeSeq(h1(hash), t.groups.lengthMask) 195 . . for ; ; seq = seq.next() { 196 . . g := t.groups.group(typ, seq.offset) 197 . . 198 10ms 10ms match := g.ctrls().matchH2(h2(hash)) return ctrlGroupMatchH2(g, h) group.go:154 v := uint64(g) ^ (bitsetLSB * uint64(h)) group.go:170 199 . . 200 . . for match != 0 { 201 10ms 10ms i := match.first() 202 . . 203 . . slotKey := g.key(typ, i) 204 . 10ms if key == *(*string)(slotKey) { 205 . . slotElem := unsafe.Pointer(uintptr(slotKey) + 2*goarch.PtrSize) 206 . . return slotElem, true 207 . . } 208 . . match = match.removeFirst() 209 . . }
internal/runtime/maps.(*Map).putSlotSmallFastStr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 80ms 80ms (flat, cum) 0.21% 215 . . return unsafe.Pointer(&zeroVal[0]), false 216 . . } 217 . . } 218 . . } 219 . . 220 10ms 10ms func (m *Map) putSlotSmallFastStr(typ *abi.SwissMapType, hash uintptr, key string) unsafe.Pointer { 221 . . g := groupReference{ 222 . . data: m.dirPtr, 223 . . } 224 . . 225 20ms 20ms match := g.ctrls().matchH2(h2(hash)) return h & 0x7f map.go:191 ⋮ 226 . . 227 . . // Look for an existing slot containing this key. 228 . . for match != 0 { 229 . . i := match.first() 230 . . 231 . . slotKey := g.key(typ, i) 232 . . if key == *(*string)(slotKey) { 233 . . // Key needs update, as the backing storage may differ. 234 . . *(*string)(slotKey) = key 235 . . slotElem := g.elem(typ, i) 236 . . return slotElem 237 . . } 238 . . match = match.removeFirst() 239 . . } 240 . . 241 . . // There can't be deleted slots, small maps can't have them 242 . . // (see deleteSmall). Use matchEmptyOrDeleted as it is a bit 243 . . // more efficient than matchEmpty. 244 10ms 10ms match = g.ctrls().matchEmptyOrDeleted() return (*ctrlGroup)(g.data) group.go:280 245 . . if match == 0 { 246 . . fatal("small map with no empty slot (concurrent map writes?)") 247 . . } 248 . . 249 . . i := match.first() 250 . . 251 . . slotKey := g.key(typ, i) 252 20ms 20ms *(*string)(slotKey) = key 253 . . 254 20ms 20ms slotElem := g.elem(typ, i) offset := groupSlotsOffset + i*typ.SlotSize + typ.ElemOff group.go:292 ⋮ return unsafe.Pointer(uintptr(g.data) + offset) group.go:294 255 . . 256 . . g.ctrls().set(i, ctrl(h2(hash))) 257 . . m.used++ 258 . .
runtime.mapassign_faststr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 80ms 570ms (flat, cum) 1.52% 259 . . return slotElem 260 . . } 261 . . 262 . . //go:linkname runtime_mapassign_faststr runtime.mapassign_faststr 263 10ms 10ms func runtime_mapassign_faststr(typ *abi.SwissMapType, m *Map, key string) unsafe.Pointer { 264 10ms 10ms if m == nil { 265 . . panic(errNilAssign) 266 . . } 267 . . if race.Enabled { 268 . . callerpc := sys.GetCallerPC() 269 . . pc := abi.FuncPCABIInternal(runtime_mapassign_faststr) 270 . . race.WritePC(unsafe.Pointer(m), callerpc, pc) 271 . . } 272 10ms 10ms if m.writing != 0 { 273 . . fatal("concurrent map writes") 274 . . } 275 . . 276 . . k := key 277 30ms 100ms hash := typ.Hasher(abi.NoEscape(unsafe.Pointer(&k)), m.seed) 278 . . 279 . . // Set writing after calling Hasher, since Hasher may panic, in which 280 . . // case we have not actually done a write. 281 10ms 10ms m.writing ^= 1 // toggle, see comment on writing 282 . . 283 . . if m.dirPtr == nil { 284 . 340ms m.growToSmall(typ) 285 . . } 286 . . 287 . . if m.dirLen == 0 { 288 . . if m.used < abi.SwissMapGroupSlots { 289 . 80ms elem := m.putSlotSmallFastStr(typ, hash, key) 290 . . 291 . . if m.writing == 0 { 292 . . fatal("concurrent map writes") 293 . . } 294 10ms 10ms m.writing ^= 1 295 . . 296 . . return elem 297 . . } 298 . . 299 . . // Can't fit another entry, grow to full size map.
runtime.mapdelete_faststr
/usr/lib/go/src/internal/runtime/maps/runtime_faststr_swiss.go
Total: 0 10ms (flat, cum) 0.027% 407 . . 408 . . if m == nil || m.Used() == 0 { 409 . . return 410 . . } 411 . . 412 . 10ms m.Delete(typ, abi.NoEscape(unsafe.Pointer(&key))) 413 . . }
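
The *_faststr entries above are the string-key specializations of the Swiss-table map implementation: a plain read m[k] goes through mapaccess1_faststr, the comma-ok form through mapaccess2_faststr, assignment through mapassign_faststr (which grows an empty map via growToSmall on its first insert), and delete(m, k) through mapdelete_faststr. An illustrative translation back to user syntax:

    package main

    import "fmt"

    func main() {
        m := make(map[string]int) // the first assignment below typically triggers growToSmall

        m["alpha"] = 1        // runtime.mapassign_faststr
        v := m["alpha"]       // runtime.mapaccess1_faststr
        w, ok := m["missing"] // runtime.mapaccess2_faststr
        delete(m, "alpha")    // runtime.mapdelete_faststr

        fmt.Println(v, w, ok)
    }
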
runtime.sellock
/usr/lib/go/src/runtime/select.go
Total: 40ms 160ms (flat, cum) 0.43% 32 . . } 33 . . 34 . . func sellock(scases []scase, lockorder []uint16) { 35 . . var c *hchan 36 . . for _, o := range lockorder { 37 10ms 10ms c0 := scases[o].c 38 20ms 20ms if c0 != c { 39 . . c = c0 40 10ms 130ms lock(&c.lock) ⋮ lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 41 . . } 42 . . } 43 . . } 44 . . 45 . . func selunlock(scases []scase, lockorder []uint16) {
runtime.selunlock
/usr/lib/go/src/runtime/select.go
Total: 40ms 180ms (flat, cum) 0.48% 49 . . // First M calls runtime·park() in runtime·selectgo() passing the sel. 50 . . // Once runtime·park() has unlocked the last lock, another M makes 51 . . // the G that calls select runnable again and schedules it for execution. 52 . . // When the G runs on another M, it locks all the locks and frees sel. 53 . . // Now if the first M touches sel, it will access freed memory. 54 20ms 20ms for i := len(lockorder) - 1; i >= 0; i-- { 55 . . c := scases[lockorder[i]].c 56 10ms 10ms if i > 0 && c == scases[lockorder[i-1]].c { 57 . . continue // will unlock it on the next iteration 58 . . } 59 10ms 150ms unlock(&c.lock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 ⋮ 60 . . } 61 . . } 62 . . 63 . . func selparkcommit(gp *g, _ unsafe.Pointer) bool { 64 . . // There are unlocked sudogs that point into gp's stack. Stack
runtime.selparkcommit
/usr/lib/go/src/runtime/select.go
Total: 10ms 60ms (flat, cum) 0.16% 88 . . // any sudog with that channel may change, 89 . . // including c and waitlink. Since multiple 90 . . // sudogs may have the same channel, we unlock 91 . . // only after we've passed the last instance 92 . . // of a channel. 93 10ms 60ms unlock(&lastc.lock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 ⋮ 94 . . } 95 . . lastc = sg.c 96 . . } 97 . . if lastc != nil { 98 . . unlock(&lastc.lock)
runtime.selectgo
/usr/lib/go/src/runtime/select.go
Total: 40ms 40ms (flat, cum) 0.11% 117 . . // 118 . . // selectgo returns the index of the chosen scase, which matches the 119 . . // ordinal position of its respective select{recv,send,default} call. 120 . . // Also, if the chosen scase was a receive operation, it reports whether 121 . . // a value was received. 122 20ms 20ms func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, block bool) (int, bool) { 123 . . gp := getg() 124 . . if debugSelect { 125 . . print("select: cas0=", cas0, "\n") 126 . . } 127 . . 128 . . // NOTE: In order to maintain a lean stack size, the number of scases 129 . . // is capped at 65536. 130 . . cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0)) 131 . . order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0)) 132 . . 133 10ms 10ms ncases := nsends + nrecvs 134 10ms 10ms scases := cas1[:ncases:ncases] 135 . . pollorder := order1[:ncases:ncases] 136 . . lockorder := order1[ncases:][:ncases:ncases] 137 . . // NOTE: pollorder/lockorder's underlying array was not zero-initialized by compiler. 138 . . 139 . . // Even when raceenabled is true, there might be select
runtime.selectgo
/usr/lib/go/src/runtime/select.go
Total: 220ms 330ms (flat, cum) 0.88% 165 . . // optimizing (and needing to test). 166 . . 167 . . // generate permuted order 168 . . norder := 0 169 . . allSynctest := true 170 10ms 10ms for i := range scases { 171 . . cas := &scases[i] 172 . . 173 . . // Omit cases without channels from the poll and lock orders. 174 . . if cas.c == nil { 175 . . cas.elem = nil // allow GC 176 . . continue 177 . . } 178 . . 179 10ms 10ms if cas.c.bubble != nil { 180 . . if getg().bubble != cas.c.bubble { 181 . . fatal("select on synctest channel from outside bubble") 182 . . } 183 . . } else { 184 . . allSynctest = false 185 . . } 186 . . 187 . . if cas.c.timer != nil { 188 . . cas.c.timer.maybeRunChan(cas.c) 189 . . } 190 . . 191 20ms 20ms j := cheaprandn(uint32(norder + 1)) return uint32((uint64(cheaprand()) * uint64(n)) >> 32) rand.go:293 mp.cheaprand += 0xa0761d6478bd642f rand.go:235 ⋮ 192 40ms 40ms pollorder[norder] = pollorder[j] 193 . . pollorder[j] = uint16(i) 194 . . norder++ 195 . . } 196 . . pollorder = pollorder[:norder] 197 . . lockorder = lockorder[:norder] 198 . . 199 . . waitReason := waitReasonSelect 200 10ms 10ms if gp.bubble != nil && allSynctest { 201 . . // Every channel selected on is in a synctest bubble, 202 . . // so this goroutine will count as idle while selecting. 203 . . waitReason = waitReasonSynctestSelect 204 . . } 205 . . 206 . . // sort the cases by Hchan address to get the locking order. 207 . . // simple heap sort, to guarantee n log n time and constant stack footprint. 208 . . for i := range lockorder { 209 . . j := i 210 . . // Start with the pollorder to permute cases on the same channel. 211 10ms 10ms c := scases[pollorder[i]].c 212 30ms 30ms for j > 0 && scases[lockorder[(j-1)/2]].c.sortkey() < c.sortkey() { return uintptr(unsafe.Pointer(c)) select.go:546 ⋮ 213 . . k := (j - 1) / 2 214 30ms 30ms lockorder[j] = lockorder[k] 215 . . j = k 216 . . } 217 20ms 20ms lockorder[j] = pollorder[i] 218 . . } 219 . . for i := len(lockorder) - 1; i >= 0; i-- { 220 . . o := lockorder[i] 221 . . c := scases[o].c 222 . . lockorder[i] = lockorder[0] 223 . . j := 0 224 . . for { 225 . . k := j*2 + 1 226 . . if k >= i { 227 . . break 228 . . } 229 30ms 30ms if k+1 < i && scases[lockorder[k]].c.sortkey() < scases[lockorder[k+1]].c.sortkey() { 230 . . k++ 231 . . } 232 . . if c.sortkey() < scases[lockorder[k]].c.sortkey() { 233 10ms 10ms lockorder[j] = lockorder[k] 234 . . j = k 235 . . continue 236 . . } 237 . . break 238 . . } 239 . . lockorder[j] = o 240 . . } 241 . . 242 . . if debugSelect { 243 . . for i := 0; i+1 < len(lockorder); i++ { 244 . . if scases[lockorder[i]].c.sortkey() > scases[lockorder[i+1]].c.sortkey() { 245 . . print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n") 246 . . throw("select: broken sort") 247 . . } 248 . . } 249 . . } 250 . . 251 . . // lock all the channels involved in the select 252 . 110ms sellock(scases, lockorder) 253 . . 254 . . var ( 255 . . sg *sudog 256 . . c *hchan 257 . . k *scase
runtime.selectgo
/usr/lib/go/src/runtime/select.go
Total: 140ms 260ms (flat, cum) 0.69% 310 . . if gp.waiting != nil { 311 . . throw("gp.waiting != nil") 312 . . } 313 . . nextp = &gp.waiting 314 . . for _, casei := range lockorder { 315 10ms 10ms casi = int(casei) 316 . . cas = &scases[casi] 317 . . c = cas.c 318 . 50ms sg := acquireSudog() 319 10ms 10ms sg.g = gp 320 . . sg.isSelect = true 321 . . // No stack splits between assigning elem and enqueuing 322 . . // sg on gp.waiting where copystack can find it. 323 . . sg.elem = cas.elem 324 . . sg.releasetime = 0 325 . . if t0 != 0 { 326 . . sg.releasetime = -1 327 . . } 328 . . sg.c = c 329 . . // Construct waiting list in lock order. 330 . . *nextp = sg 331 . . nextp = &sg.waitlink 332 . . 333 . . if casi < nsends { 334 . . c.sendq.enqueue(sg) 335 . . } else { 336 10ms 10ms c.recvq.enqueue(sg) sgp.next = nil chan.go:873 337 . . } 338 . . 339 . . if c.timer != nil { 340 . . blockTimerChan(c) 341 . . } 342 . . } 343 . . 344 . . // wait for someone to wake us up 345 . . gp.param = nil 346 . . // Signal to anyone trying to shrink our stack that we're about 347 . . // to park on a channel. The window between when this G's status 348 . . // changes and when we set gp.activeStackChans is not safe for 349 . . // stack shrinking. 350 . . gp.parkingOnChan.Store(true) 351 . 20ms gopark(selparkcommit, nil, waitReason, traceBlockSelect, 1) 352 . . gp.activeStackChans = false 353 . . 354 . 50ms sellock(scases, lockorder) 355 . . 356 . . gp.selectDone.Store(0) 357 . . sg = (*sudog)(gp.param) 358 . . gp.param = nil 359 . . 360 . . // pass 3 - dequeue from unsuccessful chans 361 . . // otherwise they stack up on quiet channels 362 . . // record the successful case, if any. 363 . . // We singly-linked up the SudoGs in lock order. 364 . . casi = -1 365 . . cas = nil 366 . . caseSuccess = false 367 . . sglist = gp.waiting 368 . . // Clear all elem before unlinking from gp.waiting. 369 50ms 50ms for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { 370 . . sg1.isSelect = false 371 10ms 10ms sg1.elem = nil 372 . . sg1.c = nil 373 . . } 374 . . gp.waiting = nil 375 . . 376 . . for _, casei := range lockorder { 377 10ms 10ms k = &scases[casei] 378 10ms 10ms if k.c.timer != nil { 379 . . unblockTimerChan(k.c) 380 . . } 381 10ms 10ms if sg == sglist { 382 . . // sg has already been dequeued by the G that woke us up. 383 . . casi = int(casei) 384 . . cas = k 385 . . caseSuccess = sglist.success 386 . . if sglist.releasetime > 0 { 387 . . caseReleaseTime = sglist.releasetime 388 . . } 389 . . } else { 390 . . c = k.c 391 . . if int(casei) < nsends { 392 . . c.sendq.dequeueSudoG(sglist) 393 . . } else { 394 10ms 10ms c.recvq.dequeueSudoG(sglist) 395 . . } 396 . . } 397 10ms 10ms sgnext = sglist.waitlink 398 . . sglist.waitlink = nil 399 . . releaseSudog(sglist) 400 . . sglist = sgnext 401 . . } 402 . .
runtime.selectgo
/usr/lib/go/src/runtime/select.go
Total: 10ms 200ms (flat, cum) 0.53% 438 . . } else if cas.elem != nil { 439 . . asanwrite(cas.elem, c.elemtype.Size_) 440 . . } 441 . . } 442 . . 443 . 60ms selunlock(scases, lockorder) 444 . . goto retc 445 . . 446 . . bufrecv: 447 . . // can receive from buffer 448 . . if raceenabled { 449 . . if cas.elem != nil { 450 . . raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) 451 . . } 452 . . racenotify(c, c.recvx, nil) 453 . . } 454 . . if msanenabled && cas.elem != nil { 455 . . msanwrite(cas.elem, c.elemtype.Size_) 456 . . } 457 . . if asanenabled && cas.elem != nil { 458 . . asanwrite(cas.elem, c.elemtype.Size_) 459 . . } 460 . . recvOK = true 461 . . qp = chanbuf(c, c.recvx) 462 . . if cas.elem != nil { 463 . 20ms typedmemmove(c.elemtype, cas.elem, qp) 464 . . } 465 . . typedmemclr(c.elemtype, qp) 466 10ms 10ms c.recvx++ 467 . . if c.recvx == c.dataqsiz { 468 . . c.recvx = 0 469 . . } 470 . . c.qcount-- 471 . 110ms selunlock(scases, lockorder) 472 . . goto retc 473 . . 474 . . bufsend: 475 . . // can send to buffer 476 . . if raceenabled {
runtime.selectgo.func2
/usr/lib/go/src/runtime/select.go
Total: 0 40ms (flat, cum) 0.11% 492 . . selunlock(scases, lockorder) 493 . . goto retc 494 . . 495 . . recv: 496 . . // can receive from sleeping sender (sg) 497 . 40ms recv(c, sg, cas.elem, func() { selunlock(scases, lockorder) }, 2) 498 . . if debugSelect { 499 . . print("syncrecv: cas0=", cas0, " c=", c, "\n") 500 . . } 501 . . recvOK = true 502 . . goto retc
runtime.(*hchan).sortkey
/usr/lib/go/src/runtime/select.go
Total: 10ms 10ms (flat, cum) 0.027% 541 . . selunlock(scases, lockorder) 542 . . panic(plainError("send on closed channel")) 543 . . } 544 . . 545 . . func (c *hchan) sortkey() uintptr { 546 10ms 10ms return uintptr(unsafe.Pointer(c)) 547 . . } 548 . . 549 . . // A runtimeSelect is a single case passed to rselect. 550 . . // This must match ../reflect/value.go:/runtimeSelect 551 . . type runtimeSelect struct {
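The hot spots in the selectgo listings above are the randomized poll order built with cheaprandn and the heap sort of the lock order by channel address (sortkey). A small, self-contained demonstration of the user-visible consequence, not part of the profiled code: when several cases are ready, select picks one pseudo-randomly, so both counters below end up near 5000.

package main

import "fmt"

func main() {
	a := make(chan int, 1)
	b := make(chan int, 1)
	counts := map[string]int{}

	for i := 0; i < 10000; i++ {
		a <- 1
		b <- 1
		// Both cases are ready; the runtime's permuted poll order makes
		// the choice pseudo-random rather than positional.
		select {
		case <-a:
			counts["a"]++
			<-b // drain the other channel
		case <-b:
			counts["b"]++
			<-a
		}
	}
	fmt.Println(counts)
}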
git.urbach.dev/cli/q/src/ssa.NewBlock
/home/user/q/src/ssa/Block.go
Total: 0 90ms (flat, cum) 0.24% 16 . . Predecessors []*Block 17 . . } 18 . . 19 . . // NewBlock creates a new basic block. 20 . . func NewBlock(label string) *Block { 21 . 20ms return &Block{ 22 . 70ms Instructions: make([]Value, 0, 8), 23 . . Label: label, 24 . . } 25 . . }
git.urbach.dev/cli/q/src/ssa.(*Block).AddSuccessor
/home/user/q/src/ssa/Block.go
Total: 60ms 650ms (flat, cum) 1.73% 26 . . 27 . . // AddSuccessor adds the given block as a successor. 28 . . func (b *Block) AddSuccessor(successor *Block) { 29 . 60ms successor.Predecessors = append(successor.Predecessors, b) 30 . . 31 . . if len(b.Protected) > 0 { 32 . . if successor.Protected == nil { 33 . . successor.Protected = make(map[Value][]Value, len(b.Protected)) 34 . . } 35 . . 36 . . maps.Copy(successor.Protected, b.Protected) 37 . . } 38 . . 39 . . if b.Identifiers == nil { 40 . . return 41 . . } 42 . . 43 . . if successor.Identifiers == nil { 44 . 60ms successor.Identifiers = make(map[string]Value, len(b.Identifiers)) 45 . . 46 10ms 10ms if len(successor.Predecessors) == 1 { 47 . 270ms maps.Copy(successor.Identifiers, b.Identifiers) for k, v := range src { maps.go:63 ⋮ dst[k] = v maps.go:64 ⋮ for k, v := range src { maps.go:63 48 10ms 10ms return 49 . . } 50 . . } 51 . . 52 10ms 20ms keys := make(map[string]struct{}, max(len(b.Identifiers), len(successor.Identifiers))) 53 . . 54 10ms 10ms for name := range successor.Identifiers { 55 10ms 20ms keys[name] = struct{}{} 56 . . } 57 . . 58 . 20ms for name := range b.Identifiers { 59 . 30ms keys[name] = struct{}{} 60 . . } 61 . . 62 . 10ms for name := range keys { 63 . 40ms oldValue, oldExists := successor.Identifiers[name] 64 . 30ms newValue, newExists := b.Identifiers[name] 65 . . 66 . . switch { 67 . . case oldExists: 68 . . if oldValue == newValue { 69 . . continue 70 . . } 71 . . 72 10ms 10ms definedLocally := successor.Index(oldValue) != -1 73 . . 74 . . if definedLocally { 75 . . phi, isPhi := oldValue.(*Phi) 76 . . 77 . . if isPhi { 78 . . if newExists { 79 . . phi.Arguments = append(phi.Arguments, newValue) 80 . . } else { 81 . . phi.Arguments = append(phi.Arguments, Undefined) 82 . . } 83 . . } 84 . . 85 . . continue 86 . . } 87 . . 88 . 30ms phi := &Phi{ 89 . 10ms Arguments: make([]Value, len(successor.Predecessors)-1, len(successor.Predecessors)), 90 . . Typ: oldValue.Type(), 91 . . } 92 . . 93 . . for i := range phi.Arguments { 94 . . phi.Arguments[i] = oldValue 95 . . } 96 . . 97 . 10ms successor.InsertAt(phi, 0) b.Instructions = slices.Insert(b.Instructions, index, value) Block.go:234 98 . . successor.Identifiers[name] = phi 99 . . 100 . . if newExists { 101 . . phi.Arguments = append(phi.Arguments, newValue) 102 . . } else {
git.urbach.dev/cli/q/src/ssa.(*Block).AddSuccessor
/home/user/q/src/ssa/Block.go
Total: 10ms 10ms (flat, cum) 0.027% 116 . . successor.InsertAt(phi, 0) 117 . . successor.Identifiers[name] = phi 118 . . phi.Arguments = append(phi.Arguments, newValue) 119 . . } 120 . . } 121 10ms 10ms } 122 . .
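AddSuccessor merges the identifier maps of predecessor blocks and inserts a Phi when the same name is bound to different values on different incoming edges. The sketch below is a deliberately simplified, hypothetical illustration of that merge rule: the types (value, konst, phi) are invented for the example, and it only walks one predecessor's names, whereas the real code unions both key sets and patches phis that already exist in the successor.

package main

import "fmt"

type value interface{ String() string }

type konst struct{ n int }

func (k konst) String() string { return fmt.Sprintf("const %d", k.n) }

type phi struct{ args []value }

func (p *phi) String() string { return fmt.Sprintf("phi%v", p.args) }

// mergeIdentifiers builds the identifier map of a join block from two
// predecessor maps, inserting a phi wherever the bindings disagree.
func mergeIdentifiers(pred1, pred2 map[string]value) map[string]value {
	out := make(map[string]value, len(pred1))
	for name, v1 := range pred1 {
		v2, ok := pred2[name]
		if ok && v1 == v2 {
			out[name] = v1 // same value on both paths: no phi needed
			continue
		}
		p := &phi{args: []value{v1}}
		if ok {
			p.args = append(p.args, v2)
		}
		out[name] = p
	}
	return out
}

func main() {
	thenBlock := map[string]value{"x": konst{1}, "y": konst{7}}
	elseBlock := map[string]value{"x": konst{2}, "y": konst{7}}
	// x needs a phi (1 vs 2), y does not (7 on both paths).
	fmt.Println(mergeIdentifiers(thenBlock, elseBlock))
}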
git.urbach.dev/cli/q/src/ssa.(*Block).Append
/home/user/q/src/ssa/Block.go
Total: 40ms 90ms (flat, cum) 0.24% 124 . . func (b *Block) Append(value Value) { 125 40ms 90ms b.Instructions = append(b.Instructions, value) 126 . . } 127 . .
git.urbach.dev/cli/q/src/ssa.(*Block).CanReachPredecessor
/home/user/q/src/ssa/Block.go
Total: 0 90ms (flat, cum) 0.24% 128 . . // CanReachPredecessor checks if the `other` block appears as a predecessor or is the block itself. 129 . . func (b *Block) CanReachPredecessor(other *Block) bool { 130 . 90ms return b.canReachPredecessor(other, make(map[*Block]bool)) 131 . . } 132 . .
git.urbach.dev/cli/q/src/ssa.(*Block).canReachPredecessor
/home/user/q/src/ssa/Block.go
Total: 20ms 70ms (flat, cum) 0.19% 133 . . // canReachPredecessor checks if the `other` block appears as a predecessor or is the block itself. 134 . . func (b *Block) canReachPredecessor(other *Block, traversed map[*Block]bool) bool { 135 10ms 10ms if other == b { 136 . . return true 137 . . } 138 . . 139 . . if traversed[b] { 140 . . return false 141 . . } 142 . . 143 . 50ms traversed[b] = true 144 . . 145 . . for _, pre := range b.Predecessors { 146 . . if pre.canReachPredecessor(other, traversed) { 147 . . return true 148 . . } 149 . . } 150 . . 151 10ms 10ms return false 152 . . } 153 . . 154 . . // Contains checks if the value exists within the block. 155 . . func (b *Block) Contains(value Value) bool {
git.urbach.dev/cli/q/src/ssa.(*Block).FindExisting
/home/user/q/src/ssa/Block.go
Total: 180ms 260ms (flat, cum) 0.69% 157 . . } 158 . . 159 . . // FindExisting returns an equal instruction that's already appended or `nil` if none could be found. 160 . . func (b *Block) FindExisting(instr Value) Value { 161 50ms 110ms if !instr.IsConst() { 162 10ms 10ms return nil 163 . . } 164 . . 165 120ms 140ms for _, existing := range slices.Backward(b.Instructions) { if !yield(i, s[i]) { iter.go:29 if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ ⋮ ⋮ ⋮ ⋮ if !yield(i, s[i]) { iter.go:29 ⋮ switch existing.(type) { Block.go:172 ⋮ if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ ⋮ switch existing.(type) { Block.go:172
git.urbach.dev/cli/q/src/ssa.(*Block).FindExisting-range1
/home/user/q/src/ssa/Block.go
Total: 70ms 90ms (flat, cum) 0.24% 166 50ms 70ms if existing.IsConst() && instr.Equals(existing) { 167 . . return existing 168 . . } 169 . . 170 . . // If we encounter a call, we can't be sure that the value is still the same. 171 . . // TODO: This is a bit too conservative. We could check if the call affects the value. 172 20ms 20ms switch existing.(type) { 173 . . case *Call, *CallExtern:
git.urbach.dev/cli/q/src/ssa.(*Block).FindExisting
/home/user/q/src/ssa/Block.go
Total: 60ms 60ms (flat, cum) 0.16% 175 . . } 176 40ms 40ms } 177 . . 178 20ms 20ms return nil 179 . . } 180 . .
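FindExisting walks the block backwards looking for an equal constant and gives up at the first call, a conservative local form of common-subexpression elimination; IR.Append (further down) relies on it to avoid appending duplicates. A minimal sketch of the same idea with hypothetical types, not the project's actual API:

package main

import "fmt"

type instr struct {
	op    string // "const" or "call"
	value int    // payload for constants
}

// appendDedup reuses an equal constant that is already in the block, but
// stops the backwards scan at a call because a call may change state.
func appendDedup(block []instr, in instr) ([]instr, int) {
	if in.op == "const" {
		for i := len(block) - 1; i >= 0; i-- {
			if block[i].op == "call" {
				break // be conservative across calls
			}
			if block[i] == in {
				return block, i // reuse the existing instruction
			}
		}
	}
	return append(block, in), len(block)
}

func main() {
	var block []instr
	block, a := appendDedup(block, instr{op: "const", value: 42})
	block, b := appendDedup(block, instr{op: "const", value: 42})
	fmt.Println(a, b, len(block)) // 0 0 1: the duplicate was folded
}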
git.urbach.dev/cli/q/src/ssa.(*Block).FindIdentifier
/home/user/q/src/ssa/Block.go
Total: 20ms 110ms (flat, cum) 0.29% 182 . . // can have and combines them to a phi instruction if necessary. 183 . . func (b *Block) FindIdentifier(name string) (value Value, exists bool) { 184 20ms 110ms value, exists = b.Identifiers[name] 185 . . return 186 . . } 187 . . 188 . . // IdentifiersFor returns an iterator for all the identifiers pointing to the given value. 189 . . func (b *Block) IdentifiersFor(value Value) iter.Seq[string] {
git.urbach.dev/cli/q/src/ssa.(*Block).Identify
/home/user/q/src/ssa/Block.go
Total: 20ms 380ms (flat, cum) 1.01% 198 . . } 199 . . } 200 . . 201 . . // Identify adds a new identifier or changes an existing one. 202 . . func (b *Block) Identify(name string, value Value) { 203 20ms 20ms if b.Identifiers == nil { 204 . 80ms b.Identifiers = make(map[string]Value, 8) 205 . . } 206 . . 207 . 280ms b.Identifiers[name] = value 208 . . } 209 . .
git.urbach.dev/cli/q/src/ssa.(*Block).IsIdentified
/home/user/q/src/ssa/Block.go
Total: 20ms 30ms (flat, cum) 0.08% 210 . . // IsIdentified returns true if the value can be obtained from one of the identifiers. 211 . . func (b *Block) IsIdentified(value Value) bool { 212 20ms 30ms for _, existing := range b.Identifiers { 213 . . if existing == value { 214 . . return true 215 . . } 216 . . } 217 . .
git.urbach.dev/cli/q/src/ssa.(*Block).InsertAt
/home/user/q/src/ssa/Block.go
Total: 0 10ms (flat, cum) 0.027% 229 . . return -1 230 . . } 231 . . 232 . . // InsertAt inserts the `value` at the given `index`. 233 . . func (b *Block) InsertAt(value Value, index int) { 234 . 10ms b.Instructions = slices.Insert(b.Instructions, index, value) 235 . . } 236 . .
git.urbach.dev/cli/q/src/ssa.(*Block).Last
/home/user/q/src/ssa/Block.go
Total: 20ms 20ms (flat, cum) 0.053% 237 . . // Last returns the last value. 238 . . func (b *Block) Last() Value { 239 20ms 20ms if len(b.Instructions) == 0 { 240 . . return nil 241 . . } 242 . . 243 . . return b.Instructions[len(b.Instructions)-1] 244 . . }
git.urbach.dev/cli/q/src/ssa.(*Block).Phis
/home/user/q/src/ssa/Block.go
Total: 10ms 40ms (flat, cum) 0.11% 246 . . // Phis is an iterator for all phis at the top of the block. 247 . . func (b *Block) Phis(yield func(*Phi) bool) { 248 . . for _, instr := range b.Instructions { 249 . . phi, isPhi := instr.(*Phi) 250 . . 251 10ms 40ms if !isPhi || !yield(phi) { ⋮ instr.Replace(oldValue, phi) compileLoop.go:132 252 . . return 253 . . } 254 . . } 255 . . } 256 . .
git.urbach.dev/cli/q/src/ssa.(*Block).RemoveAt
/home/user/q/src/ssa/Block.go
Total: 10ms 20ms (flat, cum) 0.053% 257 . . // RemoveAt sets the value at the given index to nil. 258 . . func (b *Block) RemoveAt(index int) { 259 . . value := b.Instructions[index] 260 . . 261 . . for _, input := range value.Inputs() { 262 . 10ms input.RemoveUser(value) 263 . . } 264 . . 265 . . b.Instructions[index] = nil 266 10ms 10ms } 267 . .
git.urbach.dev/cli/q/src/ssa.(*Block).RemoveNilValues
/home/user/q/src/ssa/Block.go
Total: 0 110ms (flat, cum) 0.29% 269 . . func (b *Block) RemoveNilValues() { 270 . 110ms b.Instructions = slices.DeleteFunc(b.Instructions, func(value Value) bool { 271 . . return value == nil 272 . . }) 273 . . } 274 . . 275 . . // ReplaceAllUses replaces all uses of `old` with `new`.
git.urbach.dev/cli/q/src/ssa.(*Block).Unidentify
/home/user/q/src/ssa/Block.go
Total: 0 40ms (flat, cum) 0.11% 284 . . return CleanLabel(b.Label) 285 . . } 286 . . 287 . . // Unidentify deletes the identifier for the given value. 288 . . func (b *Block) Unidentify(value Value) { 289 . 20ms for name, existing := range b.Identifiers { 290 . 10ms if existing == value { 291 . 10ms delete(b.Identifiers, name) 292 . . return 293 . . } 294 . . } 295 . . } 296 . .
git.urbach.dev/cli/q/src/ssa.(*IR).AddBlock
/home/user/q/src/ssa/IR.go
Total: 0 30ms (flat, cum) 0.08% 5 . . Blocks []*Block 6 . . } 7 . . 8 . . // AddBlock adds a new block to the function. 9 . . func (ir *IR) AddBlock(block *Block) { 10 . 30ms ir.Blocks = append(ir.Blocks, block) 11 . . }
git.urbach.dev/cli/q/src/ssa.(*IR).Append
/home/user/q/src/ssa/IR.go
Total: 110ms 480ms (flat, cum) 1.28% 13 . . // Append adds a new value to the last block. 14 10ms 10ms func (ir *IR) Append(instr Value) Value { 15 30ms 350ms existing := ir.Block().FindExisting(instr) return ir.Blocks[len(ir.Blocks)-1] IR.go:27 ⋮ ⋮ 16 . . 17 10ms 10ms if existing != nil { 18 . . return existing 19 . . } 20 . . 21 60ms 110ms ir.Block().Append(instr) b.Instructions = append(b.Instructions, value) Block.go:125 ⋮ ⋮ ⋮ ⋮ return ir.Blocks[len(ir.Blocks)-1] IR.go:27 22 . . return instr 23 . . }
git.urbach.dev/cli/q/src/ssa.(*IR).Block
/home/user/q/src/ssa/IR.go
Total: 100ms 100ms (flat, cum) 0.27% 25 . . // Block returns the last block. 26 . . func (ir *IR) Block() *Block { 27 100ms 100ms return ir.Blocks[len(ir.Blocks)-1] 28 . . } 29 . .
git.urbach.dev/cli/q/src/ssa.(*IR).ComputeUsers
/home/user/q/src/ssa/IR.go
Total: 160ms 790ms (flat, cum) 2.10% 31 . . func (ir *IR) ComputeUsers() { 32 . . for _, block := range ir.Blocks { 33 40ms 40ms for _, value := range block.Instructions { 34 40ms 240ms for _, input := range value.Inputs() { 35 80ms 510ms input.AddUser(value) 36 . . } 37 . . } 38 . . } 39 . . } 40 . .
git.urbach.dev/cli/q/src/ssa.(*IR).ExitBlocks
/home/user/q/src/ssa/IR.go
Total: 30ms 100ms (flat, cum) 0.27% 49 . . return count 50 . . } 51 . . 52 . . // ExitBlocks is an iterator for all exit blocks. 53 . . func (ir *IR) ExitBlocks(yield func(*Block) bool) { 54 30ms 30ms for _, block := range ir.Blocks { 55 . . _, returns := block.Last().(*Return) 56 . . 57 . . if !returns { 58 . . continue 59 . . } 60 . . 61 . 70ms if !yield(block) { for _, value := range exitBlock.Identifiers { verifyDeallocation.go:12 ⋮ resource, isResource := value.Type().(*types.Resource) verifyDeallocation.go:23 ⋮ for _, value := range exitBlock.Identifiers { verifyDeallocation.go:12 62 . . return 63 . . } 64 . . } 65 . . }
git.urbach.dev/cli/q/src/ssa.(*IR).IsIdentified
/home/user/q/src/ssa/IR.go
Total: 20ms 30ms (flat, cum) 0.08% 66 . . 67 . . // IsIdentified returns true if the value can be obtained from one of the identifiers. 68 . . func (ir *IR) IsIdentified(value Value) bool { 69 . . for _, block := range ir.Blocks { 70 20ms 30ms if block.IsIdentified(value) { for _, existing := range b.Identifiers { Block.go:212 ⋮ ⋮ 71 . . return true 72 . . } 73 . . } 74 . . 75 . . return false
git.urbach.dev/cli/q/src/ssa.(*IR).ReplaceAll
/home/user/q/src/ssa/IR.go
Total: 50ms 50ms (flat, cum) 0.13% 76 . . } 77 . . 78 . . // ReplaceAll replaces all occurrences of the given `old` value with the `new` value. 79 . . func (ir *IR) ReplaceAll(old Value, new Value) { 80 . . for _, block := range ir.Blocks { 81 20ms 20ms for _, value := range block.Instructions { 82 30ms 30ms value.Replace(old, new) 83 . . } 84 . . } 85 . . }
git.urbach.dev/cli/q/src/token.Tokenize
/home/user/q/src/token/Tokenize.go
Total: 360ms 990ms (flat, cum) 2.64% 1 . . package token 2 . . 3 . . // Tokenize turns the file contents into a list of tokens. 4 10ms 10ms func Tokenize(buffer []byte) List { 5 . . var ( 6 . . i Position 7 . 330ms tokens = make(List, 0, 8+len(buffer)/2) 8 . . ) 9 . . 10 10ms 10ms for i < Position(len(buffer)) { 11 100ms 100ms switch buffer[i] { 12 50ms 50ms case ' ', '\t', '\r': 13 . . case ',': 14 . . tokens = append(tokens, Token{Kind: Separator, Position: i, Length: 1}) 15 . . case '(': 16 20ms 20ms tokens = append(tokens, Token{Kind: GroupStart, Position: i, Length: 1}) 17 10ms 10ms case ')': 18 10ms 10ms tokens = append(tokens, Token{Kind: GroupEnd, Position: i, Length: 1}) 19 . . case '{': 20 . . tokens = append(tokens, Token{Kind: BlockStart, Position: i, Length: 1}) 21 . . case '}': 22 . . tokens = append(tokens, Token{Kind: BlockEnd, Position: i, Length: 1}) 23 . . case '[': 24 . . tokens = append(tokens, Token{Kind: ArrayStart, Position: i, Length: 1}) 25 . . case ']': 26 . . tokens = append(tokens, Token{Kind: ArrayEnd, Position: i, Length: 1}) 27 . . case '\n': 28 . . tokens = append(tokens, Token{Kind: NewLine, Position: i, Length: 1}) 29 10ms 10ms case '-': 30 10ms 40ms tokens, i = dash(tokens, buffer, i) 31 20ms 20ms case '/': 32 . . tokens, i = slash(tokens, buffer, i) 33 . . continue 34 10ms 10ms case '"', '\'': 35 . 20ms tokens, i = quote(tokens, buffer, i) 36 . . continue 37 10ms 10ms case '0': 38 . 20ms tokens, i = zero(tokens, buffer, i) 39 . . continue 40 . . case '#': 41 . . tokens, i = hash(tokens, buffer, i) 42 . . continue 43 . . default: 44 30ms 30ms if isIdentifierStart(buffer[i]) { return isLetter(c) || c == '_' identifier.go:53 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') identifier.go:57 ⋮ 45 30ms 250ms tokens, i = identifier(tokens, buffer, i) 46 . . continue 47 . . } 48 . . 49 10ms 10ms if isDigit(buffer[i]) { return c >= '0' && c <= '9' digit.go:25 50 . 10ms tokens, i = digit(tokens, buffer, i) 51 . . continue 52 . . } 53 . . 54 20ms 20ms if isOperator(buffer[i]) { switch c { operator.go:82 ⋮ case '=', ':', '.', '+', '-', '*', '/', '<', '>', '&', '|', '^', '%', '!': operator.go:83 55 . . tokens, i = operator(tokens, buffer, i) 56 . . continue 57 . . } 58 . . 59 . . tokens = append(tokens, Token{Kind: Invalid, Position: i, Length: 1})
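Tokenize is a single-pass switch over raw bytes that preallocates the token list at 8+len(buffer)/2 entries. The following is a stripped-down sketch of that shape with invented token kinds and helpers, not the actual token package API:

package main

import "fmt"

type tok struct {
	kind string
	pos  int
	text string
}

func isSpace(c byte) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' }
func isDigit(c byte) bool { return c >= '0' && c <= '9' }

func tokenize(src []byte) []tok {
	// Preallocating roughly one token per two input bytes mirrors the
	// make(List, 0, 8+len(buffer)/2) above and avoids most append growth.
	tokens := make([]tok, 0, 8+len(src)/2)
	for i := 0; i < len(src); {
		c := src[i]
		switch {
		case isSpace(c):
			i++
		case c == '(' || c == ')':
			tokens = append(tokens, tok{"group", i, string(c)})
			i++
		case isDigit(c):
			j := i
			for j < len(src) && isDigit(src[j]) {
				j++
			}
			tokens = append(tokens, tok{"number", i, string(src[i:j])})
			i = j
		default:
			j := i
			for j < len(src) && !isSpace(src[j]) && src[j] != '(' && src[j] != ')' {
				j++
			}
			tokens = append(tokens, tok{"identifier", i, string(src[i:j])})
			i = j
		}
	}
	return tokens
}

func main() {
	fmt.Println(tokenize([]byte("add(40 2)")))
}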
internal/runtime/maps.newTable
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 0 130ms (flat, cum) 0.35% 74 . . func newTable(typ *abi.SwissMapType, capacity uint64, index int, localDepth uint8) *table { 75 . . if capacity < abi.SwissMapGroupSlots { 76 . . capacity = abi.SwissMapGroupSlots 77 . . } 78 . . 79 . 10ms t := &table{ 80 . . index: index, 81 . . localDepth: localDepth, 82 . . } 83 . . 84 . . if capacity > maxTableCapacity { 85 . . panic("initial table capacity too large") 86 . . } 87 . . 88 . . // N.B. group count must be a power of two for probeSeq to visit every 89 . . // group. 90 . . capacity, overflow := alignUpPow2(capacity) 91 . . if overflow { 92 . . panic("rounded-up capacity overflows uint64") 93 . . } 94 . . 95 . 120ms t.reset(typ, uint16(capacity)) 96 . . 97 . . return t 98 . . } 99 . .
internal/runtime/maps.(*table).reset
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 0 120ms (flat, cum) 0.32% 100 . . // reset resets the table with new, empty groups with the specified new total 101 . . // capacity. 102 . . func (t *table) reset(typ *abi.SwissMapType, capacity uint16) { 103 . . groupCount := uint64(capacity) / abi.SwissMapGroupSlots 104 . 120ms t.groups = newGroups(typ, groupCount) data: newarray(typ.Group, int(length)), group.go:316 105 . . t.capacity = capacity 106 . . t.growthLeft = t.maxGrowthLeft() 107 . . 108 . . for i := uint64(0); i <= t.groups.lengthMask; i++ { 109 . . g := t.groups.group(typ, i)
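newTable and reset appear in a profile when a map has to grow and re-split its tables. Where the insert count is known in advance, a capacity hint in make sizes the table once up front; a minimal example:

package main

import "fmt"

func main() {
	// The hint lets the runtime size the hash table once instead of
	// growing it (and allocating new groups) as entries are added.
	m := make(map[int]int, 1024)
	for i := 0; i < 1024; i++ {
		m[i] = i
	}
	fmt.Println(len(m))
}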
internal/runtime/maps.(*Iter).Init
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 70ms 140ms (flat, cum) 0.37% 646 . . // are the group index. 647 . . entryIdx uint64 648 . . } 649 . . 650 . . // Init initializes Iter for iteration. 651 10ms 10ms func (it *Iter) Init(typ *abi.SwissMapType, m *Map) { 652 10ms 10ms it.typ = typ 653 . . 654 20ms 20ms if m == nil || m.used == 0 { 655 . . return 656 . . } 657 . . 658 . . dirIdx := 0 659 . . var groupSmall groupReference 660 . . if m.dirLen <= 0 { 661 . . // Use dirIdx == -1 as sentinel for small maps. 662 . . dirIdx = -1 663 . . groupSmall.data = m.dirPtr 664 . . } 665 . . 666 10ms 10ms it.m = m 667 . 40ms it.entryOffset = rand() 668 10ms 40ms it.dirOffset = rand() 669 . . it.globalDepth = m.globalDepth 670 . . it.dirIdx = dirIdx 671 10ms 10ms it.group = groupSmall 672 . . it.clearSeq = m.clearSeq 673 . . } 674 . . 675 . . func (it *Iter) Initialized() bool { 676 . . return it.typ != nil
internal/runtime/maps.(*Iter).Next
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 180ms 180ms (flat, cum) 0.48% 778 . . // 779 . . // The table can be mutated during iteration, though there is no guarantee that 780 . . // the mutations will be visible to the iteration. 781 . . // 782 . . // Init must be called prior to Next. 783 10ms 10ms func (it *Iter) Next() { 784 . . if it.m == nil { 785 . . // Map was empty at Iter.Init. 786 10ms 10ms it.key = nil 787 . . it.elem = nil 788 10ms 10ms return 789 . . } 790 . . 791 10ms 10ms if it.m.writing != 0 { 792 . . fatal("concurrent map iteration and map write") 793 . . return 794 . . } 795 . . 796 . . if it.dirIdx < 0 { 797 . . // Map was small at Init. 798 40ms 40ms for ; it.entryIdx < abi.SwissMapGroupSlots; it.entryIdx++ { 799 . . k := uintptr(it.entryIdx+it.entryOffset) % abi.SwissMapGroupSlots 800 . . 801 20ms 20ms if (it.group.ctrls().get(k) & ctrlEmpty) == ctrlEmpty { 802 . . // Empty or deleted. 803 . . continue 804 . . } 805 . . 806 80ms 80ms key := it.group.key(it.typ, k) offset := groupSlotsOffset + i*typ.SlotSize group.go:285 ⋮ 807 . . if it.typ.IndirectKey() { 808 . . key = *((*unsafe.Pointer)(key)) 809 . . } 810 . . 811 . . // As below, if we have grown to a full map since Init,
internal/runtime/maps.(*Iter).Next
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 50ms 50ms (flat, cum) 0.13% 830 . . } else { 831 . . key = newKey 832 . . elem = newElem 833 . . } 834 . . } else { 835 10ms 10ms elem = it.group.elem(it.typ, k) offset := groupSlotsOffset + i*typ.SlotSize + typ.ElemOff group.go:292 836 . . if it.typ.IndirectElem() { 837 . . elem = *((*unsafe.Pointer)(elem)) 838 . . } 839 . . } 840 . . 841 . . it.entryIdx++ 842 . . it.key = key 843 . . it.elem = elem 844 10ms 10ms return 845 . . } 846 30ms 30ms it.key = nil 847 . . it.elem = nil 848 . . return 849 . . } 850 . . 851 . . if it.globalDepth != it.m.globalDepth {
internal/runtime/maps.(*Iter).Next
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 20ms 20ms (flat, cum) 0.053% 889 . . // Continue iteration until we find a full slot. 890 . . for ; it.dirIdx < it.m.dirLen; it.nextDirIdx() { 891 . . // Resolve the table. 892 . . if it.tab == nil { 893 . . dirIdx := int((uint64(it.dirIdx) + it.dirOffset) & uint64(it.m.dirLen-1)) 894 10ms 10ms newTab := it.m.directoryAt(uintptr(dirIdx)) 895 10ms 10ms if newTab.index != dirIdx { 896 . . // Normally we skip past all duplicates of the 897 . . // same entry in the table (see updates to 898 . . // it.dirIdx at the end of the loop below), so 899 . . // this case wouldn't occur. 900 . . //
internal/runtime/maps.(*Iter).Next
/usr/lib/go/src/internal/runtime/maps/table.go
Total: 10ms 10ms (flat, cum) 0.027% 932 . . // it is cheaper to check a single slot than do a full control 933 . . // match. 934 . . 935 . . entryIdx := (it.entryIdx + it.entryOffset) & entryMask 936 . . slotIdx := uintptr(entryIdx & (abi.SwissMapGroupSlots - 1)) 937 10ms 10ms if slotIdx == 0 || it.group.data == nil { 938 . . // Only compute the group (a) when we switch 939 . . // groups (slotIdx rolls over) and (b) on the 940 . . // first iteration in this table (slotIdx may 941 . . // not be zero due to entryOffset). 942 . . groupIdx := entryIdx >> abi.SwissMapGroupSlotsBits
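Iter.Init draws random entryOffset and dirOffset values, which is why the order of ranging over a map differs between iterations and runs; a quick demonstration:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3, "d": 4}
	// The random offsets chosen in Iter.Init make the order below vary
	// from one loop (and one run) to the next.
	for i := 0; i < 3; i++ {
		for k := range m {
			fmt.Print(k, " ")
		}
		fmt.Println()
	}
}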
runtime.acquirem
/usr/lib/go/src/runtime/runtime1.go
Total: 190ms 190ms (flat, cum) 0.51% 626 . . // Helpers for Go. Must be NOSPLIT, must only call NOSPLIT functions, and must not block. 627 . . 628 . . //go:nosplit 629 . . func acquirem() *m { 630 . . gp := getg() 631 110ms 110ms gp.m.locks++ 632 80ms 80ms return gp.m 633 . . } 634 . .
runtime.releasem
/usr/lib/go/src/runtime/runtime1.go
Total: 120ms 120ms (flat, cum) 0.32% 636 . . func releasem(mp *m) { 637 . . gp := getg() 638 50ms 50ms mp.locks-- 639 70ms 70ms if mp.locks == 0 && gp.preempt { 640 . . // restore the preemption request in case we've cleared it in newstack 641 . . gp.stackguard0 = stackPreempt 642 . . } 643 . . } 644 . .
runtime.interhash
/usr/lib/go/src/runtime/alg.go
Total: 110ms 130ms (flat, cum) 0.35% 128 . . func c128hash(p unsafe.Pointer, h uintptr) uintptr { 129 . . x := (*[2]float64)(p) 130 . . return f64hash(unsafe.Pointer(&x[1]), f64hash(unsafe.Pointer(&x[0]), h)) 131 . . } 132 . . 133 10ms 10ms func interhash(p unsafe.Pointer, h uintptr) uintptr { 134 . . a := (*iface)(p) 135 . . tab := a.tab 136 . . if tab == nil { 137 . . return h 138 . . } 139 . . t := tab.Type 140 70ms 70ms if t.Equal == nil { 141 . . // Check hashability here. We could do this check inside 142 . . // typehash, but we want to report the topmost type in 143 . . // the error text (e.g. in a struct with a field of slice type 144 . . // we want to report the struct, not the slice). 145 . . panic(errorString("hash of unhashable type " + toRType(t).string())) 146 . . } 147 10ms 10ms if isDirectIface(t) { return t.Kind_&abi.KindDirectIface != 0 typekind.go:11 148 20ms 40ms return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0) 149 . . } else { 150 . . return c1 * typehash(t, a.data, h^c0) 151 . . } 152 . . } 153 . .
runtime.nilinterhash
/usr/lib/go/src/runtime/alg.go
Total: 10ms 60ms (flat, cum) 0.16% 159 . . // 160 . . // Do not remove or change the type signature. 161 . . // See go.dev/issue/67401. 162 . . // 163 . . //go:linkname nilinterhash 164 10ms 10ms func nilinterhash(p unsafe.Pointer, h uintptr) uintptr { 165 . . a := (*eface)(p) 166 . . t := a._type 167 . . if t == nil { 168 . . return h 169 . . } 170 . . if t.Equal == nil { 171 . . // See comment in interhash above. 172 . . panic(errorString("hash of unhashable type " + toRType(t).string())) 173 . . } 174 . . if isDirectIface(t) { 175 . . return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0) 176 . . } else { 177 . 50ms return c1 * typehash(t, a.data, h^c0) 178 . . } 179 . . } 180 . . 181 . . // typehash computes the hash of the object of type t at address p. 182 . . // h is the seed.
runtime.typehash
/usr/lib/go/src/runtime/alg.go
Total: 20ms 70ms (flat, cum) 0.19% 197 . . // 198 . . // Do not remove or change the type signature. 199 . . // See go.dev/issue/67401. 200 . . // 201 . . //go:linkname typehash 202 10ms 50ms func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr { 203 . . if t.TFlag&abi.TFlagRegularMemory != 0 { 204 . . // Handle ptr sizes specially, see issue 37086. 205 . . switch t.Size_ { 206 . . case 4: 207 . . return memhash32(p, h) 208 . . case 8: 209 . 10ms return memhash64(p, h) 210 . . default: 211 . . return memhash(p, h, t.Size_) 212 . . } 213 . . } 214 10ms 10ms switch t.Kind_ & abi.KindMask { 215 . . case abi.Float32: 216 . . return f32hash(p, h) 217 . . case abi.Float64: 218 . . return f64hash(p, h) 219 . . case abi.Complex64:
runtime.strequal
/usr/lib/go/src/runtime/alg.go
Total: 30ms 50ms (flat, cum) 0.13% 283 . . return *(*complex64)(p) == *(*complex64)(q) 284 . . } 285 . . func c128equal(p, q unsafe.Pointer) bool { 286 . . return *(*complex128)(p) == *(*complex128)(q) 287 . . } 288 10ms 10ms func strequal(p, q unsafe.Pointer) bool { 289 20ms 40ms return *(*string)(p) == *(*string)(q)
runtime.interequal
/usr/lib/go/src/runtime/alg.go
Total: 30ms 30ms (flat, cum) 0.08% 291 10ms 10ms func interequal(p, q unsafe.Pointer) bool { 292 . . x := *(*iface)(p) 293 . . y := *(*iface)(q) 294 20ms 20ms return x.tab == y.tab && ifaceeq(x.tab, x.data, y.data) 295 . . } 296 . . func nilinterequal(p, q unsafe.Pointer) bool { 297 . . x := *(*eface)(p) 298 . . y := *(*eface)(q) 299 . . return x._type == y._type && efaceeq(x._type, x.data, y.data)
runtime.efaceeq
/usr/lib/go/src/runtime/alg.go
Total: 0 50ms (flat, cum) 0.13% 310 . . // Direct interface types are ptr, chan, map, func, and single-element structs/arrays thereof. 311 . . // Maps and funcs are not comparable, so they can't reach here. 312 . . // Ptrs, chans, and single-element items can be compared directly using ==. 313 . . return x == y 314 . . } 315 . 50ms return eq(x, y)
runtime.ifaceeq
/usr/lib/go/src/runtime/alg.go
Total: 100ms 100ms (flat, cum) 0.27% 317 70ms 70ms func ifaceeq(tab *itab, x, y unsafe.Pointer) bool { 318 . . if tab == nil { 319 . . return true 320 . . } 321 . . t := tab.Type 322 10ms 10ms eq := t.Equal 323 20ms 20ms if eq == nil { 324 . . panic(errorString("comparing uncomparable type " + toRType(t).string())) 325 . . } 326 . . if isDirectIface(t) { 327 . . // See comment in efaceeq. 328 . . return x == y
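interequal and ifaceeq compare the itab/type word first and only then call the type's Equal function; efaceeq and ifaceeq panic when the dynamic type is not comparable, which is the "comparing uncomparable type" branch above. A short standalone example of both outcomes:

package main

import "fmt"

func main() {
	var x, y any = 1, 1
	fmt.Println(x == y) // true: same dynamic type, equal values

	defer func() { fmt.Println("recovered:", recover()) }()
	var a, b any = []int{1}, []int{1}
	fmt.Println(a == b) // panics: comparing uncomparable type []int
}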
runtime.gogo
/usr/lib/go/src/runtime/asm_arm64.s
Total: 20ms 20ms (flat, cum) 0.053% 187 . . // restore state from Gobuf; longjmp 188 . . TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8 189 . . MOVD buf+0(FP), R5 190 . . MOVD gobuf_g(R5), R6 191 . . MOVD 0(R6), R4 // make sure g != nil 192 20ms 20ms B gogo<>(SB) 193 . .
gogo
/usr/lib/go/src/runtime/asm_arm64.s
Total: 30ms 40ms (flat, cum) 0.11% 194 . . TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0 195 20ms 20ms MOVD R6, g 196 . 10ms BL runtime·save_g(SB) 197 . . 198 . . MOVD gobuf_sp(R5), R0 199 10ms 10ms MOVD R0, RSP 200 . . MOVD gobuf_bp(R5), R29 201 . . MOVD gobuf_lr(R5), LR 202 . . MOVD gobuf_ctxt(R5), R26 203 . . MOVD $0, gobuf_sp(R5) 204 . . MOVD $0, gobuf_bp(R5)
runtime.mcall
/usr/lib/go/src/runtime/asm_arm64.s
Total: 10ms 5.59s (flat, cum) 14.89% 217 . . 218 . . // Save caller state in g->sched 219 . . MOVD RSP, R0 220 . . MOVD R0, (g_sched+gobuf_sp)(g) 221 . . MOVD R29, (g_sched+gobuf_bp)(g) 222 10ms 10ms MOVD LR, (g_sched+gobuf_pc)(g) 223 . . MOVD $0, (g_sched+gobuf_lr)(g) 224 . . 225 . . // Switch to m->g0 & its stack, call fn. 226 . . MOVD g, R3 227 . . MOVD g_m(g), R8 228 . . MOVD m_g0(R8), g 229 . . BL runtime·save_g(SB) 230 . . CMP g, R3 231 . . BNE 2(PC) 232 . . B runtime·badmcall(SB) 233 . . 234 . . MOVD (g_sched+gobuf_sp)(g), R0 235 . . MOVD R0, RSP // sp = m->g0->sched.sp 236 . . MOVD $0, R29 // clear frame pointer, as caller may execute on another M 237 . . MOVD R3, R0 // arg = g 238 . . MOVD $0, -16(RSP) // dummy LR 239 . . SUB $16, RSP 240 . . MOVD 0(R26), R4 // code pointer 241 . 5.58s BL (R4) 242 . . B runtime·badmcall2(SB) 243 . . 244 . . // systemstack_switch is a dummy routine that systemstack leaves at the bottom 245 . . // of the G stack. We need to distinguish the routine that 246 . . // lives at the bottom of the G stack from the one that lives
runtime.systemstack
/usr/lib/go/src/runtime/asm_arm64.s
Total: 10ms 2.43s (flat, cum) 6.47% 279 . . // Switch stacks. 280 . . // The original frame pointer is stored in R29, 281 . . // which is useful for stack unwinding. 282 . . // Save our state in g->sched. Pretend to 283 . . // be systemstack_switch if the G stack is scanned. 284 . 10ms BL gosave_systemstack_switch<>(SB) 285 . . 286 . . // switch to g0 287 . . MOVD R5, g 288 . 20ms BL runtime·save_g(SB) 289 . . MOVD (g_sched+gobuf_sp)(g), R3 290 . . MOVD R3, RSP 291 . . 292 . . // call target function 293 . . MOVD 0(R26), R3 // code pointer 294 . 2.39s BL (R3) 295 . . 296 . . // switch back to g 297 . . MOVD g_m(g), R3 298 . . MOVD m_curg(R3), g 299 . . BL runtime·save_g(SB) 300 . . MOVD (g_sched+gobuf_sp)(g), R0 301 . . MOVD R0, RSP 302 . . MOVD (g_sched+gobuf_bp)(g), R29 303 . . MOVD $0, (g_sched+gobuf_sp)(g) 304 . . MOVD $0, (g_sched+gobuf_bp)(g) 305 10ms 10ms RET 306 . . 307 . . noswitch: 308 . . // already on m stack, just call directly 309 . . // Using a tail call here cleans up tracebacks since we won't stop 310 . . // at an intermediate systemstack.
runtime.morestack
/usr/lib/go/src/runtime/asm_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 350 . . // calling the scheduler calling newm calling gc), so we must 351 . . // record an argument size. For that purpose, it has no arguments. 352 . . TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 353 . . // Cannot grow scheduler stack (m->g0). 354 . . MOVD g_m(g), R8 355 10ms 10ms MOVD m_g0(R8), R4 356 . . 357 . . // Called from f. 358 . . // Set g->sched to context in f 359 . . MOVD RSP, R0 360 . . MOVD R0, (g_sched+gobuf_sp)(g)
runtime.memhash64
/usr/lib/go/src/runtime/asm_arm64.s
Total: 30ms 30ms (flat, cum) 0.08% 605 . . TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24 606 . . MOVB runtime·useAeshash(SB), R10 607 . . CBZ R10, noaes 608 . . MOVD $runtime·aeskeysched+0(SB), R3 609 . . 610 20ms 20ms VEOR V0.B16, V0.B16, V0.B16 611 . . VLD1 (R3), [V2.B16] 612 . . VLD1 (R0), V0.D[1] 613 10ms 10ms VMOV R1, V0.D[0] 614 . . 615 . . AESE V2.B16, V0.B16 616 . . AESMC V0.B16, V0.B16 617 . . AESE V2.B16, V0.B16 618 . . AESMC V0.B16, V0.B16
runtime.strhash
/usr/lib/go/src/runtime/asm_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 631 . . noaes: 632 . . B runtime·memhashFallback<ABIInternal>(SB) 633 . . 634 . . // func strhash(p unsafe.Pointer, h uintptr) uintptr 635 . . TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24 636 10ms 10ms MOVB runtime·useAeshash(SB), R10 637 . . CBZ R10, noaes 638 . . LDP (R0), (R0, R2) // string data / length 639 . . B aeshashbody<>(SB) 640 . . noaes: 641 . . B runtime·strhashFallback<ABIInternal>(SB)
aeshashbody
/usr/lib/go/src/runtime/asm_arm64.s
Total: 60ms 60ms (flat, cum) 0.16% 643 . . // R0: data 644 . . // R1: seed data 645 . . // R2: length 646 . . // At return, R0 = return value 647 . . TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0 648 30ms 30ms VEOR V30.B16, V30.B16, V30.B16 649 . . VMOV R1, V30.D[0] 650 20ms 20ms VMOV R2, V30.D[1] // load length into seed 651 . . 652 . . MOVD $runtime·aeskeysched+0(SB), R4 653 10ms 10ms VLD1.P 16(R4), [V0.B16] 654 . . AESE V30.B16, V0.B16 655 . . AESMC V0.B16, V0.B16 656 . . CMP $16, R2 657 . . BLO aes0to15 658 . . BEQ aes16
aeshashbody
/usr/lib/go/src/runtime/asm_arm64.s
Total: 90ms 90ms (flat, cum) 0.24% 669 . . VEOR V2.B16, V2.B16, V2.B16 670 . . TBZ $3, R2, less_than_8 671 . . VLD1.P 8(R0), V2.D[0] 672 . . 673 . . less_than_8: 674 10ms 10ms TBZ $2, R2, less_than_4 675 . . VLD1.P 4(R0), V2.S[2] 676 . . 677 . . less_than_4: 678 20ms 20ms TBZ $1, R2, less_than_2 679 10ms 10ms VLD1.P 2(R0), V2.H[6] 680 . . 681 . . less_than_2: 682 20ms 20ms TBZ $0, R2, done 683 10ms 10ms VLD1 (R0), V2.B[14] 684 . . done: 685 10ms 10ms AESE V0.B16, V2.B16 686 . . AESMC V2.B16, V2.B16 687 10ms 10ms AESE V0.B16, V2.B16 688 . . AESMC V2.B16, V2.B16 689 . . AESE V0.B16, V2.B16 690 . . AESMC V2.B16, V2.B16 691 . . 692 . . VMOV V2.D[0], R0
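aeshashbody is the hardware AES string/bytes hash used on arm64 when useAeshash is set. For hashing in user code, hash/maphash exposes the runtime's hash function; that it takes the same AES path on this hardware is an assumption on my part, not something this profile proves. A minimal usage example:

package main

import (
	"fmt"
	"hash/maphash"
)

func main() {
	// maphash seeds the runtime hash; equal strings hash equally under
	// the same seed, and seeds differ between processes.
	seed := maphash.MakeSeed()
	fmt.Printf("%#x\n", maphash.String(seed, "hello"))
}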
gosave_systemstack_switch
/usr/lib/go/src/runtime/asm_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 985 . . MOVD R0, (g_sched+gobuf_sp)(g) 986 . . MOVD R29, (g_sched+gobuf_bp)(g) 987 . . MOVD $0, (g_sched+gobuf_lr)(g) 988 . . // Assert ctxt is zero. See func save. 989 . . MOVD (g_sched+gobuf_ctxt)(g), R0 990 10ms 10ms CBZ R0, 2(PC) 991 . . CALL runtime·abort(SB) 992 . . RET 993 . . 994 . . // func asmcgocall_no_g(fn, arg unsafe.Pointer) 995 . . // Call fn(arg) aligned appropriately for the gcc ABI.
git.urbach.dev/cli/q/src/expression.Parse
/home/user/q/src/expression/Parse.go
Total: 270ms 3.35s (flat, cum) 8.92% 3 . . import ( 4 . . "git.urbach.dev/cli/q/src/token" 5 . . ) 6 . . 7 . . // Parse generates an expression tree from tokens. 8 10ms 100ms func Parse(tokens token.List) *Expression { 9 . . var ( 10 . . cursor *Expression 11 . . root *Expression 12 . . groupLevel = 0 13 . . groupPosition = 0 14 . . ) 15 . . 16 80ms 80ms for i, t := range tokens { 17 20ms 20ms if t.Kind == token.GroupStart || t.Kind == token.ArrayStart || t.Kind == token.BlockStart { 18 . . groupLevel++ 19 . . 20 . . if groupLevel == 1 { 21 . . groupPosition = i + 1 22 . . } 23 . . 24 . . continue 25 . . } 26 . . 27 . . if t.Kind == token.GroupEnd || t.Kind == token.ArrayEnd || t.Kind == token.BlockEnd { 28 20ms 20ms groupLevel-- 29 . . 30 . . if groupLevel != 0 { 31 . . continue 32 . . } 33 . . 34 . 1.18s root, cursor = handleGroupEnd(tokens, root, cursor, groupPosition, i, t) 35 . . continue 36 . . } 37 . . 38 10ms 10ms if groupLevel > 0 { 39 . . continue 40 . . } 41 . . 42 100ms 100ms if cursor != nil && cursor.Token.Kind == token.Cast && len(cursor.Children) < 2 { 43 . . cursor.AddChild(&newTypeExpression(tokens[i:]).Expression) 44 . . return root 45 . . } 46 . . 47 10ms 10ms if t.Kind.IsLiteral() { return k == Identifier || k == Number || k == String || k == Rune || k.IsBuiltin() Kind.go:116 48 . 1.54s root, cursor = handleLiteral(root, cursor, t) 49 . . continue 50 . . } 51 . . 52 . . if !t.Kind.IsOperator() { 53 . . continue 54 . . } 55 . . 56 . . if cursor == nil { 57 . . cursor = newLeaf(t) 58 . . cursor.precedence = precedence(t.Kind) 59 . . root = cursor 60 . . continue 61 . . } 62 . . 63 10ms 130ms node := newLeaf(t) return &Expression{Token: t} newLeaf.go:7 ⋮ 64 . . node.precedence = precedence(t.Kind) 65 . . 66 . . if cursor.Token.Kind.IsOperator() { 67 . 70ms root = handleOperator(root, cursor, node) 68 . . } else { 69 10ms 90ms node.AddChild(cursor) expr.Children = append(expr.Children, child) Expression.go:24 ⋮ expr.Children = make([]*Expression, 0, 2) Expression.go:21 70 . . root = node 71 . . } 72 . . 73 . . cursor = node 74 . . }
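Parse builds the expression tree iteratively, walking tokens with a cursor and comparing operator precedences as it goes. The sketch below is a classic recursive precedence-climbing parser over hypothetical single-token operands: the same idea, not the q compiler's algorithm.

package main

import "fmt"

type node struct {
	tok         string
	left, right *node
}

var prec = map[string]int{"+": 1, "*": 2}

// parseExpr parses tokens starting at *pos, consuming operators whose
// precedence is at least minPrec.
func parseExpr(toks []string, pos *int, minPrec int) *node {
	left := &node{tok: toks[*pos]}
	*pos++
	for *pos < len(toks) {
		op := toks[*pos]
		p, ok := prec[op]
		if !ok || p < minPrec {
			break
		}
		*pos++
		right := parseExpr(toks, pos, p+1)
		left = &node{tok: op, left: left, right: right}
	}
	return left
}

func dump(n *node) string {
	if n.left == nil {
		return n.tok
	}
	return "(" + dump(n.left) + n.tok + dump(n.right) + ")"
}

func main() {
	pos := 0
	fmt.Println(dump(parseExpr([]string{"1", "+", "2", "*", "3"}, &pos, 1)))
	// prints (1+(2*3))
}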
runtime.(*mheap).nextSpanForSweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 0 20ms (flat, cum) 0.053% 98 . . for sc := sweep.centralIndex.load(); sc < numSweepClasses; sc++ { 99 . . spc, full := sc.split() 100 . . c := &h.central[spc].mcentral 101 . . var s *mspan 102 . . if full { 103 . 20ms s = c.fullUnswept(sg).pop() 104 . . } else { 105 . . s = c.partialUnswept(sg).pop() 106 . . } 107 . . if s != nil { 108 . . // Write down that we found something so future sweepers
runtime.(*activeSweep).begin
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 30ms (flat, cum) 0.08% 146 . . // 147 . . // Even if the sweepLocker is invalid, its sweepGen is always valid. 148 . . func (a *activeSweep) begin() sweepLocker { 149 . . for { 150 . . state := a.state.Load() 151 20ms 20ms if state&sweepDrainedMask != 0 { 152 10ms 10ms return sweepLocker{mheap_.sweepgen, false} 153 . . } 154 . . if a.state.CompareAndSwap(state, state+1) { 155 . . return sweepLocker{mheap_.sweepgen, true} 156 . . }
runtime.(*activeSweep).end
/usr/lib/go/src/runtime/mgcsweep.go
Total: 40ms 40ms (flat, cum) 0.11% 158 . . } 159 . . 160 . . // end deregisters a sweeper. Must be called once for each time 161 . . // begin is called if the sweepLocker is valid. 162 10ms 10ms func (a *activeSweep) end(sl sweepLocker) { 163 10ms 10ms if sl.sweepGen != mheap_.sweepgen { 164 . . throw("sweeper left outstanding across sweep generations") 165 . . } 166 . . for { 167 . . state := a.state.Load() 168 . . if (state&^sweepDrainedMask)-1 >= sweepDrainedMask { 169 . . throw("mismatched begin/end of activeSweep") 170 . . } 171 20ms 20ms if a.state.CompareAndSwap(state, state-1) { return Cas(&u.value, old, new) types.go:236 172 . . if state-1 != sweepDrainedMask { 173 . . return 174 . . } 175 . . // We're the last sweeper, and there's nothing left to sweep. 176 . . if debug.gcpacertrace > 0 {
runtime.(*activeSweep).isDone
/usr/lib/go/src/runtime/mgcsweep.go
Total: 10ms 10ms (flat, cum) 0.027% 209 . . 210 . . // isDone returns true if all sweep work has been drained and no more 211 . . // outstanding sweepers exist. That is, when the sweep phase is 212 . . // completely done. 213 . . func (a *activeSweep) isDone() bool { 214 10ms 10ms return a.state.Load() == sweepDrainedMask 215 . . } 216 . . 217 . . // reset sets up the activeSweep for the next sweep cycle. 218 . . // 219 . . // The world must be stopped.
runtime.bgsweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 0 360ms (flat, cum) 0.96% 295 . . // isn't spare idle time available on other cores. If there's available idle 296 . . // time, helping to sweep can reduce allocation latencies by getting ahead of 297 . . // the proportional sweeper and having spans ready to go for allocation. 298 . . const sweepBatchSize = 10 299 . . nSwept := 0 300 . 360ms for sweepone() != ^uintptr(0) { 301 . . nSwept++ 302 . . if nSwept%sweepBatchSize == 0 { 303 . . goschedIfBusy() 304 . . } 305 . . }
runtime.(*sweepLocker).tryAcquire
/usr/lib/go/src/runtime/mgcsweep.go
Total: 10ms 10ms (flat, cum) 0.027% 341 . . func (l *sweepLocker) tryAcquire(s *mspan) (sweepLocked, bool) { 342 . . if !l.valid { 343 . . throw("use of invalid sweepLocker") 344 . . } 345 . . // Check before attempting to CAS. 346 10ms 10ms if atomic.Load(&s.sweepgen) != l.sweepGen-2 { 347 . . return sweepLocked{}, false 348 . . } 349 . . // Attempt to acquire sweep ownership of s. 350 . . if !atomic.Cas(&s.sweepgen, l.sweepGen-2, l.sweepGen-1) { 351 . . return sweepLocked{}, false
runtime.sweepone
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 530ms (flat, cum) 1.41% 358 . . func sweepone() uintptr { 359 . . gp := getg() 360 . . 361 . . // Increment locks to ensure that the goroutine is not preempted 362 . . // in the middle of sweep thus leaving the span in an inconsistent state for next GC 363 10ms 10ms gp.m.locks++ 364 . . 365 . . // TODO(austin): sweepone is almost always called in a loop; 366 . . // lift the sweepLocker into its callers. 367 . . sl := sweep.active.begin() 368 . . if !sl.valid { 369 . . gp.m.locks-- 370 . . return ^uintptr(0) 371 . . } 372 . . 373 . . // Find a span to sweep. 374 . . npages := ^uintptr(0) 375 . . var noMoreWork bool 376 . . for { 377 . 20ms s := mheap_.nextSpanForSweep() 378 . . if s == nil { 379 . . noMoreWork = sweep.active.markDrained() 380 . . break 381 . . } 382 10ms 10ms if state := s.state.get(); state != mSpanInUse { 383 . . // This can happen if direct sweeping already 384 . . // swept this span, but in that case the sweep 385 . . // generation should always be up-to-date. 386 . . if !(s.sweepgen == sl.sweepGen || s.sweepgen == sl.sweepGen+3) { 387 . . print("runtime: bad span s.state=", state, " s.sweepgen=", s.sweepgen, " sweepgen=", sl.sweepGen, "\n") 388 . . throw("non in-use span in unswept list") 389 . . } 390 . . continue 391 . . } 392 10ms 20ms if s, ok := sl.tryAcquire(s); ok { 393 . . // Sweep the span we found. 394 . . npages = s.npages 395 . 430ms if s.sweep(false) { 396 . . // Whole span was freed. Count it toward the 397 . . // page reclaimer credit since these pages can 398 . . // now be used for span allocation. 399 . . mheap_.reclaimCredit.Add(npages) 400 . . } else { 401 . . // Span is still in-use, so this returned no 402 . . // pages to the heap and the span needs to 403 . . // move to the swept in-use list. 404 . . npages = 0 405 . . } 406 . . break 407 . . } 408 . . } 409 . 40ms sweep.active.end(sl) 410 . . 411 . . if noMoreWork { 412 . . // The sweep list is empty. There may still be 413 . . // concurrent sweeps running, but we're at least very 414 . . // close to done sweeping.
runtime.isSweepDone
/usr/lib/go/src/runtime/mgcsweep.go
Total: 10ms 10ms (flat, cum) 0.027% 453 . . // Note that this condition may transition from false to true at any 454 . . // time as the sweeper runs. It may transition from true to false if a 455 . . // GC runs; to prevent that the caller must be non-preemptible or must 456 . . // somehow block GC progress. 457 . . func isSweepDone() bool { 458 10ms 10ms return sweep.active.isDone() return a.state.Load() == sweepDrainedMask mgcsweep.go:214 459 . . } 460 . . 461 . . // Returns only when span s has been swept. 462 . . // 463 . . //go:nowritebarrier
runtime.(*mspan).ensureSwept
/usr/lib/go/src/runtime/mgcsweep.go
Total: 20ms 20ms (flat, cum) 0.053% 464 . . func (s *mspan) ensureSwept() { 465 . . // Caller must disable preemption. 466 . . // Otherwise when this function returns the span can become unswept again 467 . . // (if GC is triggered on another goroutine). 468 . . gp := getg() 469 10ms 10ms if gp.m.locks == 0 && gp.m.mallocing == 0 && gp != gp.m.g0 { 470 . . throw("mspan.ensureSwept: m is not locked") 471 . . } 472 . . 473 . . // If this operation fails, then that means that there are 474 . . // no more spans to be swept. In this case, either s has already 475 . . // been swept, or is about to be acquired for sweeping and swept. 476 10ms 10ms sl := sweep.active.begin() if state&sweepDrainedMask != 0 { mgcsweep.go:151 477 . . if sl.valid { 478 . . // The caller must be sure that the span is a mSpanInUse span. 479 . . if s, ok := sl.tryAcquire(s); ok { 480 . . s.sweep(false) 481 . . sweep.active.end(sl)
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 10ms 10ms (flat, cum) 0.027% 526 . . if trace.ok() { 527 . . trace.GCSweepSpan(s.npages * pageSize) 528 . . traceRelease(trace) 529 . . } 530 . . 531 10ms 10ms mheap_.pagesSwept.Add(int64(s.npages)) return Xadd64(&u.value, delta) types.go:344 532 . . 533 . . spc := s.spanclass 534 . . size := s.elemsize 535 . . 536 . . // The allocBits indicate which unmarked objects don't need to be
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 30ms (flat, cum) 0.08% 547 . . // In such case we need to queue finalizer for execution, 548 . . // mark the object as live and preserve the profile special. 549 . . // 2. A tiny object can have several finalizers setup for different offsets. 550 . . // If such object is not marked, we need to queue all finalizers at once. 551 . . // Both 1 and 2 are possible at the same time. 552 20ms 20ms hadSpecials := s.specials != nil 553 . . siter := newSpecialsIter(s) 554 10ms 10ms for siter.valid() { 555 . . // A finalizer can be set for an inner byte of an object, find object beginning. 556 . . objIndex := uintptr(siter.s.offset) / size 557 . . p := s.base() + objIndex*size 558 . . mbits := s.markBitsForIndex(objIndex) 559 . . if !mbits.isMarked() {
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 0 10ms (flat, cum) 0.027% 593 . . // Find the exact byte for which the special was setup 594 . . // (as opposed to object beginning). 595 . . special := siter.s 596 . . p := s.base() + uintptr(special.offset) 597 . . siter.unlinkAndNext() 598 . 10ms freeSpecial(special, unsafe.Pointer(p), size) 599 . . } 600 . . } 601 . . } else { 602 . . // object is still live 603 . . if siter.s.kind == _KindSpecialReachable {
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 0 70ms (flat, cum) 0.19% 650 . . } 651 . . } 652 . . 653 . . // Copy over and clear the inline mark bits if necessary. 654 . . if gcUsesSpanInlineMarkBits(s.elemsize) { 655 . 70ms s.moveInlineMarks(s.gcmarkBits) 656 . . } 657 . . 658 . . // Check for zombie objects. 659 . . if s.freeindex < s.nelems { 660 . . // Everything < freeindex is allocated and hence
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 10ms 30ms (flat, cum) 0.08% 673 . . } 674 . . } 675 . . } 676 . . 677 . . // Count the number of free objects in this span. 678 10ms 10ms nalloc := uint16(s.countAlloc()) for i := uintptr(0); i < bytes; i += 8 { mbitmap.go:1460 679 . . nfreed := s.allocCount - nalloc 680 . . if nalloc > s.allocCount { 681 . . // The zombie check above should have caught this in 682 . . // more detail. 683 . . print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") 684 . . throw("sweep increased allocation count") 685 . . } 686 . . 687 . . s.allocCount = nalloc 688 . . s.freeindex = 0 // reset allocation index to start of span. 689 . . s.freeIndexForScan = 0 690 . . if traceEnabled() { 691 . . getg().m.p.ptr().trace.reclaimed += uintptr(nfreed) * s.elemsize 692 . . } 693 . . 694 . . // gcmarkBits becomes the allocBits. 695 . . // get a fresh cleared gcmarkBits in preparation for next GC 696 . . s.allocBits = s.gcmarkBits 697 . 20ms s.gcmarkBits = newMarkBits(uintptr(s.nelems)) 698 . . 699 . . // refresh pinnerBits if they exists 700 . . if s.pinnerBits != nil { 701 . . s.refreshPinnerBits() 702 . . }
runtime.(*sweepLocked).sweep
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 280ms (flat, cum) 0.75% 768 . . // objects, because a fresh span that had been allocated into, 769 . . // wasn't totally filled, but then swept, still has all of its 770 . . // free slots zeroed. 771 . . s.needzero = 1 772 . . stats := memstats.heapStats.acquire() 773 10ms 10ms atomic.Xadd64(&stats.smallFreeCount[spc.sizeclass()], int64(nfreed)) 774 . . memstats.heapStats.release() 775 . . 776 . . // Count the frees in the inconsistent, internal stats. 777 20ms 20ms gcController.totalFree.Add(int64(nfreed) * int64(s.elemsize)) return Xadd64(&u.value, delta) types.go:344 778 . . } 779 . . if !preserve { 780 . . // The caller may not have removed this span from whatever 781 . . // unswept set its on but taken ownership of the span for 782 . . // sweeping by updating sweepgen. If this span still is in 783 . . // an unswept set, then the mcentral will pop it off the 784 . . // set, check its sweepgen, and ignore it. 785 . . if nalloc == 0 { 786 . . // Free totally free span directly back to the heap. 787 . 250ms mheap_.freeSpan(s) systemstack(func() { mheap.go:1633 788 . . return true 789 . . } 790 . . // Return span back to the right mcentral list. 791 . . if nalloc == s.nelems { 792 . . mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s)
runtime.deductSweepCredit
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 30ms (flat, cum) 0.08% 908 . . // enough sweeping so that all pages are swept during the concurrent 909 . . // sweep phase between GC cycles. 910 . . // 911 . . // mheap_ must NOT be locked. 912 . . func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) { 913 10ms 10ms if mheap_.sweepPagesPerByte == 0 { 914 . . // Proportional sweep is done or disabled. 915 . . return 916 . . } 917 . . 918 . . trace := traceAcquire() 919 . . if trace.ok() { 920 . . trace.GCSweepStart() 921 . . traceRelease(trace) 922 . . } 923 . . 924 . . // Fix debt if necessary. 925 . . retry: 926 10ms 10ms sweptBasis := mheap_.pagesSweptBasis.Load() return Load64(&u.value) types.go:309 927 . . live := gcController.heapLive.Load() 928 10ms 10ms liveBasis := mheap_.sweepHeapLiveBasis 929 . . newHeapLive := spanBytes 930 . . if liveBasis < live { 931 . . // Only do this subtraction when we don't overflow. Otherwise, pagesTarget 932 . . // might be computed as something really huge, causing us to get stuck 933 . . // sweeping here until the next mark phase.
runtime.deductSweepCredit
/usr/lib/go/src/runtime/mgcsweep.go
Total: 30ms 30ms (flat, cum) 0.08% 955 . . // Sweep pacing changed. Recompute debt. 956 . . goto retry 957 . . } 958 . . } 959 . . 960 20ms 20ms trace = traceAcquire() if !traceEnabled() { traceruntime.go:188 return trace.enabled traceruntime.go:151 ⋮ 961 . . if trace.ok() { 962 . . trace.GCSweepDone() 963 . . traceRelease(trace) 964 . . } 965 10ms 10ms } 966 . . 967 . . // clobberfree sets the memory content at x to bad content, for debugging 968 . . // purposes. 969 . . func clobberfree(x unsafe.Pointer, size uintptr) { 970 . . // size (span.elemsize) is always a multiple of 4.
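The mgcsweep entries above are background and proportional sweeping between GC cycles. If sweep overhead matters, the usual lever is collection frequency: raising GOGC (or calling debug.SetGCPercent) makes cycles, and with them the sweep work, less frequent at the cost of a larger heap. A minimal sketch; the value 300 is only an illustration:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// Equivalent to setting the GOGC environment variable; returns the
	// previous setting (100 by default).
	old := debug.SetGCPercent(300)
	fmt.Println("previous GOGC:", old)
}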
slices.Index[go.shape.[]git.urbach.dev/cli/q/src/cpu.Register,go.shape.int8]
/usr/lib/go/src/slices/slices.go
Total: 70ms 70ms (flat, cum) 0.19% 92 . . } 93 . . 94 . . // Index returns the index of the first occurrence of v in s, 95 . . // or -1 if not present. 96 . . func Index[S ~[]E, E comparable](s S, v E) int { 97 70ms 70ms for i := range s {
slices.Index[go.shape.[]*git.urbach.dev/cli/q/src/codegen.Step,go.shape.*git.urbach.dev/cli/q/src/codegen.Step]
/usr/lib/go/src/slices/slices.go
Total: 30ms 30ms (flat, cum) 0.08% 98 30ms 30ms if v == s[i] { 99 . . return i 100 . . } 101 . . } 102 . . return -1
slices.IndexFunc[go.shape.[]git.urbach.dev/cli/q/src/ssa.Value,go.shape.interface { AddUser; Equals bool; Inputs []git.urbach.dev/cli/q/src/ssa.Value; IsConst bool; RemoveUser; Replace; String string; Type git.urbach.dev/cli/q/src/types.Type; Users []git.urbach.dev/cli/q/src/ssa.Value }]
/usr/lib/go/src/slices/slices.go
Total: 40ms 40ms (flat, cum) 0.11% 104 . . 105 . . // IndexFunc returns the first index i satisfying f(s[i]), 106 . . // or -1 if none do. 107 . . func IndexFunc[S ~[]E, E any](s S, f func(E) bool) int { 108 40ms 40ms for i := range s {
slices.IndexFunc[go.shape.[]*git.urbach.dev/cli/q/src/ssa.Block,go.shape.*uint8]
/usr/lib/go/src/slices/slices.go
Total: 40ms 60ms (flat, cum) 0.16% 109 40ms 60ms if f(s[i]) { 110 . . return i 111 . . } 112 . . } 113 . . return -1
slices.Contains[go.shape.[]*git.urbach.dev/cli/q/src/codegen.Step,go.shape.*git.urbach.dev/cli/q/src/codegen.Step]
/usr/lib/go/src/slices/slices.go
Total: 100ms 100ms (flat, cum) 0.27% 114 . . } 115 . . 116 . . // Contains reports whether v is present in s. 117 . . func Contains[S ~[]E, E comparable](s S, v E) bool { 118 100ms 100ms return Index(s, v) >= 0 if v == s[i] { slices.go:98 ⋮ ⋮ for i := range s { slices.go:97 ⋮ ⋮ if v == s[i] { slices.go:98 ⋮ for i := range s { slices.go:97 ⋮ ⋮ 119 . . } 120 . . 121 . . // ContainsFunc reports whether at least one 122 . . // element e of s satisfies f(e). 123 . . func ContainsFunc[S ~[]E, E any](s S, f func(E) bool) bool {
slices.Insert[go.shape.[]git.urbach.dev/cli/q/src/ssa.Value,go.shape.interface { AddUser; Equals bool; Inputs []git.urbach.dev/cli/q/src/ssa.Value; IsConst bool; RemoveUser; Replace; String string; Type git.urbach.dev/cli/q/src/types.Type; Users []git.urbach.dev/cli/q/src/ssa.Value }]
/usr/lib/go/src/slices/slices.go
Total: 10ms 10ms (flat, cum) 0.027% 139 . . if m == 0 { 140 . . return s 141 . . } 142 . . n := len(s) 143 . . if i == n { 144 10ms 10ms return append(s, v...) 145 . . } 146 . . if n+m > cap(s) { 147 . . // Use append rather than make so that we bump the size of 148 . . // the slice up to the next storage class. 149 . . // This is what Grow does but we don't call Grow because
slices.DeleteFunc[go.shape.[]git.urbach.dev/cli/q/src/ssa.Value,go.shape.interface { AddUser; Equals bool; Inputs []git.urbach.dev/cli/q/src/ssa.Value; IsConst bool; RemoveUser; Replace; String string; Type git.urbach.dev/cli/q/src/types.Type; Users []git.urbach.dev/cli/q/src/ssa.Value }]
/usr/lib/go/src/slices/slices.go
Total: 150ms 170ms (flat, cum) 0.45% 234 . . 235 . . // DeleteFunc removes any elements from s for which del returns true, 236 . . // returning the modified slice. 237 . . // DeleteFunc zeroes the elements between the new length and the original length. 238 . . // If the result is empty, it has the same nilness as s. 239 40ms 40ms func DeleteFunc[S ~[]E, E any](s S, del func(E) bool) S { 240 80ms 100ms i := IndexFunc(s, del) for i := range s { slices.go:108 ⋮ if f(s[i]) { slices.go:109 ⋮ ⋮ for i := range s { slices.go:108 ⋮ ⋮ ⋮ if f(s[i]) { slices.go:109 241 . . if i == -1 { 242 10ms 10ms return s 243 . . } 244 . . // Don't start copying elements until we find one to delete. 245 . . for j := i + 1; j < len(s); j++ { 246 10ms 10ms if v := s[j]; !del(v) { 247 . . s[i] = v 248 . . i++ 249 . . } 250 . . } 251 10ms 10ms clear(s[i:]) // zero/nil out the obsolete elements, for GC 252 . . return s[:i] 253 . . } 254 . . 255 . . // Replace replaces the elements s[i:j] by the given v, and returns the 256 . . // modified slice.
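slices.DeleteFunc is reached here through Block.RemoveNilValues; it compacts the slice in place and zeroes the tail for the GC. A small standalone usage example:

package main

import (
	"fmt"
	"slices"
)

func main() {
	vals := []int{3, 0, 7, 0, 9}
	// DeleteFunc keeps only elements for which del returns false and
	// clears the obsolete tail, the same pattern RemoveNilValues uses.
	vals = slices.DeleteFunc(vals, func(v int) bool { return v == 0 })
	fmt.Println(vals) // [3 7 9]
}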
internal/runtime/maps.ctrlGroup.matchH2
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 30ms 30ms (flat, cum) 0.08% 149 . . } 150 . . 151 . . // matchH2 returns the set of slots which are full and for which the 7-bit hash 152 . . // matches the given value. May return false positives. 153 . . func (g ctrlGroup) matchH2(h uintptr) bitset { 154 30ms 30ms return ctrlGroupMatchH2(g, h) v := uint64(g) ^ (bitsetLSB * uint64(h)) group.go:170 ⋮ ⋮ return bitset(((v - bitsetLSB) &^ v) & bitsetMSB) group.go:171 155 . . } 156 . . 157 . . // Portable implementation of matchH2. 158 . . // 159 . . // Note: On AMD64, this is an intrinsic implemented with SIMD instructions. See
internal/runtime/maps.ctrlGroupMatchH2
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 30ms 30ms (flat, cum) 0.08% 165 . . // subtract off 0x0101 the first 2 bytes we'll become 0xffff and both be 166 . . // considered matches of h. The false positive matches are not a problem, 167 . . // just a rare inefficiency. Note that they only occur if there is a real 168 . . // match and never occur on ctrlEmpty, or ctrlDeleted. The subsequent key 169 . . // comparisons ensure that there is no correctness issue. 170 20ms 20ms v := uint64(g) ^ (bitsetLSB * uint64(h)) 171 10ms 10ms return bitset(((v - bitsetLSB) &^ v) & bitsetMSB) 172 . . } 173 . . 174 . . // matchEmpty returns the set of slots in the group that are empty. 175 . . func (g ctrlGroup) matchEmpty() bitset { 176 . . return ctrlGroupMatchEmpty(g)
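
The two hot lines above are a SWAR (SIMD-within-a-register) byte match: the XOR zeroes every byte equal to h, and the subtract / and-not / mask sequence turns exactly those zero bytes into set high bits. A self-contained sketch of the same trick, assuming eight one-byte control slots packed little-endian into a uint64; the constants mirror bitsetLSB and bitsetMSB from the listing, and like the original it can produce rare false positives that a later key comparison filters out:

    package main

    import (
        "fmt"
        "math/bits"
    )

    const (
        lsb = 0x0101010101010101 // low bit of every byte
        msb = 0x8080808080808080 // high bit of every byte
    )

    // matchByte returns a bitset with one marker bit per byte of the packed
    // control word that equals h. May report false positives, which callers
    // resolve with a full key comparison.
    func matchByte(group uint64, h uint8) uint64 {
        v := group ^ (lsb * uint64(h)) // bytes equal to h become 0x00
        return ((v - lsb) &^ v) & msb  // zero bytes underflow and set their high bit
    }

    func main() {
        // Pack control bytes little-endian: byte i lives in bits 8*i .. 8*i+7.
        ctrl := [8]uint8{0x12, 0x34, 0x12, 0x56, 0x78, 0x12, 0x9a, 0xbc}
        var group uint64
        for i, c := range ctrl {
            group |= uint64(c) << (8 * i)
        }

        m := matchByte(group, 0x12)
        for m != 0 {
            slot := bits.TrailingZeros64(m) / 8 // index of the matching byte
            fmt.Println("candidate slot:", slot) // prints 0, 2, 5
            m &= m - 1                           // clear the lowest marker bit
        }
    }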
internal/runtime/maps.(*groupReference).ctrls
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 30ms 30ms (flat, cum) 0.08% 275 . . return v, false 276 . . } 277 . . 278 . . // ctrls returns the group control word. 279 . . func (g *groupReference) ctrls() *ctrlGroup { 280 30ms 30ms return (*ctrlGroup)(g.data) 281 . . } 282 . .
internal/runtime/maps.(*groupReference).key
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 80ms 80ms (flat, cum) 0.21% 283 . . // key returns a pointer to the key at index i. 284 . . func (g *groupReference) key(typ *abi.SwissMapType, i uintptr) unsafe.Pointer { 285 80ms 80ms offset := groupSlotsOffset + i*typ.SlotSize 286 . . 287 . . return unsafe.Pointer(uintptr(g.data) + offset) 288 . . }
internal/runtime/maps.(*groupReference).elem
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 60ms 60ms (flat, cum) 0.16% 289 . . 290 . . // elem returns a pointer to the element at index i. 291 . . func (g *groupReference) elem(typ *abi.SwissMapType, i uintptr) unsafe.Pointer { 292 40ms 40ms offset := groupSlotsOffset + i*typ.SlotSize + typ.ElemOff 293 . . 294 20ms 20ms return unsafe.Pointer(uintptr(g.data) + offset) 295 . . } 296 . . 297 . . // groupsReference is a wrapper type describing an array of groups stored at 298 . . // data. 299 . . type groupsReference struct {
internal/runtime/maps.newGroups
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 0 660ms (flat, cum) 1.76% 311 . . // 312 . . // Length must be a power of two. 313 . . func newGroups(typ *abi.SwissMapType, length uint64) groupsReference { 314 . . return groupsReference{ 315 . . // TODO: make the length type the same throughout. 316 . 660ms data: newarray(typ.Group, int(length)), 317 . . lengthMask: length - 1, 318 . . } 319 . . } 320 . .
internal/runtime/maps.(*groupsReference).group
/usr/lib/go/src/internal/runtime/maps/group.go
Total: 50ms 50ms (flat, cum) 0.13% 321 . . // group returns the group at index i. 322 . . func (g *groupsReference) group(typ *abi.SwissMapType, i uint64) groupReference { 323 . . // TODO(prattmic): Do something here about truncation on cast to 324 . . // uintptr on 32-bit systems? 325 30ms 30ms offset := uintptr(i) * typ.GroupSize 326 . . 327 . . return groupReference{ 328 20ms 20ms data: unsafe.Pointer(uintptr(g.data) + offset), 329 . . } 330 . . } 331 . . 332 . . func cloneGroup(typ *abi.SwissMapType, newGroup, oldGroup groupReference) { 333 . . typedmemmove(typ.Group, newGroup.data, oldGroup.data)
git.urbach.dev/cli/q/src/token.identifier
/home/user/q/src/token/identifier.go
Total: 220ms 220ms (flat, cum) 0.59% 1 . . package token 2 . . 3 . . // identifier handles all tokens that qualify as an identifier. 4 50ms 50ms func identifier(tokens List, buffer []byte, i Position) (List, Position) { 5 . . position := i 6 . . i++ 7 . . 8 40ms 40ms for i < Position(len(buffer)) && isIdentifier(buffer[i]) { return isLetter(c) || isDigit(c) || c == '_' identifier.go:49 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') identifier.go:57 ⋮ 9 60ms 60ms i++ 10 . . } 11 . . 12 . . identifier := buffer[position:i] 13 . . kind := Identifier 14 . . 15 10ms 10ms switch string(identifier) { 16 10ms 10ms case "as": 17 . . kind = Cast 18 10ms 10ms case "assert": 19 . . kind = Assert 20 . . case "const": 21 . . kind = Const 22 . . case "delete": 23 . . kind = Delete 24 . . case "if": 25 . . kind = If 26 . . case "else": 27 . . kind = Else 28 . . case "extern": 29 . . kind = Extern 30 10ms 10ms case "import": 31 . . kind = Import 32 . . case "loop": 33 . . kind = Loop 34 10ms 10ms case "new": 35 . . kind = New 36 . . case "return": 37 . . kind = Return 38 . . case "syscall": 39 . . kind = Syscall 40 . . case "switch": 41 . . kind = Switch 42 . . } 43 . . 44 20ms 20ms tokens = append(tokens, Token{Kind: kind, Position: position, Length: Length(len(identifier))}) 45 . . return tokens, i 46 . . }
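
The keyword check above converts the identifier bytes to a string only inside the switch; current Go compilers are typically able to do a []byte-to-string conversion that is used purely for comparison without allocating, so the cost recorded here is the comparisons themselves rather than garbage. A reduced sketch of the same pattern, with the keyword set shortened for illustration:

    package main

    import "fmt"

    // classify mirrors the shape of the keyword switch above: the byte slice
    // is converted to a string only for comparison.
    func classify(word []byte) string {
        switch string(word) {
        case "if":
            return "If"
        case "loop":
            return "Loop"
        case "return":
            return "Return"
        default:
            return "Identifier"
        }
    }

    func main() {
        fmt.Println(classify([]byte("loop"))) // Loop
    }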
git.urbach.dev/cli/q/src/token.isIdentifier
/home/user/q/src/token/identifier.go
Total: 20ms 20ms (flat, cum) 0.053% 47 . . 48 . . func isIdentifier(c byte) bool { 49 20ms 20ms return isLetter(c) || isDigit(c) || c == '_' return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') identifier.go:57 50 . . }
git.urbach.dev/cli/q/src/token.isIdentifierStart
/home/user/q/src/token/identifier.go
Total: 30ms 30ms (flat, cum) 0.08% 52 . . func isIdentifierStart(c byte) bool { 53 30ms 30ms return isLetter(c) || c == '_' return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') identifier.go:57 ⋮ 54 . . }
git.urbach.dev/cli/q/src/token.isLetter
/home/user/q/src/token/identifier.go
Total: 50ms 50ms (flat, cum) 0.13% 56 . . func isLetter(c byte) bool { 57 50ms 50ms return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 58 . . }
runtime.gclinkptr.ptr
/usr/lib/go/src/runtime/mcache.go
Total: 20ms 20ms (flat, cum) 0.053% 71 . . 72 . . // ptr returns the *gclink form of p. 73 . . // The result should be used for accessing fields, not stored 74 . . // in other data structures. 75 . . func (p gclinkptr) ptr() *gclink { 76 20ms 20ms return (*gclink)(unsafe.Pointer(p)) 77 . . } 78 . . 79 . . type stackfreelist struct { 80 . . list gclinkptr // linked list of free stacks 81 . . size uintptr // total size of stacks in list
runtime.getMCache
/usr/lib/go/src/runtime/mcache.go
Total: 90ms 90ms (flat, cum) 0.24% 125 . . // 126 . . // Returns nil if we're not bootstrapping or we don't have a P. The caller's 127 . . // P must not change, so we must be in a non-preemptible state. 128 . . func getMCache(mp *m) *mcache { 129 . . // Grab the mcache, since that's where stats live. 130 20ms 20ms pp := mp.p.ptr() 131 . . var c *mcache 132 10ms 10ms if pp == nil { 133 . . // We will be called without a P while bootstrapping, 134 . . // in which case we use mcache0, which is set in mallocinit. 135 . . // mcache0 is cleared when bootstrapping is complete, 136 . . // by procresize. 137 . . c = mcache0 138 . . } else { 139 60ms 60ms c = pp.mcache 140 . . } 141 . . return c 142 . . } 143 . .
runtime.(*mcache).refill
/usr/lib/go/src/runtime/mcache.go
Total: 70ms 1.73s (flat, cum) 4.61% 145 . . // have at least one free object. The current span in c must be full. 146 . . // 147 . . // Must run in a non-preemptible context since otherwise the owner of 148 . . // c could change. 149 10ms 10ms func (c *mcache) refill(spc spanClass) { 150 . . // Return the current cached span to the central lists. 151 . . s := c.alloc[spc] 152 . . 153 . . if s.allocCount != s.nelems { 154 . . throw("refill of span with free space remaining") 155 . . } 156 . . if s != &emptymspan { 157 . . // Mark this span as no longer cached. 158 20ms 20ms if s.sweepgen != mheap_.sweepgen+3 { 159 . . throw("bad sweepgen in refill") 160 . . } 161 . 530ms mheap_.central[spc].mcentral.uncacheSpan(s) 162 . . 163 . . // Count up how many slots were used and record it. 164 . 70ms stats := memstats.heapStats.acquire() 165 . . slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache) 166 20ms 20ms atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed) 167 . . 168 . . // Flush tinyAllocs. 169 . . if spc == tinySpanClass { 170 . . atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs)) 171 . . c.tinyAllocs = 0 172 . . } 173 . 10ms memstats.heapStats.release() 174 . . 175 . . // Count the allocs in inconsistent, internal stats. 176 . . bytesAllocated := slotsUsed * int64(s.elemsize) 177 20ms 20ms gcController.totalAlloc.Add(bytesAllocated) return Xadd64(&u.value, delta) types.go:344 178 . . 179 . . // Clear the second allocCount just to be safe. 180 . . s.allocCountBeforeCache = 0 181 . . } 182 . . 183 . . // Get a new cached span from the central lists. 184 . 1.05s s = mheap_.central[spc].mcentral.cacheSpan() 185 . . if s == nil { 186 . . throw("out of memory") 187 . . } 188 . . 189 . . if s.allocCount == s.nelems {
runtime.(*mcache).refill
/usr/lib/go/src/runtime/mcache.go
Total: 20ms 90ms (flat, cum) 0.24% 209 . . // We pick an overestimate here because an underestimate leads 210 . . // the pacer to believe that it's in better shape than it is, 211 . . // which appears to lead to more memory used. See #53738 for 212 . . // more details. 213 . . usedBytes := uintptr(s.allocCount) * s.elemsize 214 10ms 80ms gcController.update(int64(s.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc)) 215 . . c.scanAlloc = 0 216 . . 217 . . c.alloc[spc] = s 218 10ms 10ms } 219 . . 220 . . // allocLarge allocates a span for a large object. 221 . . func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan { 222 . . if size+pageSize < size { 223 . . throw("out of memory")
runtime.(*mcache).releaseAll
/usr/lib/go/src/runtime/mcache.go
Total: 0 10ms (flat, cum) 0.027% 296 . . // recomputed since caching this span, so we don't do this for stale spans. 297 . . dHeapLive -= int64(s.nelems-s.allocCount) * int64(s.elemsize) 298 . . } 299 . . 300 . . // Release the span to the mcentral. 301 . 10ms mheap_.central[i].mcentral.uncacheSpan(s) 302 . . c.alloc[i] = &emptymspan 303 . . } 304 . . } 305 . . // Clear tinyalloc pool. 306 . . c.tiny = 0
runtime.(*mcache).prepareForSweep
/usr/lib/go/src/runtime/mcache.go
Total: 30ms 30ms (flat, cum) 0.08% 317 . . } 318 . . 319 . . // prepareForSweep flushes c if the system has entered a new sweep phase 320 . . // since c was populated. This must happen between the sweep phase 321 . . // starting and the first allocation from c. 322 10ms 10ms func (c *mcache) prepareForSweep() { 323 . . // Alternatively, instead of making sure we do this on every P 324 . . // between starting the world and allocating on that P, we 325 . . // could leave allocate-black on, allow allocation to continue 326 . . // as usual, use a ragged barrier at the beginning of sweep to 327 . . // ensure all cached spans are swept, and then disable 328 . . // allocate-black. However, with this approach it's difficult 329 . . // to avoid spilling mark bits into the *next* GC cycle. 330 10ms 10ms sg := mheap_.sweepgen 331 . . flushGen := c.flushGen.Load() 332 10ms 10ms if flushGen == sg { 333 . . return 334 . . } else if flushGen != sg-2 { 335 . . println("bad flushGen", flushGen, "in prepareForSweep; sweepgen", sg) 336 . . throw("bad flushGen") 337 . . }
runtime.makechan
/usr/lib/go/src/runtime/chan.go
Total: 0 60ms (flat, cum) 0.16% 94 . . // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. 95 . . var c *hchan 96 . . switch { 97 . . case mem == 0: 98 . . // Queue or element size is zero. 99 . 10ms c = (*hchan)(mallocgc(hchanSize, nil, true)) 100 . . // Race detector uses this location for synchronization. 101 . . c.buf = c.raceaddr() 102 . . case !elem.Pointers(): 103 . . // Elements do not contain pointers. 104 . . // Allocate hchan and buf in one call. 105 . . c = (*hchan)(mallocgc(hchanSize+mem, nil, true)) 106 . . c.buf = add(unsafe.Pointer(c), hchanSize) 107 . . default: 108 . . // Elements contain pointers. 109 . 30ms c = new(hchan) 110 . 20ms c.buf = mallocgc(mem, elem, true) 111 . . } 112 . . 113 . . c.elemsize = uint16(elem.Size_) 114 . . c.elemtype = elem 115 . . c.dataqsiz = uint(size)
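
The switch above picks one of three allocation strategies: a bare hchan when the buffer memory is zero, a single combined allocation when the element type contains no pointers, and a separately allocated, GC-scannable buffer otherwise. The branch itself is not observable from user code, but these are the kinds of channels that reach each case (a hedged illustration, not an API):

    package main

    import "fmt"

    type node struct{ next *node }

    func main() {
        // Buffer memory is zero: only the hchan header is allocated.
        done := make(chan struct{})

        // Elements without pointers: header and buffer share one allocation.
        nums := make(chan int, 64)

        // Elements with pointers: the buffer is allocated separately so the
        // GC can scan it using the element type's pointer map.
        nodes := make(chan *node, 8)

        fmt.Println(cap(done), cap(nums), cap(nodes)) // 0 64 8
    }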
runtime.chansend1
/usr/lib/go/src/runtime/chan.go
Total: 20ms 660ms (flat, cum) 1.76% 155 . . } 156 . . 157 . . // entry point for c <- x from compiled code. 158 . . // 159 . . //go:nosplit 160 20ms 20ms func chansend1(c *hchan, elem unsafe.Pointer) { 161 . 640ms chansend(c, elem, true, sys.GetCallerPC()) 162 . . } 163 . . 164 . . /* 165 . . * generic single channel send/recv 166 . . * If block is not nil,
runtime.chansend
/usr/lib/go/src/runtime/chan.go
Total: 40ms 40ms (flat, cum) 0.11% 171 . . * sleep can wake up with g.param == nil 172 . . * when a channel involved in the sleep has 173 . . * been closed. it is easiest to loop and re-run 174 . . * the operation; we'll see that it's now closed. 175 . . */ 176 10ms 10ms func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { 177 . . if c == nil { 178 . . if !block { 179 . . return false 180 . . } 181 . . gopark(nil, nil, waitReasonChanSendNilChan, traceBlockForever, 2) 182 . . throw("unreachable") 183 . . } 184 . . 185 . . if debugChan { 186 . . print("chansend: chan=", c, "\n") 187 . . } 188 . . 189 . . if raceenabled { 190 . . racereadpc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(chansend)) 191 . . } 192 . . 193 30ms 30ms if c.bubble != nil && getg().bubble != c.bubble { 194 . . fatal("send on synctest channel from outside bubble") 195 . . } 196 . . 197 . . // Fast path: check for failed non-blocking operation without acquiring the lock. 198 . . //
runtime.chansend
/usr/lib/go/src/runtime/chan.go
Total: 90ms 290ms (flat, cum) 0.77% 213 . . if !block && c.closed == 0 && full(c) { 214 . . return false 215 . . } 216 . . 217 . . var t0 int64 218 10ms 10ms if blockprofilerate > 0 { 219 . . t0 = cputicks() 220 . . } 221 . . 222 . 200ms lock(&c.lock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 223 . . 224 . . if c.closed != 0 { 225 . . unlock(&c.lock) 226 . . panic(plainError("send on closed channel")) 227 . . } 228 . . 229 80ms 80ms if sg := c.recvq.dequeue(); sg != nil { if y == nil { chan.go:893 ⋮ if sgp == nil { chan.go:889 ⋮ if !sgp.g.selectDone.CompareAndSwap(0, 1) { chan.go:911 return Cas(&u.value, old, new) types.go:236 ⋮ 230 . . // Found a waiting receiver. We pass the value we want to send
runtime.chansend.func1
/usr/lib/go/src/runtime/chan.go
Total: 0 200ms (flat, cum) 0.53% 231 . . // directly to the receiver, bypassing the channel buffer (if any). 232 . 200ms send(c, sg, ep, func() { unlock(&c.lock) }, 3) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 ⋮ 233 . . return true 234 . . } 235 . . 236 . . if c.qcount < c.dataqsiz {
runtime.chansend
/usr/lib/go/src/runtime/chan.go
Total: 20ms 110ms (flat, cum) 0.29% 238 . . qp := chanbuf(c, c.sendx) 239 . . if raceenabled { 240 . . racenotify(c, c.sendx, nil) 241 . . } 242 20ms 50ms typedmemmove(c.elemtype, qp, ep) 243 . . c.sendx++ 244 . . if c.sendx == c.dataqsiz { 245 . . c.sendx = 0 246 . . } 247 . . c.qcount++ 248 . 50ms unlock(&c.lock) unlockWithRank(l) lock_spinbit.go:261 unlock2(l) lockrank_off.go:35 249 . . return true 250 . . } 251 . . 252 . . if !block { 253 . . unlock(&c.lock) 254 . . return false 255 . . } 256 . . 257 . . // Block on the channel. Some receiver will complete our operation for us. 258 . . gp := getg() 259 . 10ms mysg := acquireSudog() 260 . . mysg.releasetime = 0 261 . . if t0 != 0 { 262 . . mysg.releasetime = -1 263 . . } 264 . . // No stack splits between assigning elem and enqueuing mysg
runtime.chansend
/usr/lib/go/src/runtime/chan.go
Total: 20ms 20ms (flat, cum) 0.053% 286 . . // stack object, but sudogs aren't considered as roots of the 287 . . // stack tracer. 288 . . KeepAlive(ep) 289 . . 290 . . // someone woke us up. 291 20ms 20ms if mysg != gp.waiting { 292 . . throw("G waiting list is corrupted") 293 . . } 294 . . gp.waiting = nil 295 . . gp.activeStackChans = false 296 . . closed := !mysg.success
runtime.send
/usr/lib/go/src/runtime/chan.go
Total: 0 180ms (flat, cum) 0.48% 339 . . if sg.elem != nil { 340 . . sendDirect(c.elemtype, sg, ep) 341 . . sg.elem = nil 342 . . } 343 . . gp := sg.g 344 . 20ms unlockf() 345 . . gp.param = unsafe.Pointer(sg) 346 . . sg.success = true 347 . . if sg.releasetime != 0 { 348 . . sg.releasetime = cputicks() 349 . . } 350 . 160ms goready(gp, skip+1) systemstack(func() { proc.go:480 351 . . } 352 . . 353 . . // timerchandrain removes all elements in channel c's buffer. 354 . . // It reports whether any elements were removed. 355 . . // Because it is only intended for timers, it does not
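
Taken together, the chansend and send listings show the two non-blocking paths: hand the element directly to a receiver that is already parked (send copies into the receiver's slot and goready wakes it, which is where most of the 160ms above goes), or copy it into the ring buffer when there is room. A small program that exercises both paths; the sleep is only there to make the receiver park first:

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        // Direct handoff: the receiver parks on the unbuffered channel, so
        // chansend finds it on recvq and send() copies the value to it.
        direct := make(chan int)
        done := make(chan struct{})
        go func() {
            fmt.Println("received", <-direct)
            close(done)
        }()
        time.Sleep(10 * time.Millisecond) // give the receiver time to park
        direct <- 1
        <-done

        // Buffered path: with space in the ring buffer, chansend copies the
        // element into the buffer (the typedmemmove above) and returns.
        buffered := make(chan int, 1)
        buffered <- 2
        fmt.Println("buffered", <-buffered)
    }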
runtime.closechan
/usr/lib/go/src/runtime/chan.go
Total: 10ms 20ms (flat, cum) 0.053% 417 . . } 418 . . if c.bubble != nil && getg().bubble != c.bubble { 419 . . fatal("close of synctest channel from outside bubble") 420 . . } 421 . . 422 . 10ms lock(&c.lock) lockWithRank(l, getLockRank(l)) lock_spinbit.go:152 lock2(l) lockrank_off.go:24 423 . . if c.closed != 0 { 424 . . unlock(&c.lock) 425 . . panic(plainError("close of closed channel")) 426 . . } 427 . . 428 . . if raceenabled { 429 . . callerpc := sys.GetCallerPC() 430 . . racewritepc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(closechan)) 431 . . racerelease(c.raceaddr()) 432 . . } 433 . . 434 . . c.closed = 1 435 . . 436 . . var glist gList 437 . . 438 . . // release all readers 439 . . for { 440 10ms 10ms sg := c.recvq.dequeue() if !sgp.g.selectDone.CompareAndSwap(0, 1) { chan.go:911 return Cas(&u.value, old, new) types.go:236 441 . . if sg == nil { 442 . . break 443 . . } 444 . . if sg.elem != nil { 445 . . typedmemclr(c.elemtype, sg.elem)
runtime.recv
/usr/lib/go/src/runtime/chan.go
Total: 0 30ms (flat, cum) 0.08% 722 . . racenotify(c, c.recvx, nil) 723 . . racenotify(c, c.recvx, sg) 724 . . } 725 . . // copy data from queue to receiver 726 . . if ep != nil { 727 . 10ms typedmemmove(c.elemtype, ep, qp) 728 . . } 729 . . // copy data from sender to queue 730 . . typedmemmove(c.elemtype, qp, sg.elem) 731 . . c.recvx++ 732 . . if c.recvx == c.dataqsiz { 733 . . c.recvx = 0 734 . . } 735 . . c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz 736 . . } 737 . . sg.elem = nil 738 . . gp := sg.g 739 . 10ms unlockf() 740 . . gp.param = unsafe.Pointer(sg) 741 . . sg.success = true 742 . . if sg.releasetime != 0 { 743 . . sg.releasetime = cputicks() 744 . . } 745 . 10ms goready(gp, skip+1) systemstack(func() { proc.go:480 746 . . }
runtime.chanparkcommit
/usr/lib/go/src/runtime/chan.go
Total: 10ms 10ms (flat, cum) 0.027% 747 . . 748 10ms 10ms func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool { 749 . . // There are unlocked sudogs that point into gp's stack. Stack 750 . . // copying must lock the channels of those sudogs. 751 . . // Set activeStackChans here instead of before we try parking 752 . . // because we could self-deadlock in stack growth on the 753 . . // channel lock.
runtime.(*waitq).enqueue
/usr/lib/go/src/runtime/chan.go
Total: 10ms 10ms (flat, cum) 0.027% 868 . . func reflect_chanclose(c *hchan) { 869 . . closechan(c) 870 . . } 871 . . 872 . . func (q *waitq) enqueue(sgp *sudog) { 873 10ms 10ms sgp.next = nil 874 . . x := q.last 875 . . if x == nil { 876 . . sgp.prev = nil 877 . . q.first = sgp 878 . . q.last = sgp
runtime.(*waitq).dequeue
/usr/lib/go/src/runtime/chan.go
Total: 90ms 90ms (flat, cum) 0.24% 884 . . } 885 . . 886 . . func (q *waitq) dequeue() *sudog { 887 . . for { 888 . . sgp := q.first 889 10ms 10ms if sgp == nil { 890 . . return nil 891 . . } 892 . . y := sgp.next 893 40ms 40ms if y == nil { 894 . . q.first = nil 895 . . q.last = nil 896 . . } else { 897 . . y.prev = nil 898 . . q.first = y 899 . . sgp.next = nil // mark as removed (see dequeueSudoG) 900 . . } 901 . . 902 . . // if a goroutine was put on this queue because of a 903 . . // select, there is a small window between the goroutine 904 . . // being woken up by a different case and it grabbing the 905 . . // channel locks. Once it has the lock 906 . . // it removes itself from the queue, so we won't see it after that. 907 . . // We use a flag in the G struct to tell us when someone 908 . . // else has won the race to signal this goroutine but the goroutine 909 . . // hasn't removed itself from the queue yet. 910 . . if sgp.isSelect { 911 40ms 40ms if !sgp.g.selectDone.CompareAndSwap(0, 1) { return Cas(&u.value, old, new) types.go:236 ⋮ ⋮ 912 . . // We lost the race to wake this goroutine. 913 . . continue 914 . . } 915 . . } 916 . .
internal/bytealg.IndexByte
/usr/lib/go/src/internal/bytealg/indexbyte_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 11 . . // R2: b cap (unused) 12 . . // R3: c byte to search 13 . . // return 14 . . // R0: result 15 . . TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40 16 10ms 10ms MOVD R3, R2 17 . . B ·IndexByteString<ABIInternal>(SB) 18 . . 19 . . // func IndexByteString(s string, c byte) int 20 . . // input: 21 . . // R0: s ptr
internal/bytealg.IndexByteString
/usr/lib/go/src/internal/bytealg/indexbyte_arm64.s
Total: 200ms 200ms (flat, cum) 0.53% 38 . . // Magic constant 0x40100401 allows us to identify 39 . . // which lane matches the requested byte. 40 . . // 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24)) 41 . . // Different bytes have different bit masks (i.e: 1, 4, 16, 64) 42 . . MOVD $0x40100401, R5 43 30ms 30ms VMOV R2, V0.B16 44 . . // Work with aligned 32-byte chunks 45 . . BIC $0x1f, R0, R3 46 . . VMOV R5, V5.S4 47 . . ANDS $0x1f, R0, R9 48 . . AND $0x1f, R1, R10 49 . . BEQ loop 50 . . 51 . . // Input string is not 32-byte aligned. We calculate the 52 . . // syndrome value for the aligned 32 bytes block containing 53 . . // the first bytes and mask off the irrelevant part. 54 . . VLD1.P (R3), [V1.B16, V2.B16] 55 80ms 80ms SUB $0x20, R9, R4 56 . . ADDS R4, R1, R1 57 . . VCMEQ V0.B16, V1.B16, V3.B16 58 . . VCMEQ V0.B16, V2.B16, V4.B16 59 . . VAND V5.B16, V3.B16, V3.B16 60 . . VAND V5.B16, V4.B16, V4.B16 61 . . VADDP V4.B16, V3.B16, V6.B16 // 256->128 62 . . VADDP V6.B16, V6.B16, V6.B16 // 128->64 63 . . VMOV V6.D[0], R6 64 . . // Clear the irrelevant lower bits 65 . . LSL $1, R9, R4 66 . . LSR R4, R6, R6 67 . . LSL R4, R6, R6 68 . . // The first block can also be the last 69 . . BLS masklast 70 . . // Have we found something already? 71 . . CBNZ R6, tail 72 . . 73 . . loop: 74 20ms 20ms VLD1.P (R3), [V1.B16, V2.B16] 75 40ms 40ms SUBS $0x20, R1, R1 76 . . VCMEQ V0.B16, V1.B16, V3.B16 77 . . VCMEQ V0.B16, V2.B16, V4.B16 78 . . // If we're out of data we finish regardless of the result 79 . . BLS end 80 . . // Use a fast check for the termination condition 81 . . VORR V4.B16, V3.B16, V6.B16 82 . . VADDP V6.D2, V6.D2, V6.D2 83 . . VMOV V6.D[0], R6 84 . . // We're not out of data, loop if we haven't found the character 85 . . CBZ R6, loop 86 . . 87 . . end: 88 . . // Termination condition found, let's calculate the syndrome value 89 10ms 10ms VAND V5.B16, V3.B16, V3.B16 90 . . VAND V5.B16, V4.B16, V4.B16 91 10ms 10ms VADDP V4.B16, V3.B16, V6.B16 92 . . VADDP V6.B16, V6.B16, V6.B16 93 . . VMOV V6.D[0], R6 94 . . // Only do the clear for the last possible block with less than 32 bytes 95 . . // Condition flags come from SUBS in the loop 96 . . BHS tail 97 . . 98 . . masklast: 99 . . // Clear the irrelevant upper bits 100 . . ADD R9, R10, R4 101 10ms 10ms AND $0x1f, R4, R4 102 . . SUB $0x20, R4, R4 103 . . NEG R4<<1, R4 104 . . LSL R4, R6, R6 105 . . LSR R4, R6, R6 106 . .
internal/bytealg.IndexByteString
/usr/lib/go/src/internal/bytealg/indexbyte_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 118 . . // Compute the offset result 119 . . SUB R11, R0, R0 120 . . RET 121 . . 122 . . fail: 123 10ms 10ms MOVD $-1, R0 124 . . RET
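
This NEON routine is what strings.IndexByte and bytes.IndexByte bottom out in on arm64, and through them much of the scanning and splitting in the strings and bytes packages. Trivial usage:

    package main

    import (
        "bytes"
        "fmt"
        "strings"
    )

    func main() {
        // Both calls reach the vectorized IndexByte assembly above on arm64.
        fmt.Println(strings.IndexByte("hello, world", ',')) // 5
        fmt.Println(bytes.IndexByte([]byte("q lang"), ' ')) // 1
    }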
git.urbach.dev/cli/q/src/codegen.(*Function).findFreeRegister
/home/user/q/src/codegen/findFreeRegister.go
Total: 40ms 40ms (flat, cum) 0.11% 39 . . } 40 . . 41 . . for _, current := range f.Steps { 42 . . // These checks need to happen regardless of whether the value is alive after execution. 43 . . // If it is used as an operand, the operand restrictions of the architecture apply. 44 40ms 40ms binaryOp, isBinaryOp := current.Value.(*ssa.BinaryOp) 45 . . 46 . . if isBinaryOp && !binaryOp.Op.IsComparison() { 47 . . switch f.build.Arch { 48 . . case config.ARM: 49 . . if current.Register != -1 && binaryOp.Op == token.Mod {
git.urbach.dev/cli/q/src/codegen.(*Function).findFreeRegister
/home/user/q/src/codegen/findFreeRegister.go
Total: 170ms 170ms (flat, cum) 0.45% 77 . . } 78 . . } 79 . . } 80 . . 81 . . // If it's not alive in this step, ignore it. 82 50ms 50ms if !slices.Contains(current.Live, step) { return Index(s, v) >= 0 slices.go:118 if v == s[i] { slices.go:98 ⋮ ⋮ ⋮ return Index(s, v) >= 0 slices.go:118 for i := range s { slices.go:97 83 . . continue 84 . . } 85 . . 86 . . // Mark all the neighbor registers that are alive 87 . . // at the same time as used. 88 20ms 20ms for _, live := range current.Live { 89 10ms 10ms if live.Register == -1 { 90 . . continue 91 . . } 92 . . 93 . . usedRegisters |= (1 << live.Register) 94 . . } 95 . . 96 . . // Ignore the definition itself. 97 . . if current == step { 98 . . continue 99 . . } 100 . . 101 . . // Find all the registers that this instruction 102 . . // would clobber and mark them as used. 103 . . var clobbered []cpu.Register 104 . . 105 50ms 50ms switch instr := current.Value.(type) { 106 . . case *ssa.BinaryOp: 107 . . switch instr.Op { 108 . . case token.Div, token.Mod: 109 . . clobbered = f.CPU.DivisionClobbered 110 . . case token.Shl, token.Shr: 111 . . clobbered = f.CPU.ShiftClobbered 112 . . } 113 . . case *ssa.Call: 114 . . clobbered = f.CPU.Call.Clobbered 115 . . case *ssa.CallExtern: 116 . . clobbered = f.CPU.ExternCall.Clobbered 117 10ms 10ms case *ssa.FromTuple: 118 . . usedRegisters |= (1 << f.CPU.Call.Out[instr.Index]) 119 . . case *ssa.Parameter: 120 . . usedRegisters |= (1 << f.CPU.Call.In[instr.Index]) 121 10ms 10ms case *ssa.Syscall: 122 . . clobbered = f.CPU.Syscall.Clobbered 123 . . } 124 . . 125 . . for _, reg := range clobbered { 126 10ms 10ms usedRegisters |= (1 << reg) 127 . . } 128 . . } 129 . . 130 . . // Pick one of the register hints if possible. 131 10ms 10ms for _, reg := range step.Hints { 132 . . if usedRegisters&(1<<reg) == 0 { 133 . . return reg 134 . . } 135 . . } 136 . .
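
The loop above folds every live or clobbered register into a usedRegisters bitmask and then tries the hints against it. A minimal sketch of the same bitmask search, with a made-up register count rather than the q compiler's actual CPU description:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // firstFreeRegister returns the lowest register index not marked in the
    // used bitmask, or -1 if all numRegisters are taken.
    func firstFreeRegister(used uint64, numRegisters int) int {
        free := ^used & (1<<numRegisters - 1)
        if free == 0 {
            return -1
        }
        return bits.TrailingZeros64(free)
    }

    func main() {
        var used uint64
        for _, reg := range []int{0, 1, 3} { // registers already live
            used |= 1 << reg
        }
        fmt.Println(firstFreeRegister(used, 16)) // 2
    }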
git.urbach.dev/cli/q/src/codegen..markAlive.Backward[go.shape.[]*git.urbach.dev/cli/q/src/codegen.Step,go.shape.*uint8].func1
/usr/lib/go/src/slices/iter.go
Total: 210ms 1.50s (flat, cum) 4.00% 24 . . // Backward returns an iterator over index-value pairs in the slice, 25 . . // traversing it backward with descending indices. 26 . . func Backward[Slice ~[]E, E any](s Slice) iter.Seq2[int, E] { 27 . . return func(yield func(int, E) bool) { 28 . . for i := len(s) - 1; i >= 0; i-- { 29 210ms 1.50s if !yield(i, s[i]) { current.Live = append(current.Live, live) markAlive.go:36 ⋮ if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ ⋮ for _, current := range slices.Backward(steps) { markAlive.go:31 ⋮ f.hintABI(step) createSteps.go:50 ⋮ if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ f.createLiveRanges(step) createSteps.go:51 ⋮ if slices.Contains(current.Live, live) { markAlive.go:32 ⋮ ⋮ f.assignFreeRegister(step) createSteps.go:56 step.Register = f.findFreeRegister(step) assignFreeRegister.go:6 ⋮ if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ ⋮ switch existing.(type) { Block.go:172 ⋮ if existing.IsConst() && instr.Equals(existing) { Block.go:166 ⋮ if slices.Contains(current.Live, live) { markAlive.go:32 return Index(s, v) >= 0 slices.go:118 if v == s[i] { slices.go:98 ⋮ ⋮ if slices.Contains(current.Live, live) { markAlive.go:32 ⋮ current.Live = append(current.Live, live) markAlive.go:36 ⋮ if slices.Contains(current.Live, live) { markAlive.go:32 return Index(s, v) >= 0 slices.go:118 for i := range s { slices.go:97 ⋮ if step.Register == -1 && f.needsRegister(step) { createSteps.go:55 ⋮ switch existing.(type) { Block.go:172 ⋮ f.hintDestination(step) createSteps.go:59 ⋮ if live.Value == current.Value { markAlive.go:38 ⋮ if step.Register == -1 && f.needsRegister(step) { createSteps.go:55 30 . . return 31 . . } 32 . . } 33 . . } 34 . . }
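
Backward is a push-style iterator (iter.Seq2), so with range-over-func the loop body becomes the yield callback and its time is attributed to this frame; the 1.50s cumulative above is almost entirely the markAlive/createSteps work shown in the breakdown, not the iteration itself. Usage is simply:

    package main

    import (
        "fmt"
        "slices"
    )

    func main() {
        steps := []string{"load", "add", "store"}

        // The loop body is the yield callback, so in a profile its time is
        // charged to this Backward frame.
        for i, s := range slices.Backward(steps) {
            fmt.Println(i, s) // 2 store, 1 add, 0 load
        }
    }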
git.urbach.dev/cli/q/src/core.(*Function).compileInputs
/home/user/q/src/core/compileInputs.go
Total: 190ms 840ms (flat, cum) 2.24% 9 . . 10 . . // compileInputs registers every input as an identifier. 11 . . func (f *Function) compileInputs() { 12 . . offset := 0 13 . . 14 20ms 20ms for i, input := range f.Input { 15 60ms 60ms structType, isStructType := types.Unwrap(input.Typ).(*types.Struct) ⋮ ⋮ resource, isResource := wrapped.(*Resource) Unwrap.go:5 16 . . 17 . . if isStructType { 18 10ms 10ms if strings.HasPrefix(input.Name, "_") { return stringslite.HasPrefix(s, prefix) strings.go:521 return len(s) >= len(prefix) && s[:len(prefix)] == prefix strings.go:17 19 . . offset += len(structType.Fields) - 1 20 . . continue 21 . . } 22 . . 23 . 50ms structure := &ssa.Struct{ 24 . . Typ: structType, 25 . . Source: input.Source, 26 . . } 27 . . 28 . . for _, field := range structType.Fields { 29 . 50ms param := &ssa.Parameter{ 30 . . Index: uint8(offset + i), 31 20ms 120ms Name: input.Name + "." + field.Name, 32 . . Typ: field.Type, 33 . . Tokens: input.Tokens, 34 . . Structure: structure, 35 . . Source: input.Source, 36 . . } 37 . . 38 . 50ms f.Append(param) 39 . 20ms structure.Arguments = append(structure.Arguments, param) 40 . . offset++ 41 . . } 42 . . 43 . . offset-- 44 10ms 100ms f.Block().Identify(input.Name, structure) b.Identifiers = make(map[string]Value, 8) Block.go:204 ⋮ b.Identifiers[name] = value Block.go:207 ⋮ 45 . . continue 46 . . } 47 . . 48 30ms 30ms if strings.HasPrefix(input.Name, "_") { return stringslite.HasPrefix(s, prefix) strings.go:521 return len(s) >= len(prefix) && s[:len(prefix)] == prefix strings.go:17 49 . . continue 50 . . } 51 . . 52 . . input.Index = uint8(offset + i) 53 40ms 280ms f.Block().Identify(input.Name, input) if b.Identifiers == nil { Block.go:203 ⋮ b.Identifiers[name] = value Block.go:207 ⋮ return ir.Blocks[len(ir.Blocks)-1] IR.go:27 ⋮ b.Identifiers = make(map[string]Value, 8) Block.go:204 54 . 50ms f.Append(input) 55 . . } 56 . . }
git.urbach.dev/cli/q/src/compiler.Compile
/home/user/q/src/compiler/Compile.go
Total: 0 1.31s (flat, cum) 3.49% 7 . . "git.urbach.dev/cli/q/src/scanner" 8 . . ) 9 . . 10 . . // Compile waits for the scan to finish and compiles all functions. 11 . . func Compile(build *config.Build) (*core.Environment, error) { 12 . 1.31s env, err := scanner.Scan(build) 13 . . 14 . . if err != nil { 15 . . return nil, err 16 . . } 17 . .
git.urbach.dev/cli/q/src/compiler.Compile
/home/user/q/src/compiler/Compile.go
Total: 110ms 430ms (flat, cum) 1.15% 35 . . 36 . . // Parse struct field types and calculate the size of all structs. 37 . . // We couldn't do that during the scan phase because it's possible 38 . . // that a field references a type that will only be known after the 39 . . // full scan is finished. 40 . 30ms err = parseFieldTypes(env.Structs(), env) for structure := range structs { parseFieldTypes.go:13 for _, structure := range pkg.Structs { Environment.go:95 ⋮ for _, pkg := range env.Packages { Environment.go:94 41 . . 42 . . if err != nil { 43 . . return nil, err 44 . . } 45 . . 46 . . // Parse input and output types so we have type information 47 . . // ready for all functions before parallel compilation starts. 48 . . // This ensures that the function compilers have access to 49 . . // type checking for all function calls. 50 110ms 400ms err = parseTypes(env.Functions(), env) for f := range functions { parseTypes.go:14 for variant := range fn.Variants { Environment.go:65 if !yield(f) { Function.go:80 if !yield(variant) { Environment.go:66 Input: make([]types.Type, len(f.Input)), parseTypes.go:16 ⋮ input.Name = input.Tokens[0].String(f.File.Bytes) parseTypes.go:21 ⋮ f.AddSuffix(suffix.String()) parseTypes.go:64 f.name += suffix Function.go:48 ⋮ for i, output := range f.Output { parseTypes.go:32 ⋮ typeTokens := output.Tokens parseTypes.go:33 ⋮ for _, fn := range pkg.Functions { Environment.go:64 ⋮ ⋮ for variant := range fn.Variants { Environment.go:65 if !yield(f) { Function.go:80 if !yield(variant) { Environment.go:66 if f.Previous != nil || f.Next != nil { parseTypes.go:51 ⋮ } parseTypes.go:66 ⋮ for f := range functions { parseTypes.go:14 for variant := range fn.Variants { Environment.go:65 if !yield(f) { Function.go:80 if !yield(variant) { Environment.go:66 f.Type = &types.Function{ parseTypes.go:15 ⋮ typ, err := core.TypeFromTokens(input.Tokens[1:], f.File, env) parseTypes.go:22 ⋮ input.Name = input.Tokens[0].String(f.File.Bytes) parseTypes.go:21 ⋮ for i, input := range f.Input { parseTypes.go:55 ⋮ Input: make([]types.Type, len(f.Input)), parseTypes.go:16 ⋮ Output: make([]types.Type, len(f.Output)), parseTypes.go:17 ⋮ for _, fn := range pkg.Functions { Environment.go:64 51 . . 52 . . if err != nil { 53 . . return nil, err 54 . . }
git.urbach.dev/cli/q/src/compiler.Compile.func1
/home/user/q/src/compiler/Compile.go
Total: 0 11.15s (flat, cum) 29.70% 55 . . 56 . . // Start parallel compilation of all functions. 57 . . // We compile every function for syntax checks even if 58 . . // they are thrown away later during dead code elimination. 59 . 11.15s parallel(env.Functions(), func(f *core.Function) { f.Compile() }) 60 . .
git.urbach.dev/cli/q/src/compiler.Compile
/home/user/q/src/compiler/Compile.go
Total: 30ms 40ms (flat, cum) 0.11% 61 . . // Report errors if any occurred 62 30ms 40ms for f := range env.Functions() { for _, fn := range pkg.Functions { Environment.go:64 ⋮ for _, pkg := range env.Packages { Environment.go:63 ⋮ for variant := range fn.Variants { Environment.go:65 if !yield(f) { Function.go:80 if !yield(variant) { Environment.go:66 if f.Err != nil { Compile.go:63
git.urbach.dev/cli/q/src/compiler.Compile-range1
/home/user/q/src/compiler/Compile.go
Total: 20ms 20ms (flat, cum) 0.053% 63 20ms 20ms if f.Err != nil { 64 . . return nil, f.Err
git.urbach.dev/cli/q/src/compiler.Compile
/home/user/q/src/compiler/Compile.go
Total: 20ms 20ms (flat, cum) 0.053% 65 . . } 66 10ms 10ms } 67 . . 68 . . // Check for unused imports in all files 69 10ms 10ms for _, file := range env.Files { 70 . . for _, imp := range file.Imports { 71 . . if imp.Used.Load() == 0 { 72 . . return nil, errors.New(&UnusedImport{Package: imp.Package}, file, imp.Position) 73 . . }
git.urbach.dev/cli/q/src/compiler.Compile.func2
/home/user/q/src/compiler/Compile.go
Total: 10ms 4.23s (flat, cum) 11.27% 75 . . } 76 . . 77 . . // Now that we know which functions are alive, start parallel 78 . . // assembly code generation only for the live functions. 79 10ms 4.23s parallel(env.LiveFunctions(), func(f *core.Function) { f.Assemble() }) 80 . . 81 . . return env, nil 82 . . }
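
The parallel helper itself is not part of this listing. A minimal sketch of what such a fan-out over an iter.Seq could look like, assuming one worker goroutine per CPU; parallelSketch and its shape are hypothetical, not the q compiler's implementation:

    package main

    import (
        "fmt"
        "iter"
        "runtime"
        "sync"
    )

    // parallelSketch drains the sequence into a channel and processes the
    // items on one goroutine per CPU.
    func parallelSketch[T any](seq iter.Seq[T], work func(T)) {
        jobs := make(chan T)
        var wg sync.WaitGroup

        for range runtime.NumCPU() {
            wg.Add(1)
            go func() {
                defer wg.Done()
                for item := range jobs {
                    work(item)
                }
            }()
        }

        for item := range seq {
            jobs <- item
        }
        close(jobs)
        wg.Wait()
    }

    func main() {
        var numbers iter.Seq[int] = func(yield func(int) bool) {
            for i := 0; i < 8; i++ {
                if !yield(i) {
                    return
                }
            }
        }
        parallelSketch(numbers, func(n int) { fmt.Println("worked on", n) })
    }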
internal/filepathlite.(*lazybuf).append
/usr/lib/go/src/internal/filepathlite/path.go
Total: 50ms 50ms (flat, cum) 0.13% 35 . . } 36 . . return b.path[i] 37 . . } 38 . . 39 . . func (b *lazybuf) append(c byte) { 40 10ms 10ms if b.buf == nil { 41 30ms 30ms if b.w < len(b.path) && b.path[b.w] == c { 42 10ms 10ms b.w++ 43 . . return 44 . . } 45 . . b.buf = make([]byte, len(b.path)) 46 . . copy(b.buf, b.path[:b.w]) 47 . . }
internal/filepathlite.(*lazybuf).string
/usr/lib/go/src/internal/filepathlite/path.go
Total: 10ms 10ms (flat, cum) 0.027% 53 . . b.buf = slices.Insert(b.buf, 0, prefix...) 54 . . b.w += len(prefix) 55 . . } 56 . . 57 . . func (b *lazybuf) string() string { 58 10ms 10ms if b.buf == nil { 59 . . return b.volAndPath[:b.volLen+b.w] 60 . . } 61 . . return b.volAndPath[:b.volLen] + string(b.buf[:b.w])
internal/filepathlite.Clean
/usr/lib/go/src/internal/filepathlite/path.go
Total: 40ms 40ms (flat, cum) 0.11% 62 . . } 63 . . 64 . . // Clean is filepath.Clean. 65 30ms 30ms func Clean(path string) string { 66 . . originalPath := path 67 . . volLen := volumeNameLen(path) 68 . . path = path[volLen:] 69 . . if path == "" { 70 . . if volLen > 1 && IsPathSeparator(originalPath[0]) && IsPathSeparator(originalPath[1]) { 71 . . // should be UNC 72 . . return FromSlash(originalPath) 73 . . } 74 . . return originalPath + "." 75 . . } 76 10ms 10ms rooted := IsPathSeparator(path[0]) return Separator == c path_unix.go:20 77 . . 78 . . // Invariants: 79 . . // reading from path; r is index of next byte to process. 80 . . // writing to buf; w is index of next byte to write. 81 . . // dotdot is index in buf where .. must stop, either because
internal/filepathlite.Clean
/usr/lib/go/src/internal/filepathlite/path.go
Total: 100ms 100ms (flat, cum) 0.27% 117 . . } 118 . . default: 119 . . // real path element. 120 . . // add slash if needed 121 . . if rooted && out.w != 1 || !rooted && out.w != 0 { 122 20ms 20ms out.append(Separator) if b.w < len(b.path) && b.path[b.w] == c { path.go:41 ⋮ if b.buf == nil { path.go:40 123 . . } 124 . . // copy element 125 30ms 30ms for ; r < n && !IsPathSeparator(path[r]); r++ { ⋮ ⋮ return Separator == c path_unix.go:20 126 40ms 40ms out.append(path[r]) ⋮ b.w++ path.go:42 ⋮ if b.w < len(b.path) && b.path[b.w] == c { path.go:41 ⋮ 127 . . } 128 . . } 129 . . } 130 . . 131 . . // Turn empty string into "." 132 . . if out.w == 0 { 133 . . out.append('.') 134 . . } 135 . . 136 . . postClean(&out) // avoid creating absolute paths on Windows 137 10ms 10ms return FromSlash(out.string()) if b.buf == nil { path.go:58 138 . . } 139 . . 140 . . // IsLocal is filepath.IsLocal. 141 . . func IsLocal(path string) bool { 142 . . return isLocal(path)
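
filepathlite.Clean is the implementation behind path/filepath.Clean, and the lazybuf above is why cleaning an already-clean path stays allocation-free: append only advances a write index until it hits the first byte that actually differs. For example:

    package main

    import (
        "fmt"
        "path/filepath"
    )

    func main() {
        // Already-clean input: the lazybuf fast path only counts bytes.
        fmt.Println(filepath.Clean("/usr/lib/go/src/slices/slices.go"))

        // Input that needs rewriting: the scratch buffer is allocated and
        // the path is rebuilt element by element.
        fmt.Println(filepath.Clean("/usr/lib//go/src/../src/./slices")) // /usr/lib/go/src/slices
    }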
internal/filepathlite.Base
/usr/lib/go/src/internal/filepathlite/path.go
Total: 30ms 30ms (flat, cum) 0.08% 220 . . } 221 . . return "" 222 . . } 223 . . 224 . . // Base is filepath.Base. 225 10ms 10ms func Base(path string) string { 226 . . if path == "" { 227 . . return "." 228 . . } 229 . . // Strip trailing slashes. 230 . . for len(path) > 0 && IsPathSeparator(path[len(path)-1]) { 231 . . path = path[0 : len(path)-1] 232 . . } 233 . . // Throw away volume name 234 . . path = path[len(VolumeName(path)):] 235 . . // Find the last element 236 . . i := len(path) - 1 237 10ms 10ms for i >= 0 && !IsPathSeparator(path[i]) { 238 10ms 10ms i-- 239 . . } 240 . . if i >= 0 { 241 . . path = path[i+1:] 242 . . } 243 . . // If empty now, it had only slashes.
runtime.memmove
/usr/lib/go/src/runtime/memmove_arm64.s
Total: 100ms 100ms (flat, cum) 0.27% 25 . . // The destination pointer is 16-byte aligned to minimize unaligned accesses. 26 . . // The loop tail is handled by always copying 64 bytes from the end. 27 . . 28 . . // func memmove(to, from unsafe.Pointer, n uintptr) 29 . . TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24 30 10ms 10ms CBZ R2, copy0 31 . . 32 . . // Small copies: 1..16 bytes 33 . . CMP $16, R2 34 . . BLE copy16 35 . . 36 . . // Large copies 37 10ms 10ms CMP $128, R2 38 . . BHI copy_long 39 . . CMP $32, R2 40 . . BHI copy32_128 41 . . 42 . . // Small copies: 17..32 bytes. 43 . . LDP (R1), (R6, R7) 44 . . ADD R1, R2, R4 // R4 points just past the last source byte 45 . . LDP -16(R4), (R12, R13) 46 . . STP (R6, R7), (R0) 47 . . ADD R0, R2, R5 // R5 points just past the last destination byte 48 . . STP (R12, R13), -16(R5) 49 . . RET 50 . . 51 . . // Small copies: 1..16 bytes. 52 . . copy16: 53 . . ADD R1, R2, R4 // R4 points just past the last source byte 54 . . ADD R0, R2, R5 // R5 points just past the last destination byte 55 . . CMP $8, R2 56 . . BLT copy7 57 10ms 10ms MOVD (R1), R6 58 . . MOVD -8(R4), R7 59 . . MOVD R6, (R0) 60 20ms 20ms MOVD R7, -8(R5) 61 . . RET 62 . . 63 . . copy7: 64 . . TBZ $2, R2, copy3 65 . . MOVWU (R1), R6 66 20ms 20ms MOVWU -4(R4), R7 67 . . MOVW R6, (R0) 68 . . MOVW R7, -4(R5) 69 . . RET 70 . . 71 . . copy3: 72 . . TBZ $1, R2, copy1 73 . . MOVHU (R1), R6 74 . . MOVHU -2(R4), R7 75 10ms 10ms MOVH R6, (R0) 76 . . MOVH R7, -2(R5) 77 . . RET 78 . . 79 . . copy1: 80 10ms 10ms MOVBU (R1), R6 81 10ms 10ms MOVB R6, (R0) 82 . . 83 . . copy0: 84 . . RET 85 . . 86 . . // Medium copies: 33..128 bytes.
runtime.memmove
/usr/lib/go/src/runtime/memmove_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 119 . . STP (R12, R13), -16(R5) 120 . . RET 121 . . 122 . . // Copy more than 128 bytes. 123 . . copy_long: 124 10ms 10ms ADD R1, R2, R4 // R4 points just past the last source byte 125 . . ADD R0, R2, R5 // R5 points just past the last destination byte 126 . . MOVD ZR, R7 127 . . MOVD ZR, R8 128 . . 129 . . CMP $1024, R2
runtime.memmove
/usr/lib/go/src/runtime/memmove_arm64.s
Total: 10ms 10ms (flat, cum) 0.027% 143 . . // When doing aligned stores, R7 is the dst pointer and R8 is 144 . . // the dstend pointer. 145 . . 146 . . backward_check: 147 . . // Use backward copy if there is an overlap. 148 10ms 10ms SUB R1, R0, R14 149 . . CBZ R14, copy0 150 . . CMP R2, R14 151 . . BCC copy_long_backward 152 . . 153 . . // Copy 16 bytes and then align src (R1) or dst (R0) to 16-byte alignment.
runtime.memmove
/usr/lib/go/src/runtime/memmove_arm64.s
Total: 50ms 50ms (flat, cum) 0.13% 169 . . STP (R6, R7), 16(R3) // Store B 170 . . LDP 16(R1), (R6, R7) // Load B (next iteration) 171 . . STP (R8, R9), 32(R3) // Store C 172 . . LDP 32(R1), (R8, R9) // Load C 173 . . STP (R10, R11), 48(R3) // Store D 174 20ms 20ms LDP 48(R1), (R10, R11) // Load D 175 10ms 10ms STP.W (R12, R13), 64(R3) // Store E 176 10ms 10ms LDP.W 64(R1), (R12, R13) // Load E 177 10ms 10ms SUBS $64, R2, R2 178 . . BHI loop64 179 . . 180 . . // Write the last iteration and copy 64 bytes from the end. 181 . . copy64_from_end: 182 . . LDP -64(R4), (R14, R15) // Load F
git.urbach.dev/cli/q/src/token.List.Instructions
/home/user/q/src/token/Instructions.go
Total: 170ms 6.09s (flat, cum) 16.22% 4 . . func (list List) Instructions(yield func(List) bool) { 5 . . start := 0 6 . . groupLevel := 0 7 . . blockLevel := 0 8 . . 9 70ms 70ms for i, t := range list { 10 30ms 30ms switch t.Kind { 11 . . case NewLine: 12 10ms 10ms if start == i { 13 . . start = i + 1 14 . . continue 15 . . } 16 . . 17 10ms 10ms if groupLevel > 0 || blockLevel > 0 { 18 . . continue 19 . . } 20 . . 21 . 3.84s if !yield(list[start:i]) { 22 . . return 23 . . } 24 . . 25 . . start = i + 1 26 . . 27 . . case GroupStart: 28 . . groupLevel++ 29 . . 30 . . case GroupEnd: 31 10ms 10ms groupLevel-- 32 . . 33 10ms 10ms case BlockStart: 34 . . blockLevel++ 35 . . 36 10ms 10ms case BlockEnd: 37 . . blockLevel-- 38 . . 39 10ms 10ms if groupLevel > 0 || blockLevel > 0 { 40 . . continue 41 . . } 42 . . 43 . . switch list[start].Kind { 44 . . case Const, Else, Extern, If, Loop, Switch: 45 . . default: 46 . . continue 47 . . } 48 . . 49 . 1.98s if !yield(list[start : i+1]) { 50 . . return 51 . . } 52 . . 53 . . start = i + 1 54 . . 55 . . case EOF: 56 . . if start < i { 57 . . yield(list[start:i]) 58 . . } 59 . . 60 . . return 61 . . } 62 . . } 63 . . 64 . . if start < len(list) { 65 10ms 110ms yield(list[start:]) 66 . . } 67 . . }
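
Instructions splits a token list into instructions at top-level newlines and at the closing brace of block constructs, tracking group and block nesting so that multi-line calls and bodies stay together; nearly all of the 6.09s cumulative is the yield bodies (the per-instruction compilation), not the splitting itself. A simplified, self-contained illustration of the same splitting idea over plain bytes, using characters instead of the q token kinds and omitting the keyword check the real code applies at BlockEnd:

    package main

    import "fmt"

    // instructions emits a segment at every top-level newline and after every
    // top-level closing brace, keeping anything inside parentheses or braces
    // together.
    func instructions(src string, yield func(string) bool) {
        start, group, block := 0, 0, 0
        for i := 0; i < len(src); i++ {
            switch src[i] {
            case '(':
                group++
            case ')':
                group--
            case '{':
                block++
            case '}':
                block--
                if group == 0 && block == 0 {
                    if !yield(src[start : i+1]) {
                        return
                    }
                    start = i + 1
                }
            case '\n':
                if group > 0 || block > 0 || start == i {
                    if start == i {
                        start = i + 1
                    }
                    continue
                }
                if !yield(src[start:i]) {
                    return
                }
                start = i + 1
            }
        }
        if start < len(src) {
            yield(src[start:])
        }
    }

    func main() {
        src := "x := f(\n1,\n2)\nloop {\ny++\n}\nz := 3"
        instructions(src, func(s string) bool {
            fmt.Printf("%q\n", s) // "x := f(\n1,\n2)", "loop {\ny++\n}", "z := 3"
            return true
        })
    }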
runtime.mapaccess1
/usr/lib/go/src/internal/runtime/maps/runtime_swiss.go
Total: 70ms 150ms (flat, cum) 0.4% 60 . . 61 . . if m.writing != 0 { 62 . . fatal("concurrent map read and map write") 63 . . } 64 . . 65 30ms 80ms hash := typ.Hasher(key, m.seed) 66 . . 67 20ms 20ms if m.dirLen <= 0 { 68 . 10ms _, elem, ok := m.getWithKeySmall(typ, hash, key) 69 . . if !ok { 70 . . return unsafe.Pointer(&zeroVal[0]) 71 . . } 72 . . return elem 73 . . } 74 . . 75 . . // Select table. 76 . . idx := m.directoryIndex(hash) 77 . . t := m.directoryAt(idx) 78 . . 79 . . // Probe table. 80 . . seq := makeProbeSeq(h1(hash), t.groups.lengthMask) 81 . . for ; ; seq = seq.next() { 82 . . g := t.groups.group(typ, seq.offset) 83 . . 84 10ms 10ms match := g.ctrls().matchH2(h2(hash)) return ctrlGroupMatchH2(g, h) group.go:154 v := uint64(g) ^ (bitsetLSB * uint64(h)) group.go:170 85 . . 86 . . for match != 0 { 87 . . i := match.first() 88 . . 89 . . slotKey := g.key(typ, i) 90 . . slotKeyOrig := slotKey 91 . . if typ.IndirectKey() { 92 . . slotKey = *((*unsafe.Pointer)(slotKey)) 93 . . } 94 10ms 30ms if typ.Key.Equal(key, slotKey) { 95 . . slotElem := unsafe.Pointer(uintptr(slotKeyOrig) + typ.ElemOff) 96 . . if typ.IndirectElem() { 97 . . slotElem = *((*unsafe.Pointer)(slotElem)) 98 . . } 99 . . return slotElem
runtime.mapaccess2
/usr/lib/go/src/internal/runtime/maps/runtime_swiss.go
Total: 20ms 50ms (flat, cum) 0.13% 124 . . if asan.Enabled && m != nil { 125 . . asan.Read(key, typ.Key.Size_) 126 . . } 127 . . 128 . . if m == nil || m.Used() == 0 { 129 . 20ms if err := mapKeyError(typ, key); err != nil { return mapKeyError2(t.Key, p) map.go:828 130 . . panic(err) // see issue 23734 131 . . } 132 . . return unsafe.Pointer(&zeroVal[0]), false 133 . . } 134 . . 135 . . if m.writing != 0 { 136 . . fatal("concurrent map read and map write") 137 . . } 138 . . 139 . . hash := typ.Hasher(key, m.seed) 140 . . 141 . . if m.dirLen == 0 { 142 10ms 20ms _, elem, ok := m.getWithKeySmall(typ, hash, key) 143 . . if !ok { 144 . . return unsafe.Pointer(&zeroVal[0]), false 145 . . } 146 . . return elem, true 147 . . } 148 . . 149 . . // Select table. 150 . . idx := m.directoryIndex(hash) 151 . . t := m.directoryAt(idx) 152 . . 153 . . // Probe table. 154 10ms 10ms seq := makeProbeSeq(h1(hash), t.groups.lengthMask) 155 . . for ; ; seq = seq.next() { 156 . . g := t.groups.group(typ, seq.offset) 157 . . 158 . . match := g.ctrls().matchH2(h2(hash)) 159 . .
runtime.mapassign
/usr/lib/go/src/internal/runtime/maps/runtime_swiss.go
Total: 40ms 310ms (flat, cum) 0.83% 199 . . msan.Read(key, typ.Key.Size_) 200 . . } 201 . . if asan.Enabled { 202 . . asan.Read(key, typ.Key.Size_) 203 . . } 204 10ms 10ms if m.writing != 0 { 205 . . fatal("concurrent map writes") 206 . . } 207 . . 208 20ms 100ms hash := typ.Hasher(key, m.seed) 209 . . 210 . . // Set writing after calling Hasher, since Hasher may panic, in which 211 . . // case we have not actually done a write. 212 . . m.writing ^= 1 // toggle, see comment on writing 213 . . 214 . . if m.dirPtr == nil { 215 . 120ms m.growToSmall(typ) 216 . . } 217 . . 218 . . if m.dirLen == 0 { 219 . . if m.used < abi.SwissMapGroupSlots { 220 10ms 80ms elem := m.putSlotSmall(typ, hash, key) 221 . . 222 . . if m.writing == 0 { 223 . . fatal("concurrent map writes") 224 . . } 225 . . m.writing ^= 1
runtime.mapassign
/usr/lib/go/src/internal/runtime/maps/runtime_swiss.go
Total: 30ms 30ms (flat, cum) 0.08% 245 . . // necessary. 246 . . var firstDeletedGroup groupReference 247 . . var firstDeletedSlot uintptr 248 . . 249 . . for ; ; seq = seq.next() { 250 30ms 30ms g := t.groups.group(typ, seq.offset) offset := uintptr(i) * typ.GroupSize group.go:325 251 . . match := g.ctrls().matchH2(h2(hash)) 252 . . 253 . . // Look for an existing slot containing this key. 254 . . for match != 0 { 255 . . i := match.first()
runtime.mapassign
/usr/lib/go/src/internal/runtime/maps/runtime_swiss.go
Total: 0 10ms (flat, cum) 0.027% 302 . . if typ.IndirectKey() { 303 . . kmem := newobject(typ.Key) 304 . . *(*unsafe.Pointer)(slotKey) = kmem 305 . . slotKey = kmem 306 . . } 307 . 10ms typedmemmove(typ.Key, slotKey, key) 308 . . 309 . . slotElem = unsafe.Pointer(uintptr(slotKeyOrig) + typ.ElemOff) 310 . . if typ.IndirectElem() { 311 . . emem := newobject(typ.Elem) 312 . . *(*unsafe.Pointer)(slotElem) = emem
runtime.(*gcControllerState).update
/usr/lib/go/src/runtime/mgcpacer.go
Total: 70ms 70ms (flat, cum) 0.19% 892 . . } 893 . . } 894 . . 895 . . func (c *gcControllerState) update(dHeapLive, dHeapScan int64) { 896 . . if dHeapLive != 0 { 897 10ms 10ms trace := traceAcquire() if !traceEnabled() { traceruntime.go:188 898 60ms 60ms live := gcController.heapLive.Add(dHeapLive) return Xadd64(&u.value, delta) types.go:344 899 . . if trace.ok() { 900 . . // gcController.heapLive changed. 901 . . trace.HeapAlloc(live) 902 . . traceRelease(trace) 903 . . }
runtime.(*gcControllerState).addScannableStack
/usr/lib/go/src/runtime/mgcpacer.go
Total: 30ms 30ms (flat, cum) 0.08% 919 . . c.maxStackScan.Add(amount) 920 . . return 921 . . } 922 . . pp.maxStackScanDelta += amount 923 . . if pp.maxStackScanDelta >= maxStackScanSlack || pp.maxStackScanDelta <= -maxStackScanSlack { 924 30ms 30ms c.maxStackScan.Add(pp.maxStackScanDelta) return Xadd64(&u.value, delta) types.go:344 ⋮ 925 . . pp.maxStackScanDelta = 0 926 . . } 927 . . } 928 . . 929 . . func (c *gcControllerState) addGlobals(amount int64) {
runtime.(*gcControllerState).heapGoalInternal
/usr/lib/go/src/runtime/mgcpacer.go
Total: 10ms 30ms (flat, cum) 0.08% 938 . . 939 . . // heapGoalInternal is the implementation of heapGoal which returns additional 940 . . // information that is necessary for computing the trigger. 941 . . // 942 . . // The returned minTrigger is always <= goal. 943 10ms 10ms func (c *gcControllerState) heapGoalInternal() (goal, minTrigger uint64) { 944 . . // Start with the goal calculated for gcPercent. 945 . . goal = c.gcPercentHeapGoal.Load() 946 . . 947 . . // Check if the memory-limit-based goal is smaller, and if so, pick that. 948 . 20ms if newGoal := c.memoryLimitHeapGoal(); newGoal < goal { 949 . . goal = newGoal 950 . . } else { 951 . . // We're not limited by the memory limit goal, so perform a series of 952 . . // adjustments that might move the goal forward in a variety of circumstances. 953 . .
runtime.(*gcControllerState).memoryLimitHeapGoal
/usr/lib/go/src/runtime/mgcpacer.go
Total: 10ms 10ms (flat, cum) 0.027% 988 . . // memoryLimitHeapGoal returns a heap goal derived from memoryLimit. 989 . . func (c *gcControllerState) memoryLimitHeapGoal() uint64 { 990 . . // Start by pulling out some values we'll need. Be careful about overflow. 991 . . var heapFree, heapAlloc, mappedReady uint64 992 . . for { 993 10ms 10ms heapFree = c.heapFree.load() // Free and unscavenged memory. 994 . . heapAlloc = c.totalAlloc.Load() - c.totalFree.Load() // Heap object bytes in use. 995 . . mappedReady = c.mappedReady.Load() // Total unreleased mapped memory. 996 . . if heapFree+heapAlloc <= mappedReady { 997 . . break 998 . . }
runtime.(*gcControllerState).memoryLimitHeapGoal
/usr/lib/go/src/runtime/mgcpacer.go
Total: 10ms 10ms (flat, cum) 0.027% 1092 . . goal = headroom 1093 . . } else { 1094 . . goal = goal - headroom 1095 . . } 1096 . . // Don't let us go below the live heap. A heap goal below the live heap doesn't make sense. 1097 10ms 10ms if goal < c.heapMarked { 1098 . . goal = c.heapMarked 1099 . . } 1100 . . return goal 1101 . . } 1102 . .
runtime.(*gcControllerState).trigger
/usr/lib/go/src/runtime/mgcpacer.go
Total: 20ms 50ms (flat, cum) 0.13% 1126 . . // 1127 . . // The returned value may be compared against heapLive to determine whether 1128 . . // the GC should trigger. Thus, the GC trigger condition should be (but may 1129 . . // not be, in the case of small movements for efficiency) checked whenever 1130 . . // the heap goal may change. 1131 10ms 10ms func (c *gcControllerState) trigger() (uint64, uint64) { 1132 10ms 40ms goal, minTrigger := c.heapGoalInternal() 1133 . . 1134 . . // Invariant: the trigger must always be less than the heap goal. 1135 . . // 1136 . . // Note that the memory limit sets a hard maximum on our heap goal, 1137 . . // but the live heap may grow beyond it.
runtime.(*gcControllerState).trigger
/usr/lib/go/src/runtime/mgcpacer.go
Total: 10ms 10ms (flat, cum) 0.027% 1185 . . if runway > goal { 1186 . . trigger = minTrigger 1187 . . } else { 1188 . . trigger = goal - runway 1189 . . } 1190 10ms 10ms trigger = max(trigger, minTrigger) 1191 . . trigger = min(trigger, maxTrigger) 1192 . . if trigger > goal { 1193 . . print("trigger=", trigger, " heapGoal=", goal, "\n") 1194 . . print("minTrigger=", minTrigger, " maxTrigger=", maxTrigger, "\n") 1195 . . throw("produced a trigger greater than the heap goal")
runtime.(*fixalloc).alloc
/usr/lib/go/src/runtime/mfixalloc.go
Total: 150ms 150ms (flat, cum) 0.4% 70 . . f.stat = stat 71 . . f.zero = true 72 . . } 73 . . 74 . . func (f *fixalloc) alloc() unsafe.Pointer { 75 10ms 10ms if f.size == 0 { 76 . . print("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n") 77 . . throw("runtime: internal error") 78 . . } 79 . . 80 10ms 10ms if f.list != nil { 81 . . v := unsafe.Pointer(f.list) 82 130ms 130ms f.list = f.list.next 83 . . f.inuse += f.size 84 . . if f.zero { 85 . . memclrNoHeapPointers(v, f.size) 86 . . } 87 . . return v
runtime.(*fixalloc).free
/usr/lib/go/src/runtime/mfixalloc.go
Total: 10ms 10ms (flat, cum) 0.027% 100 . . f.inuse += f.size 101 . . return v 102 . . } 103 . . 104 . . func (f *fixalloc) free(p unsafe.Pointer) { 105 10ms 10ms f.inuse -= f.size 106 . . v := (*mlink)(p) 107 . . v.next = f.list 108 . . f.list = v 109 . . }
git.urbach.dev/cli/q/src/core.(*Function).AddInput
/home/user/q/src/core/Function.go
Total: 20ms 210ms (flat, cum) 0.56% 27 . . codegen.Function 28 . . } 29 . . 30 . . // AddInput adds an input parameter. 31 . . func (f *Function) AddInput(tokens token.List, source token.Source) { 32 20ms 210ms f.Input = append(f.Input, &ssa.Parameter{ 33 . . Tokens: tokens, 34 . . Source: source, 35 . . })
git.urbach.dev/cli/q/src/core.(*Function).AddOutput
/home/user/q/src/core/Function.go
Total: 10ms 50ms (flat, cum) 0.13% 37 . . 38 . . // AddOutput adds an output parameter. 39 . . func (f *Function) AddOutput(tokens token.List, source token.Source) { 40 10ms 50ms f.Output = append(f.Output, &ssa.Parameter{ 41 . . Tokens: tokens, 42 . . Source: source, 43 . . })
git.urbach.dev/cli/q/src/core.(*Function).AddSuffix
/home/user/q/src/core/Function.go
Total: 0 10ms (flat, cum) 0.027% 45 . . 46 . . // AddSuffix adds a suffix to the name and is used for generic functions. 47 . . func (f *Function) AddSuffix(suffix string) { 48 . 10ms f.name += suffix 49 . . f.FullName += suffix 50 . . } 51 . . 52 . . // IsExtern returns true if the function has no body. 53 . . func (f *Function) IsExtern() bool {
git.urbach.dev/cli/q/src/core.(*Function).Package
/home/user/q/src/core/Function.go
Total: 10ms 10ms (flat, cum) 0.027% 64 . . return f.name 65 . . } 66 . . 67 . . // Package returns the package name. 68 . . func (f *Function) Package() string { 69 10ms 10ms return f.pkg 70 . . } 71 . . 72 . . // String returns the unique name. 73 . . func (f *Function) String() string { 74 . . return f.FullName
git.urbach.dev/cli/q/src/core.(*Function).Variants
/home/user/q/src/core/Function.go
Total: 120ms 730ms (flat, cum) 1.94% 75 . . } 76 . . 77 . . // Variants returns all function overloads. 78 . . func (f *Function) Variants(yield func(*Function) bool) { 79 . . for { 80 120ms 730ms if !yield(f) { ⋮ if !yield(variant) { Environment.go:66 Input: make([]types.Type, len(f.Input)), parseTypes.go:16 ⋮ input.Name = input.Tokens[0].String(f.File.Bytes) parseTypes.go:21 ⋮ f.AddSuffix(suffix.String()) parseTypes.go:64 f.name += suffix Function.go:48 ⋮ for i, output := range f.Output { parseTypes.go:32 ⋮ typeTokens := output.Tokens parseTypes.go:33 ⋮ if f.Previous != nil || f.Next != nil { parseTypes.go:51 ⋮ if f.Err != nil { Compile.go:63 ⋮ f.Type = &types.Function{ parseTypes.go:15 ⋮ typ, err := core.TypeFromTokens(input.Tokens[1:], f.File, env) parseTypes.go:22 ⋮ input.Name = input.Tokens[0].String(f.File.Bytes) parseTypes.go:21 ⋮ for i, input := range f.Input { parseTypes.go:55 ⋮ Input: make([]types.Type, len(f.Input)), parseTypes.go:16 ⋮ Output: make([]types.Type, len(f.Output)), parseTypes.go:17 ⋮ 81 . . return 82 . . } 83 . . 84 . . f = f.Next 85 . .
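
Variants pushes the function itself and then every overload linked after it through yield; the large cumulative figure is again the callers' loop bodies (parseTypes, the error check in Compile) rather than the list walk. A minimal sketch of the pattern with a hypothetical node type standing in for *core.Function:

    package main

    import (
        "fmt"
        "iter"
    )

    // overload is a hypothetical stand-in for the linked list of function
    // variants: each node points at the next overload of the same name.
    type overload struct {
        name string
        next *overload
    }

    // variants yields the node itself and every node linked after it,
    // mirroring the shape of (*Function).Variants in the listing.
    func (o *overload) variants() iter.Seq[*overload] {
        return func(yield func(*overload) bool) {
            for o != nil {
                if !yield(o) {
                    return
                }
                o = o.next
            }
        }
    }

    func main() {
        f := &overload{name: "write", next: &overload{name: "write.v2"}}
        for v := range f.variants() {
            fmt.Println(v.name) // write, write.v2
        }
    }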
runtime.roundupsize
/usr/lib/go/src/runtime/msize.go
Total: 150ms 150ms (flat, cum) 0.4% 13 . . 14 . . // Returns size of the memory block that mallocgc will allocate if you ask for the size, 15 . . // minus any inline space for metadata. 16 . . func roundupsize(size uintptr, noscan bool) (reqSize uintptr) { 17 . . reqSize = size 18 10ms 10ms if reqSize <= maxSmallSize-gc.MallocHeaderSize { 19 . . // Small object. 20 50ms 50ms if !noscan && reqSize > gc.MinSizeForMallocHeader { // !noscan && !heapBitsInSpan(reqSize) 21 . . reqSize += gc.MallocHeaderSize 22 . . } 23 . . // (reqSize - size) is either mallocHeaderSize or 0. We need to subtract mallocHeaderSize 24 . . // from the result if we have one, since mallocgc will add it back in. 25 10ms 10ms if reqSize <= gc.SmallSizeMax-8 { 26 80ms 80ms return uintptr(gc.SizeClassToSize[gc.SizeToSizeClass8[divRoundUp(reqSize, gc.SmallSizeDiv)]]) - (reqSize - size) 27 . . } 28 . . return uintptr(gc.SizeClassToSize[gc.SizeToSizeClass128[divRoundUp(reqSize-gc.SmallSizeMax, gc.LargeSizeDiv)]]) - (reqSize - size) 29 . . } 30 . . // Large object. Align reqSize up to the next page. Check for overflow. 31 . . reqSize += pageSize - 1
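
roundupsize snaps small requests to the next malloc size class. The effect is visible from ordinary code through append's capacity growth, which goes through the same rounding; on current Go releases a 33-byte request lands on the 48-byte class, though exact class boundaries can differ between Go versions:

    package main

    import "fmt"

    func main() {
        // append's growth is rounded to a malloc size class, so the resulting
        // capacity is a class size rather than the exact requested length.
        s := append([]byte(nil), make([]byte, 33)...)
        fmt.Println(len(s), cap(s)) // 33 48 (48 is the size class for 33 bytes)
    }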
git.urbach.dev/cli/q/src/core.(*Environment).AddPackage
/home/user/q/src/core/Environment.go
Total: 10ms 140ms (flat, cum) 0.37%
  21      .      .      NumFunctions int
  22      .      .  }
  23      .      .
  24      .      .  // AddPackage returns an existing package with the giving name or creates a new one.
  25      .      .  func (env *Environment) AddPackage(name string, isExtern bool) *Package {
  26      .   40ms      pkg, exists := env.Packages[name]
  27      .      .
  28   10ms   10ms      if !exists {
  29      .   20ms          pkg = &Package{
  30      .      .              Name:      name,
  31      .   20ms              Constants: make(map[string]*Constant),
  32      .      .              Functions: make(map[string]*Function, 8),
  33      .   20ms              Structs:   make(map[string]*types.Struct),
  34      .      .              IsExtern:  isExtern,
  35      .      .          }
  36      .      .
  37      .   30ms          env.Packages[name] = pkg
  38      .      .      }
  39      .      .
  40      .      .      return pkg
  41      .      .  }
  42      .      .
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).Functions.func5
/home/user/q/src/core/Environment.go
Total: 10ms 20ms (flat, cum) 0.053%
  58      .      .  }
  59      .      .
  60      .      .  // Functions returns an iterator over all functions.
  61      .      .  func (env *Environment) Functions() iter.Seq[*Function] {
  62      .      .      return func(yield func(*Function) bool) {
  63   10ms   20ms          for _, pkg := range env.Packages {
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).Functions.func6
/home/user/q/src/core/Environment.go
Total: 10ms 90ms (flat, cum) 0.24%
  64   10ms   90ms              for _, fn := range pkg.Functions {
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).Functions.func4
/home/user/q/src/core/Environment.go
Total: 120ms 740ms (flat, cum) 1.97%
  65  120ms  740ms                  for variant := range fn.Variants {
                                      if !yield(f) {  Function.go:80
                                      if !yield(variant) {  Environment.go:66
                                      Input: make([]types.Type, len(f.Input)),  parseTypes.go:16
                                    ⋮ input.Name = input.Tokens[0].String(f.File.Bytes)  parseTypes.go:21
                                    ⋮ f.AddSuffix(suffix.String())  parseTypes.go:64
                                      f.name += suffix  Function.go:48
                                    ⋮ for i, output := range f.Output {  parseTypes.go:32
                                    ⋮ typeTokens := output.Tokens  parseTypes.go:33
                                    ⋮ if f.Previous != nil || f.Next != nil {  parseTypes.go:51
                                    ⋮
                                    ⋮ if !yield(f) {  Function.go:80
                                      if !yield(variant) {  Environment.go:66
                                      if f.Err != nil {  Compile.go:63
                                    ⋮ f.Type = &types.Function{  parseTypes.go:15
                                    ⋮ typ, err := core.TypeFromTokens(input.Tokens[1:], f.File, env)  parseTypes.go:22
                                    ⋮
                                    ⋮ if !yield(f) {  Function.go:80
                                      if !yield(variant) {  Environment.go:66
                                      input.Name = input.Tokens[0].String(f.File.Bytes)  parseTypes.go:21
                                    ⋮ for i, input := range f.Input {  parseTypes.go:55
                                    ⋮ Input: make([]types.Type, len(f.Input)),  parseTypes.go:16
                                    ⋮ Output: make([]types.Type, len(f.Output)),  parseTypes.go:17
git.urbach.dev/cli/q/src/compiler.Compile.parseTypes.Compile.(*Environment).Functions.func4-range5
/home/user/q/src/core/Environment.go
Total: 110ms 710ms (flat, cum) 1.89%
  66  110ms  710ms                      if !yield(variant) {
                                          Input: make([]types.Type, len(f.Input)),  parseTypes.go:16
                                        ⋮ input.Name = input.Tokens[0].String(f.File.Bytes)  parseTypes.go:21
                                        ⋮ f.AddSuffix(suffix.String())  parseTypes.go:64
                                          f.name += suffix  Function.go:48
                                        ⋮ for i, output := range f.Output {  parseTypes.go:32
                                        ⋮ typeTokens := output.Tokens  parseTypes.go:33
                                        ⋮ if f.Previous != nil || f.Next != nil {  parseTypes.go:51
                                        ⋮ if f.Err != nil {  Compile.go:63
                                        ⋮ f.Type = &types.Function{  parseTypes.go:15
                                        ⋮
                                        ⋮ typ, err := core.TypeFromTokens(input.Tokens[1:], f.File, env)  parseTypes.go:22
                                        ⋮ input.Name = input.Tokens[0].String(f.File.Bytes)  parseTypes.go:21
                                        ⋮ for i, input := range f.Input {  parseTypes.go:55
                                        ⋮ Input: make([]types.Type, len(f.Input)),  parseTypes.go:16
                                        ⋮ Output: make([]types.Type, len(f.Output)),  parseTypes.go:17
  67      .      .                          return
  68      .      .                      }
  69      .      .                  }
  70      .      .              }
  71      .      .          }
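Note: the three listings above are one pipeline: Functions flattens packages → functions → overload variants into a single iterator, and the consumer's loop body (the parseTypes work in Compile) is what the inlined attributions charge to these yield frames. Reassembled from the listed lines (Environment.go:61-71), with nothing added beyond what they show:

    // Functions returns an iterator over all functions.
    func (env *Environment) Functions() iter.Seq[*Function] {
        return func(yield func(*Function) bool) {
            for _, pkg := range env.Packages {
                for _, fn := range pkg.Functions {
                    for variant := range fn.Variants {
                        if !yield(variant) {
                            return
                        }
                    }
                }
            }
        }
    }

A caller then simply writes `for f := range env.Functions() { ... }`; breaking out of that loop makes yield return false and unwinds all three nested loops at once.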
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).LiveFunctions.func7
/home/user/q/src/core/Environment.go
Total: 0 320ms (flat, cum) 0.85%
  74      .      .
  75      .      .  // LiveFunctions returns an iterator over functions that are alive.
  76      .      .  func (env *Environment) LiveFunctions() iter.Seq[*Function] {
  77      .      .      return func(yield func(*Function) bool) {
  78      .      .          running := true
  79      .   30ms          traversed := make(map[*Function]bool, env.NumFunctions)
  80      .      .
  81      .  290ms          env.Init.EachDependency(traversed, func(f *Function) {
  82      .      .              if !running {
  83      .      .                  return
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).LiveFunctions.func7.1
/home/user/q/src/core/Environment.go
Total: 0 260ms (flat, cum) 0.69%
  84      .      .              }
  85      .      .
  86      .  260ms              running = yield(f)
  87      .      .          })
  88      .      .      }
  89      .      .  }
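Note: LiveFunctions adapts a callback-based dependency walk to an iter.Seq. The walk itself cannot be aborted, so the running flag only turns the remaining callbacks into no-ops once the consumer breaks. A self-contained toy of the same shape; the node type and the EachDependency body are stand-ins, only the adapter mirrors the listing:

    package main

    import (
        "fmt"
        "iter"
    )

    // node stands in for *core.Function.
    type node struct {
        name string
        deps []*node
    }

    // EachDependency visits n and everything reachable from it exactly once.
    func (n *node) EachDependency(traversed map[*node]bool, visit func(*node)) {
        if traversed[n] {
            return
        }

        traversed[n] = true
        visit(n)

        for _, d := range n.deps {
            d.EachDependency(traversed, visit)
        }
    }

    // liveFunctions mirrors the adapter in the listing: the walk keeps going,
    // but yield is no longer called after the consumer stops.
    func liveFunctions(init *node) iter.Seq[*node] {
        return func(yield func(*node) bool) {
            running := true
            traversed := make(map[*node]bool)

            init.EachDependency(traversed, func(n *node) {
                if !running {
                    return
                }

                running = yield(n)
            })
        }
    }

    func main() {
        exit := &node{name: "run.exit"}
        mainFn := &node{name: "main", deps: []*node{exit}}
        init := &node{name: "init", deps: []*node{mainFn, exit}}

        for fn := range liveFunctions(init) {
            fmt.Println(fn.name) // init, main, run.exit
        }
    }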
git.urbach.dev/cli/q/src/core.TypeByName.(*Environment).Structs.func1
/home/user/q/src/core/Environment.go
Total: 0 30ms (flat, cum) 0.08%
  91      .      .  // Structs returns an iterator over all structs.
  92      .      .  func (env *Environment) Structs() iter.Seq[*types.Struct] {
  93      .      .      return func(yield func(*types.Struct) bool) {
  94      .   30ms          for _, pkg := range env.Packages {
git.urbach.dev/cli/q/src/compiler.Compile.(*Environment).Structs.func3
/home/user/q/src/core/Environment.go
Total: 0 10ms (flat, cum) 0.027%
  95      .   10ms              for _, structure := range pkg.Structs {
  96      .      .                  if !yield(structure) {
  97      .      .                      return
  98      .      .                  }
  99      .      .              }
 100      .      .          }
git.urbach.dev/cli/q/src/expression.(*Expression).AddChild
/home/user/q/src/expression/Expression.go
Total: 40ms 230ms (flat, cum) 0.61%
  15      .      .      precedence int8
  16      .      .  }
  17      .      .
  18      .      .  // AddChild adds a child to the expression.
  19      .      .  func (expr *Expression) AddChild(child *Expression) {
  20   20ms   20ms      if expr.Children == nil {
  21      .  170ms          expr.Children = make([]*Expression, 0, 2)
  22      .      .      }
  23      .      .
  24   20ms   40ms      expr.Children = append(expr.Children, child)
  25      .      .      child.Parent = expr
  26      .      .  }
  27      .      .
git.urbach.dev/cli/q/src/expression.(*Expression).EachLeaf
/home/user/q/src/expression/Expression.go
Total: 30ms 90ms (flat, cum) 0.24%
  28      .      .  // EachLeaf iterates through all leaves in the tree.
  29      .      .  func (expr *Expression) EachLeaf(yield func(*Expression) bool) bool {
  30      .      .      if expr.IsLeaf() {
  31   10ms   40ms          return yield(expr)
  32      .      .      }
  33      .      .
  34   20ms   20ms      for _, child := range expr.Children {
  35      .   30ms          if !child.EachLeaf(yield) {
  36      .      .              return false
  37      .      .          }
  38      .      .      }
  39      .      .
  40      .      .      return true
git.urbach.dev/cli/q/src/expression.(*Expression).InsertAbove
/home/user/q/src/expression/Expression.go
Total: 20ms 110ms (flat, cum) 0.29%
  55      .      .  // and attaches this expression as a child of the new parent. Effectively, it promotes the
  56      .      .  // given tree above the current node. It assumes that the caller is the last child.
  57      .      .  func (expr *Expression) InsertAbove(tree *Expression) {
  58      .      .      if expr.Parent != nil {
  59      .      .          expr.Parent.Children[len(expr.Parent.Children)-1] = tree
  60   10ms   10ms          tree.Parent = expr.Parent
  61      .      .      }
  62      .      .
  63   10ms  100ms      tree.AddChild(expr)
                          expr.Children = make([]*Expression, 0, 2)  Expression.go:21
                        ⋮ if expr.Children == nil {  Expression.go:20
                        ⋮ expr.Children = make([]*Expression, 0, 2)  Expression.go:21
  64      .      .  }
  65      .      .
  66      .      .  // IsLeaf returns true if the expression has no children.
  67      .      .  func (expr *Expression) IsLeaf() bool {
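Note: InsertAbove is the precedence rewrite used while building the expression tree: the promoted node takes the current node's slot in its parent and then adopts it as a child. The two method bodies below are taken from the listed lines; the surrounding struct and the usage comment are illustrative:

    type Expression struct {
        Parent   *Expression
        Children []*Expression
    }

    // AddChild adds a child to the expression (Expression.go:19-26 above).
    func (expr *Expression) AddChild(child *Expression) {
        if expr.Children == nil {
            expr.Children = make([]*Expression, 0, 2)
        }

        expr.Children = append(expr.Children, child)
        child.Parent = expr
    }

    // InsertAbove promotes tree above expr: tree takes expr's slot as the last
    // child of expr's parent, then expr becomes a child of tree.
    func (expr *Expression) InsertAbove(tree *Expression) {
        if expr.Parent != nil {
            expr.Parent.Children[len(expr.Parent.Children)-1] = tree
            tree.Parent = expr.Parent
        }

        tree.AddChild(expr)
    }

Roughly: while parsing `a*b+c`, the finished `*` subtree calls InsertAbove with the lower-precedence `+` node, so `*` ends up as the left child of `+`.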
git.urbach.dev/cli/q/src/expression.(*Expression).LastChild
/home/user/q/src/expression/Expression.go
Total: 10ms 10ms (flat, cum) 0.027%
  69      .      .  }
  70      .      .
  71      .      .  // LastChild returns the last child.
  72      .      .  func (expr *Expression) LastChild() *Expression {
  73   10ms   10ms      return expr.Children[len(expr.Children)-1]
  74      .      .  }
  75      .      .
git.urbach.dev/cli/q/src/expression.(*Expression).Source.(*Expression).Leaves.func1
/home/user/q/src/expression/Expression.go
Total: 0 60ms (flat, cum) 0.16%
  77      .      .  func (expr *Expression) Leaves() iter.Seq[*Expression] {
  78      .      .      return func(yield func(*Expression) bool) {
  79      .   60ms          expr.EachLeaf(yield)
  80      .      .      }
  81      .      .  }
  82      .      .
  83      .      .  // RemoveChild removes a child from the expression.
  84      .      .  func (expr *Expression) RemoveChild(child *Expression) {
git.urbach.dev/cli/q/src/expression.(*Expression).Source
/home/user/q/src/expression/Expression.go
Total: 40ms 100ms (flat, cum) 0.27%
 102      .      .      expr.Token.Reset()
 103      .      .      expr.precedence = 0
 104      .      .  }
 105      .      .
 106      .      .  // Source returns the start and end positions in the source file.
 107   10ms   10ms  func (expr *Expression) Source() token.Source {
 108      .      .      start := expr.Token.Position
 109      .      .      end := expr.Token.End()
 110      .      .
 111   30ms   90ms      for leaf := range expr.Leaves() {
                          expr.EachLeaf(yield)  Expression.go:79
 112      .      .          if leaf.Token.Position < start {
 113      .      .              start = leaf.Token.Position
git.urbach.dev/cli/q/src/expression.(*Expression).Source-range1
/home/user/q/src/expression/Expression.go
Total: 20ms 20ms (flat, cum) 0.053%
 115      .      .              end = leaf.Token.End()
 116      .      .          }
 117   20ms   20ms      }
 118      .      .
 119      .      .      return token.Source{StartPos: start, EndPos: end}
 120      .      .  }
 121      .      .
 122      .      .  // SourceString returns the string that was parsed in this expression.
git.urbach.dev/cli/q/src/expression.(*Expression).String
/home/user/q/src/expression/Expression.go
Total: 0 120ms (flat, cum) 0.32%
 156      .      .  }
 157      .      .
 158      .      .  // String generates a textual representation of the expression.
 159      .      .  func (expr *Expression) String(source []byte) string {
 160      .      .      builder := strings.Builder{}
 161      .  120ms      expr.write(&builder, source)
 162      .      .      return builder.String()
 163      .      .  }
git.urbach.dev/cli/q/src/ssa.(*Liveness).AddUser
/home/user/q/src/ssa/Liveness.go
Total: 80ms 430ms (flat, cum) 1.15%
   5      .      .      users []Value
   6      .      .  }
   7      .      .
   8      .      .  // AddUser adds a new user of the value.
   9      .      .  func (l *Liveness) AddUser(user Value) {
  10   80ms  430ms      l.users = append(l.users, user)
  11      .      .  }
  12      .      .
  13      .      .  // RemoveUser removes a user of the value.
git.urbach.dev/cli/q/src/ssa.(*Liveness).RemoveUser
/home/user/q/src/ssa/Liveness.go
Total: 10ms 10ms (flat, cum) 0.027%
  14      .      .  func (l *Liveness) RemoveUser(user Value) {
  15      .      .      for i, search := range l.users {
  16      .      .          if search == user {
  17   10ms   10ms              l.users = append(l.users[:i], l.users[i+1:]...)
  18      .      .              return
  19      .      .          }
  20      .      .      }
git.urbach.dev/cli/q/src/ssa.(*Liveness).Users
/home/user/q/src/ssa/Liveness.go
Total: 50ms 50ms (flat, cum) 0.13%
  22      .      .
  23      .      .  // Users returns the users of the value.
  24      .      .  func (l *Liveness) Users() []Value {
  25   50ms   50ms      return l.users
  26      .      .  }
runtime.(*timers).wakeTime
/usr/lib/go/src/runtime/time.go
Total: 60ms 60ms (flat, cum) 0.16%
 983      .      .      // Note that the order of these two loads matters:
 984      .      .      // adjust updates minWhen to make it safe to clear minNextWhen.
 985      .      .      // We read minWhen after reading minNextWhen so that
 986      .      .      // if we see a cleared minNextWhen, we are guaranteed to see
 987      .      .      // the updated minWhen.
 988   60ms   60ms      nextWhen := ts.minWhenModified.Load()
                          return Loadint64(&i.value)  types.go:74
 989      .      .      when := ts.minWhenHeap.Load()
 990      .      .      if when == 0 || (nextWhen != 0 && nextWhen < when) {
 991      .      .          when = nextWhen
 992      .      .      }
 993      .      .      return when
runtime.(*timers).check
/usr/lib/go/src/runtime/time.go
Total: 120ms 120ms (flat, cum) 0.32%
1001      .      .  // If the time when the next timer should run is not 0,
1002      .      .  // it is always larger than the returned time.
1003      .      .  // We pass now in and out to avoid extra calls of nanotime.
1004      .      .  //
1005      .      .  //go:yeswritebarrierrec
1006   30ms   30ms  func (ts *timers) check(now int64, bubble *synctestBubble) (rnow, pollUntil int64, ran bool) {
1007      .      .      ts.trace("check")
1008      .      .      // If it's not yet time for the first timer, or the first adjusted
1009      .      .      // timer, then there is nothing to do.
1010   50ms   50ms      next := ts.wakeTime()
                          nextWhen := ts.minWhenModified.Load()  time.go:988
                          return Loadint64(&i.value)  types.go:74
1011   10ms   10ms      if next == 0 {
1012      .      .          // No timers to run or adjust.
1013      .      .          return now, 0, false
1014      .      .      }
1015      .      .
1016      .      .      if now == 0 {
1017   30ms   30ms          now = nanotime()
                              return nanotime1()  time_nofake.go:33
1018      .      .      }
1019      .      .
1020      .      .      // If this is the local P, and there are a lot of deleted timers,
1021      .      .      // clear them out. We only do this for the local P to reduce
1022      .      .      // lock contention on timersLock.
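Note: the 50ms under wakeTime is just the two atomic loads; what matters is their order, as the runtime comment above line 988 explains. A minimal sketch of that reader side with illustrative names (pending ≈ minWhenModified, primary ≈ minWhenHeap); the writer is assumed to update primary before clearing pending:

    import "sync/atomic"

    var (
        pending atomic.Int64 // cleared by the writer only after primary is updated
        primary atomic.Int64
    )

    // wakeTime loads pending before primary, so a reader that observes a
    // cleared pending value is guaranteed to observe the updated primary.
    func wakeTime() int64 {
        next := pending.Load()
        when := primary.Load()

        if when == 0 || (next != 0 && next < when) {
            when = next
        }

        return when
    }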
runtime.memequal
/usr/lib/go/src/internal/bytealg/equal_arm64.s
Total: 10ms 10ms (flat, cum) 0.027%
  27      .      .      CMP $16, R2
  28      .      .      BEQ equal
  29      .      .      CMP $1, R2
  30      .      .      // handle 1-byte special case for better performance
  31      .      .      BEQ one
  32   10ms   10ms      CMP $16, R2
  33      .      .      // handle specially if length < 16
  34      .      .      BLO tail
  35      .      .      BIC $0x3f, R2, R3
  36      .      .      CBZ R3, chunk16
  37      .      .      // work with 64-byte chunks
runtime.memequal
/usr/lib/go/src/internal/bytealg/equal_arm64.s
Total: 120ms 120ms (flat, cum) 0.32%
  59      .      .      BIC $0xf, R2, R3
  60      .      .      CBZ R3, tail
  61      .      .      ADD R3, R0, R6 // end of chunks
  62      .      .  chunk16_loop:
  63      .      .      LDP.P 16(R0), (R4, R5)
  64   20ms   20ms      LDP.P 16(R1), (R7, R9)
  65      .      .      EOR R4, R7
  66      .      .      CBNZ R7, not_equal
  67      .      .      EOR R5, R9
  68      .      .      CBNZ R9, not_equal
  69   10ms   10ms      CMP R0, R6
  70      .      .      BNE chunk16_loop
  71      .      .      AND $0xf, R2, R2
  72      .      .      CBZ R2, equal
  73      .      .  tail:
  74      .      .      // special compare of tail with length < 16
  75      .      .      TBZ $3, R2, lt_8
  76      .      .      MOVD (R0), R4
  77      .      .      MOVD (R1), R5
  78   10ms   10ms      EOR R4, R5
  79      .      .      CBNZ R5, not_equal
  80      .      .      SUB $8, R2, R6 // offset of the last 8 bytes
  81      .      .      MOVD (R0)(R6), R4
  82      .      .      MOVD (R1)(R6), R5
  83      .      .      EOR R4, R5
  84      .      .      CBNZ R5, not_equal
  85      .      .      B equal
  86      .      .      PCALIGN $16
  87      .      .  lt_8:
  88      .      .      TBZ $2, R2, lt_4
  89      .      .      MOVWU (R0), R4
  90   10ms   10ms      MOVWU (R1), R5
  91   40ms   40ms      EOR R4, R5
  92      .      .      CBNZ R5, not_equal
  93      .      .      SUB $4, R2, R6 // offset of the last 4 bytes
  94      .      .      MOVWU (R0)(R6), R4
  95      .      .      MOVWU (R1)(R6), R5
  96      .      .      EOR R4, R5
  97      .      .      CBNZ R5, not_equal
  98      .      .      B equal
  99      .      .      PCALIGN $16
 100      .      .  lt_4:
 101      .      .      TBZ $1, R2, lt_2
 102      .      .      MOVHU.P 2(R0), R4
 103   10ms   10ms      MOVHU.P 2(R1), R5
 104      .      .      CMP R4, R5
 105      .      .      BNE not_equal
 106      .      .  lt_2:
 107      .      .      TBZ $0, R2, equal
 108      .      .  one:
 109   10ms   10ms      MOVBU (R0), R4
 110      .      .      MOVBU (R1), R5
 111      .      .      CMP R4, R5
 112      .      .      BNE not_equal
 113      .      .  equal:
 114      .      .      MOVD $1, R0
 115   10ms   10ms      RET
 116      .      .  not_equal:
 117      .      .      MOVB ZR, R0
 118      .      .      RET
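Note: the assembly compares 16 bytes per iteration (LDP/EOR) and finishes the tail with overlapping 8-, 4-, 2- and 1-byte loads instead of a byte loop. A rough Go rendering of the same strategy over slices, for readers who don't speak arm64; it is illustrative only, the real implementation is the assembly above:

    import "encoding/binary"

    func memequalSketch(a, b []byte) bool {
        if len(a) != len(b) {
            return false
        }

        n := len(a)

        // 16-byte chunks (the chunk16_loop above).
        for n >= 16 {
            if binary.LittleEndian.Uint64(a) != binary.LittleEndian.Uint64(b) ||
                binary.LittleEndian.Uint64(a[8:]) != binary.LittleEndian.Uint64(b[8:]) {
                return false
            }

            a, b, n = a[16:], b[16:], n-16
        }

        // Tail: overlapping loads cover 8..15 and 4..7 remaining bytes in two compares.
        switch {
        case n >= 8:
            return binary.LittleEndian.Uint64(a) == binary.LittleEndian.Uint64(b) &&
                binary.LittleEndian.Uint64(a[n-8:]) == binary.LittleEndian.Uint64(b[n-8:])
        case n >= 4:
            return binary.LittleEndian.Uint32(a) == binary.LittleEndian.Uint32(b) &&
                binary.LittleEndian.Uint32(a[n-4:]) == binary.LittleEndian.Uint32(b[n-4:])
        default:
            for i := 0; i < n; i++ {
                if a[i] != b[i] {
                    return false
                }
            }

            return true
        }
    }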
git.urbach.dev/cli/q/src/asm.(*Assembler).Append
/home/user/q/src/asm/Assembler.go
Total: 40ms 280ms (flat, cum) 0.75%
  16      .      .      Libraries dll.List
  17      .      .  }
  18      .      .
  19      .      .  // Append adds another instruction.
  20      .      .  func (a *Assembler) Append(instr Instruction) {
  21   10ms  120ms      if a.Skip(instr) {
  22      .      .          return
  23      .      .      }
  24      .      .
  25   30ms  160ms      a.Instructions = append(a.Instructions, instr)
  26      .      .  }
  27      .      .
git.urbach.dev/cli/q/src/asm.(*Assembler).Last
/home/user/q/src/asm/Assembler.go
Total: 10ms 10ms (flat, cum) 0.027%
  28      .      .  // Last returns the last instruction.
  29      .      .  func (a *Assembler) Last() Instruction {
  30   10ms   10ms      return a.Instructions[len(a.Instructions)-1]
  31      .      .  }
  32      .      .
  33      .      .  // Compile compiles the instructions to machine code.
  34      .      .  func (a *Assembler) Compile(build *config.Build) (code []byte, data []byte, libs dll.List) {
  35      .      .      data, dataLabels := a.Data.Finalize()
git.urbach.dev/cli/q/src/asm.(*Assembler).SetData
/home/user/q/src/asm/Assembler.go
Total: 0 20ms (flat, cum) 0.053%
  96      .      .  func (a *Assembler) SetData(label string, bytes []byte) {
  97      .      .      if a.Data == nil {
  98      .      .          a.Data = data.Data{}
  99      .      .      }
 100      .      .
 101      .   20ms      a.Data.Insert(label, bytes)
                          data[label] = raw  Insert.go:5
 102      .      .  }
 103      .      .
 104      .      .  // SetLast sets the last instruction.
 105      .      .  func (a *Assembler) SetLast(instr Instruction) {
git.urbach.dev/cli/q/src/asm.(*Assembler).Skip
/home/user/q/src/asm/Assembler.go
Total: 90ms 110ms (flat, cum) 0.29%
 106      .      .      a.Instructions[len(a.Instructions)-1] = instr
 107      .      .  }
 108      .      .
 109      .      .  // Skip returns true if appending the instruction can be skipped.
 110   10ms   10ms  func (a *Assembler) Skip(instr Instruction) bool {
 111      .      .      if len(a.Instructions) == 0 {
 112   10ms   10ms          return false
 113      .      .      }
 114      .      .
 115      .      .      // Call to os.exit + anything is skipped if it's not a label
 116   20ms   20ms      call, isCall := a.Last().(*Call)
                          return a.Instructions[len(a.Instructions)-1]  Assembler.go:30
 117      .      .
 118   20ms   20ms      if isCall && call.Label == "run.exit" {
 119      .      .          switch instr.(type) {
 120      .      .          case *Label:
 121      .      .          default:
 122      .      .              return true
 123      .      .          }
 124      .      .      }
 125      .      .
 126   20ms   20ms      switch instr := instr.(type) {
 127      .      .      case *Label:
 128      .      .          // Jump + Label can be replaced by just the Label if both addresses are equal
 129      .      .          jump, isJump := a.Last().(*Jump)
 130      .      .
 131   10ms   30ms          if isJump && jump.Label == instr.Name {
 132      .      .              a.SetLast(instr)
 133      .      .              return true
 134      .      .          }
 135      .      .
 136      .      .      case *Return:
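Note: Skip is an append-time peephole: anything emitted right after a call to run.exit is unreachable and dropped unless it is a label, and a jump immediately followed by its own target collapses into just the label. A condensed sketch of the same decision, using only the Instruction, Call, Jump and Label shapes visible in the listing (field names as shown there); the function signature is illustrative:

    // skip reports whether `next` can be dropped given the previously emitted
    // instruction, and optionally an instruction to overwrite the last one with.
    func skip(last, next Instruction) (replaceLast Instruction, drop bool) {
        // Unreachable code after a call to run.exit.
        if call, isCall := last.(*Call); isCall && call.Label == "run.exit" {
            if _, isLabel := next.(*Label); !isLabel {
                return nil, true
            }
        }

        // Jump directly to the following label: keep only the label.
        if label, isLabel := next.(*Label); isLabel {
            if jump, isJump := last.(*Jump); isJump && jump.Label == label.Name {
                return label, true
            }
        }

        return nil, false
    }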
runtime.add
/usr/lib/go/src/runtime/stubs.go
Total: 60ms 60ms (flat, cum) 0.16%
  20      .      .  // See go.dev/issue/67401.
  21      .      .  //
  22      .      .  //go:linkname add
  23      .      .  //go:nosplit
  24      .      .  func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
  25   60ms   60ms      return unsafe.Pointer(uintptr(p) + x)
  26      .      .  }
  27      .      .
  28      .      .  // getg returns the pointer to the current g.
  29      .      .  // The compiler rewrites calls to this function into instructions
  30      .      .  // that fetch the g directly (from TLS or from the dedicated register).
runtime.alignUp
/usr/lib/go/src/runtime/stubs.go
Total: 10ms 10ms (flat, cum) 0.027%
 361      .      .
 362      .      .  // alignUp rounds n up to a multiple of a. a must be a power of 2.
 363      .      .  //
 364      .      .  //go:nosplit
 365      .      .  func alignUp(n, a uintptr) uintptr {
 366   10ms   10ms      return (n + a - 1) &^ (a - 1)
 367      .      .  }
 368      .      .
 369      .      .  // alignDown rounds n down to a multiple of a. a must be a power of 2.
 370      .      .  //
 371      .      .  //go:nosplit
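Note: the expression relies on a being a power of two, so a-1 is a mask of the low bits and &^ clears them after the round-up add. Worked through on small numbers:

    // alignUp(13, 8):   13 + 7 = 20 = 0b10100;  20 &^ 7 = 0b10000 = 16
    // alignUp(16, 8):   16 + 7 = 23 = 0b10111;  23 &^ 7 = 0b10000 = 16  (already aligned, unchanged)
    // alignUp(1, 4096): 4096                    (a one-byte request rounds up to a full page)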
runtime.bool2int
/usr/lib/go/src/runtime/stubs.go
Total: 60ms 60ms (flat, cum) 0.16%
 389      .      .
 390      .      .  // bool2int returns 0 if x is false or 1 if x is true.
 391      .      .  func bool2int(x bool) int {
 392      .      .      // Avoid branches. In the SSA compiler, this compiles to
 393      .      .      // exactly what you would want it to.
 394   60ms   60ms      return int(*(*uint8)(unsafe.Pointer(&x)))
 395      .      .  }
 396      .      .
 397      .      .  // abort crashes the runtime in situations where even throw might not
 398      .      .  // work. In general it should do something a debugger will recognize
 399      .      .  // (e.g., an INT3 on x86). A crash in abort is recognized by the
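Note: the cast relies on the gc toolchain storing a bool as a single byte holding 0 or 1, which is why the runtime can read it back as a uint8 without a branch. A hedged usage sketch (ordinary code should just use an if; this only pays off in branch-sensitive paths):

    import "unsafe"

    // bool2int as in the listing: reinterpret the byte backing a bool.
    func bool2int(x bool) int {
        return int(*(*uint8)(unsafe.Pointer(&x)))
    }

    // Illustrative use: a branchless count of elements matching a predicate.
    func countPositive(xs []int) int {
        n := 0

        for _, x := range xs {
            n += bool2int(x > 0)
        }

        return n
    }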