Current vpkpx implementation: 1) Both C and assembly code: Dump of assembler code for function helper_vpkpx: 1267 { 0x0000000000195fe0 <+0>: 48 83 ec 38 sub $0x38,%rsp 1268 int i, j; 1269 ppc_avr_t result; 1270 #if defined(HOST_WORDS_BIGENDIAN) 1271 const ppc_avr_t *x[2] = { a, b }; 1272 #else 1273 const ppc_avr_t *x[2] = { b, a }; 0x0000000000195fe4 <+4>: b9 07 00 00 00 mov $0x7,%ecx 1267 { 0x0000000000195fe9 <+9>: 64 48 8b 04 25 28 00 00 00 mov %fs:0x28,%rax 0x0000000000195ff2 <+18>: 48 89 44 24 28 mov %rax,0x28(%rsp) 0x0000000000195ff7 <+23>: 31 c0 xor %eax,%eax 0x0000000000195ff9 <+25>: 4c 8d 4c 24 10 lea 0x10(%rsp),%r9 1268 int i, j; 1269 ppc_avr_t result; 1270 #if defined(HOST_WORDS_BIGENDIAN) 1271 const ppc_avr_t *x[2] = { a, b }; 1272 #else 1273 const ppc_avr_t *x[2] = { b, a }; 0x0000000000195ffe <+30>: 48 89 54 24 10 mov %rdx,0x10(%rsp) 0x0000000000196003 <+35>: 48 89 74 24 18 mov %rsi,0x18(%rsp) 0x0000000000196008 <+40>: 44 8d 51 fc lea -0x4(%rcx),%r10d 0x000000000019600c <+44>: 48 83 c6 0c add $0xc,%rsi 1278 uint32_t e = x[i]->u32[j]; 0x0000000000196010 <+48>: 8b 06 mov (%rsi),%eax 1279 1280 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 0x0000000000196012 <+50>: 4c 63 d9 movslq %ecx,%r11 0x0000000000196015 <+53>: 83 e9 01 sub $0x1,%ecx 0x0000000000196018 <+56>: 48 83 ee 04 sub $0x4,%rsi 0x000000000019601c <+60>: 89 c2 mov %eax,%edx 0x000000000019601e <+62>: c1 ea 09 shr $0x9,%edx 0x0000000000196021 <+65>: 41 89 d0 mov %edx,%r8d 0x0000000000196024 <+68>: 89 c2 mov %eax,%edx 0x0000000000196026 <+70>: c1 e8 03 shr $0x3,%eax 0x0000000000196029 <+73>: c1 ea 06 shr $0x6,%edx 0x000000000019602c <+76>: 66 41 81 e0 00 fc and $0xfc00,%r8w 0x0000000000196032 <+82>: 83 e0 1f and $0x1f,%eax 0x0000000000196035 <+85>: 66 81 e2 e0 03 and $0x3e0,%dx 0x000000000019603a <+90>: 44 09 c2 or %r8d,%edx 0x000000000019603d <+93>: 09 d0 or %edx,%eax 1277 VECTOR_FOR_INORDER_I(j, u32) { 0x000000000019603f <+95>: 41 39 ca cmp %ecx,%r10d 1279 1280 result.u16[4 * i + j] = (((e >> 9) 
& 0xfc00) | 0x0000000000196042 <+98>: 66 42 89 04 5c mov %ax,(%rsp,%r11,2) 1277 VECTOR_FOR_INORDER_I(j, u32) { 0x0000000000196047 <+103>: 75 c7 jne 0x196010 1276 VECTOR_FOR_INORDER_I(i, u64) { 0x0000000000196049 <+105>: 41 83 fa ff cmp $0xffffffff,%r10d 0x000000000019604d <+109>: 44 89 d1 mov %r10d,%ecx 0x0000000000196050 <+112>: 74 0e je 0x196060 0x0000000000196052 <+114>: 49 8b 31 mov (%r9),%rsi 0x0000000000196055 <+117>: 49 83 e9 08 sub $0x8,%r9 0x0000000000196059 <+121>: eb ad jmp 0x196008 0x000000000019605b <+123>: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 1281 ((e >> 6) & 0x3e0) | 1282 ((e >> 3) & 0x1f)); 1283 // printf("%x\n",result.u16[4 * i + j]); 1284 } 1285 } 1286 // printf("%lx\n",result.u64[0]); 1287 // printf("%lx\n",result.u64[1]); 1288 *r = result; 0x0000000000196060 <+128>: 48 8b 04 24 mov (%rsp),%rax 0x0000000000196064 <+132>: 48 8b 54 24 08 mov 0x8(%rsp),%rdx 0x0000000000196069 <+137>: 48 89 07 mov %rax,(%rdi) 0x000000000019606c <+140>: 48 89 57 08 mov %rdx,0x8(%rdi) 1289 } 0x0000000000196070 <+144>: 48 8b 44 24 28 mov 0x28(%rsp),%rax 0x0000000000196075 <+149>: 64 48 33 04 25 28 00 00 00 xor %fs:0x28,%rax 0x000000000019607e <+158>: 75 05 jne 0x196085 0x0000000000196080 <+160>: 48 83 c4 38 add $0x38,%rsp 0x0000000000196084 <+164>: c3 retq 0x0000000000196085 <+165>: e8 2e 66 f0 ff callq 0x9c6b8 End of assembler dump. 
2) Only assembly code: Dump of assembler code for function helper_vpkpx: 0x0000000000195fe0 <+0>: 48 83 ec 38 sub $0x38,%rsp 0x0000000000195fe4 <+4>: b9 07 00 00 00 mov $0x7,%ecx 0x0000000000195fe9 <+9>: 64 48 8b 04 25 28 00 00 00 mov %fs:0x28,%rax 0x0000000000195ff2 <+18>: 48 89 44 24 28 mov %rax,0x28(%rsp) 0x0000000000195ff7 <+23>: 31 c0 xor %eax,%eax 0x0000000000195ff9 <+25>: 4c 8d 4c 24 10 lea 0x10(%rsp),%r9 0x0000000000195ffe <+30>: 48 89 54 24 10 mov %rdx,0x10(%rsp) 0x0000000000196003 <+35>: 48 89 74 24 18 mov %rsi,0x18(%rsp) 0x0000000000196008 <+40>: 44 8d 51 fc lea -0x4(%rcx),%r10d 0x000000000019600c <+44>: 48 83 c6 0c add $0xc,%rsi 0x0000000000196010 <+48>: 8b 06 mov (%rsi),%eax 0x0000000000196012 <+50>: 4c 63 d9 movslq %ecx,%r11 0x0000000000196015 <+53>: 83 e9 01 sub $0x1,%ecx 0x0000000000196018 <+56>: 48 83 ee 04 sub $0x4,%rsi 0x000000000019601c <+60>: 89 c2 mov %eax,%edx 0x000000000019601e <+62>: c1 ea 09 shr $0x9,%edx 0x0000000000196021 <+65>: 41 89 d0 mov %edx,%r8d 0x0000000000196024 <+68>: 89 c2 mov %eax,%edx 0x0000000000196026 <+70>: c1 e8 03 shr $0x3,%eax 0x0000000000196029 <+73>: c1 ea 06 shr $0x6,%edx 0x000000000019602c <+76>: 66 41 81 e0 00 fc and $0xfc00,%r8w 0x0000000000196032 <+82>: 83 e0 1f and $0x1f,%eax 0x0000000000196035 <+85>: 66 81 e2 e0 03 and $0x3e0,%dx 0x000000000019603a <+90>: 44 09 c2 or %r8d,%edx 0x000000000019603d <+93>: 09 d0 or %edx,%eax 0x000000000019603f <+95>: 41 39 ca cmp %ecx,%r10d 0x0000000000196042 <+98>: 66 42 89 04 5c mov %ax,(%rsp,%r11,2) 0x0000000000196047 <+103>: 75 c7 jne 0x196010 0x0000000000196049 <+105>: 41 83 fa ff cmp $0xffffffff,%r10d 0x000000000019604d <+109>: 44 89 d1 mov %r10d,%ecx 0x0000000000196050 <+112>: 74 0e je 0x196060 0x0000000000196052 <+114>: 49 8b 31 mov (%r9),%rsi 0x0000000000196055 <+117>: 49 83 e9 08 sub $0x8,%r9 0x0000000000196059 <+121>: eb ad jmp 0x196008 0x000000000019605b <+123>: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 0x0000000000196060 <+128>: 48 8b 04 24 mov (%rsp),%rax 
0x0000000000196064 <+132>: 48 8b 54 24 08 mov 0x8(%rsp),%rdx 0x0000000000196069 <+137>: 48 89 07 mov %rax,(%rdi) 0x000000000019606c <+140>: 48 89 57 08 mov %rdx,0x8(%rdi) 0x0000000000196070 <+144>: 48 8b 44 24 28 mov 0x28(%rsp),%rax 0x0000000000196075 <+149>: 64 48 33 04 25 28 00 00 00 xor %fs:0x28,%rax 0x000000000019607e <+158>: 75 05 jne 0x196085 0x0000000000196080 <+160>: 48 83 c4 38 add $0x38,%rsp 0x0000000000196084 <+164>: c3 retq 0x0000000000196085 <+165>: e8 2e 66 f0 ff callq 0x9c6b8 End of assembler dump. The implementation you suggested: 1) Both C and assembly code: Dump of assembler code for function helper_vpkpx: 1313 { 0x0000000000195fe0 <+0>: 55 push %rbp 0x0000000000195fe1 <+1>: 53 push %rbx 1314 uint64_t rh = pkpx_2(a->u64[1], a->u64[0]); 0x0000000000195fe2 <+2>: 48 8b 46 08 mov 0x8(%rsi),%rax 0x0000000000195fe6 <+6>: 48 8b 0e mov (%rsi),%rcx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000195fe9 <+9>: 49 89 c1 mov %rax,%r9 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000195fec <+12>: 48 89 c6 mov %rax,%rsi 0x0000000000195fef <+15>: 49 89 c3 mov %rax,%r11 0x0000000000195ff2 <+18>: 48 c1 ee 29 shr $0x29,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000195ff6 <+22>: 49 c1 e9 26 shr $0x26,%r9 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000195ffa <+26>: 49 c1 eb 09 shr $0x9,%r11 0x0000000000195ffe <+30>: 81 e6 00 fc 00 00 and $0xfc00,%esi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196004 <+36>: 41 81 e1 e0 03 00 00 and $0x3e0,%r9d 0x000000000019600b <+43>: 49 89 ca mov %rcx,%r10 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x000000000019600e <+46>: 49 89 f0 mov %rsi,%r8 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196011 <+49>: 4c 89 ce mov %r9,%rsi 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196014 <+52>: 49 89 c1 mov %rax,%r9 0x0000000000196017 <+55>: 49 c1 e9 23 shr $0x23,%r9 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019601b <+59>: 4c 09 c6 or %r8,%rsi 0x000000000019601e <+62>: 49 c1 ea 26 shr $0x26,%r10 1298 r |= ((a >> (shr 
+ 3)) & 0x1f); 0x0000000000196022 <+66>: 41 83 e1 1f and $0x1f,%r9d 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196026 <+70>: 41 81 e2 e0 03 00 00 and $0x3e0,%r10d 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x000000000019602d <+77>: 49 09 f1 or %rsi,%r9 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000196030 <+80>: 4c 89 de mov %r11,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196033 <+83>: 49 89 c3 mov %rax,%r11 0x0000000000196036 <+86>: 49 c1 eb 06 shr $0x6,%r11 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x000000000019603a <+90>: 81 e6 00 fc 00 00 and $0xfc00,%esi 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196040 <+96>: 48 c1 e8 03 shr $0x3,%rax 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196044 <+100>: 41 81 e3 e0 03 00 00 and $0x3e0,%r11d 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x000000000019604b <+107>: 83 e0 1f and $0x1f,%eax 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019604e <+110>: 49 09 f3 or %rsi,%r11 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196051 <+113>: 49 09 c3 or %rax,%r11 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000196054 <+116>: 48 89 c8 mov %rcx,%rax 0x0000000000196057 <+119>: 48 c1 e8 29 shr $0x29,%rax 0x000000000019605b <+123>: 25 00 fc 00 00 and $0xfc00,%eax 0x0000000000196060 <+128>: 48 89 c6 mov %rax,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196063 <+131>: 4c 89 d0 mov %r10,%rax 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196066 <+134>: 49 89 ca mov %rcx,%r10 0x0000000000196069 <+137>: 49 c1 ea 23 shr $0x23,%r10 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019606d <+141>: 48 09 f0 or %rsi,%rax 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196070 <+144>: 41 83 e2 1f and $0x1f,%r10d 0x0000000000196074 <+148>: 49 09 c2 or %rax,%r10 1315 uint64_t rl = pkpx_2(b->u64[1], b->u64[0]); 0x0000000000196077 <+151>: 48 8b 02 mov (%rdx),%rax 0x000000000019607a <+154>: 48 8b 52 08 mov 0x8(%rdx),%rdx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019607e <+158>: 49 89 d0 mov %rdx,%r8 1296 r = ((a 
>> (shr + 9)) & 0xfc00); 0x0000000000196081 <+161>: 48 89 d6 mov %rdx,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196084 <+164>: 49 c1 e8 26 shr $0x26,%r8 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000196088 <+168>: 48 c1 ee 29 shr $0x29,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019608c <+172>: 41 81 e0 e0 03 00 00 and $0x3e0,%r8d 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000196093 <+179>: 48 89 f3 mov %rsi,%rbx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196096 <+182>: 4c 89 c6 mov %r8,%rsi 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196099 <+185>: 49 89 d0 mov %rdx,%r8 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x000000000019609c <+188>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x00000000001960a2 <+194>: 49 c1 e8 23 shr $0x23,%r8 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960a6 <+198>: 48 09 de or %rbx,%rsi 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x00000000001960a9 <+201>: 41 83 e0 1f and $0x1f,%r8d 0x00000000001960ad <+205>: 49 09 f0 or %rsi,%r8 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960b0 <+208>: 48 89 d6 mov %rdx,%rsi 0x00000000001960b3 <+211>: 48 c1 ee 09 shr $0x9,%rsi 1316 r->u64[1] = rh; 0x00000000001960b7 <+215>: 49 c1 e1 30 shl $0x30,%r9 0x00000000001960bb <+219>: 49 c1 e3 20 shl $0x20,%r11 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960bf <+223>: 48 89 f3 mov %rsi,%rbx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960c2 <+226>: 48 89 d6 mov %rdx,%rsi 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x00000000001960c5 <+229>: 48 c1 ea 03 shr $0x3,%rdx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960c9 <+233>: 48 c1 ee 06 shr $0x6,%rsi 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960cd <+237>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x00000000001960d3 <+243>: 83 e2 1f and $0x1f,%edx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960d6 <+246>: 81 e6 e0 03 00 00 and $0x3e0,%esi 1316 r->u64[1] = rh; 0x00000000001960dc <+252>: 
49 c1 e2 10 shl $0x10,%r10 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960e0 <+256>: 48 09 de or %rbx,%rsi 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960e3 <+259>: 48 89 c3 mov %rax,%rbx 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x00000000001960e6 <+262>: 48 09 f2 or %rsi,%rdx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960e9 <+265>: 48 89 c6 mov %rax,%rsi 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960ec <+268>: 48 c1 eb 29 shr $0x29,%rbx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960f0 <+272>: 48 c1 ee 26 shr $0x26,%rsi 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x00000000001960f4 <+276>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x00000000001960fa <+282>: 81 e6 e0 03 00 00 and $0x3e0,%esi 1296 r = ((a >> (shr + 9)) & 0xfc00); 0x0000000000196100 <+288>: 48 89 dd mov %rbx,%rbp 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x0000000000196103 <+291>: 48 89 f3 mov %rsi,%rbx 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196106 <+294>: 48 89 c6 mov %rax,%rsi 0x0000000000196109 <+297>: 48 c1 ee 23 shr $0x23,%rsi 1297 r |= ((a >> (shr + 6)) & 0x3e0); 0x000000000019610d <+301>: 48 09 eb or %rbp,%rbx 1298 r |= ((a >> (shr + 3)) & 0x1f); 0x0000000000196110 <+304>: 83 e6 1f and $0x1f,%esi 0x0000000000196113 <+307>: 48 09 de or %rbx,%rsi 1316 r->u64[1] = rh; 0x0000000000196116 <+310>: 48 89 cb mov %rcx,%rbx 0x0000000000196119 <+313>: 48 c1 eb 09 shr $0x9,%rbx 0x000000000019611d <+317>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 0x0000000000196123 <+323>: 48 89 dd mov %rbx,%rbp 0x0000000000196126 <+326>: 48 89 cb mov %rcx,%rbx 0x0000000000196129 <+329>: 48 c1 e9 03 shr $0x3,%rcx 0x000000000019612d <+333>: 48 c1 eb 06 shr $0x6,%rbx 0x0000000000196131 <+337>: 83 e1 1f and $0x1f,%ecx 0x0000000000196134 <+340>: 81 e3 e0 03 00 00 and $0x3e0,%ebx 0x000000000019613a <+346>: 48 09 eb or %rbp,%rbx 0x000000000019613d <+349>: 48 09 d9 or %rbx,%rcx 0x0000000000196140 <+352>: 4c 09 c9 or %r9,%rcx 0x0000000000196143 <+355>: 4c 09 d9 or %r11,%rcx 
0x0000000000196146 <+358>: 4c 09 d1 or %r10,%rcx 0x0000000000196149 <+361>: 48 89 4f 08 mov %rcx,0x8(%rdi) 1317 r->u64[0] = rl; 0x000000000019614d <+365>: 48 89 c1 mov %rax,%rcx 0x0000000000196150 <+368>: 48 c1 e9 09 shr $0x9,%rcx 0x0000000000196154 <+372>: 81 e1 00 fc 00 00 and $0xfc00,%ecx 0x000000000019615a <+378>: 49 89 c9 mov %rcx,%r9 0x000000000019615d <+381>: 48 89 c1 mov %rax,%rcx 0x0000000000196160 <+384>: 48 c1 e9 06 shr $0x6,%rcx 0x0000000000196164 <+388>: 48 c1 e8 03 shr $0x3,%rax 0x0000000000196168 <+392>: 49 c1 e0 30 shl $0x30,%r8 0x000000000019616c <+396>: 81 e1 e0 03 00 00 and $0x3e0,%ecx 0x0000000000196172 <+402>: 83 e0 1f and $0x1f,%eax 0x0000000000196175 <+405>: 48 c1 e2 20 shl $0x20,%rdx 0x0000000000196179 <+409>: 4c 09 c9 or %r9,%rcx 0x000000000019617c <+412>: 48 09 c8 or %rcx,%rax 0x000000000019617f <+415>: 48 89 f1 mov %rsi,%rcx 0x0000000000196182 <+418>: 4c 09 c0 or %r8,%rax 0x0000000000196185 <+421>: 48 c1 e1 10 shl $0x10,%rcx 0x0000000000196189 <+425>: 48 09 d0 or %rdx,%rax 0x000000000019618c <+428>: 48 09 c8 or %rcx,%rax 1318 } 0x000000000019618f <+431>: 5b pop %rbx 1317 r->u64[0] = rl; 0x0000000000196190 <+432>: 48 89 07 mov %rax,(%rdi) 1318 } 0x0000000000196193 <+435>: 5d pop %rbp 0x0000000000196194 <+436>: c3 retq End of assembler dump. 
2) Only assembly code: Dump of assembler code for function helper_vpkpx: 0x0000000000195fe0 <+0>: 55 push %rbp 0x0000000000195fe1 <+1>: 53 push %rbx 0x0000000000195fe2 <+2>: 48 8b 46 08 mov 0x8(%rsi),%rax 0x0000000000195fe6 <+6>: 48 8b 0e mov (%rsi),%rcx 0x0000000000195fe9 <+9>: 49 89 c1 mov %rax,%r9 0x0000000000195fec <+12>: 48 89 c6 mov %rax,%rsi 0x0000000000195fef <+15>: 49 89 c3 mov %rax,%r11 0x0000000000195ff2 <+18>: 48 c1 ee 29 shr $0x29,%rsi 0x0000000000195ff6 <+22>: 49 c1 e9 26 shr $0x26,%r9 0x0000000000195ffa <+26>: 49 c1 eb 09 shr $0x9,%r11 0x0000000000195ffe <+30>: 81 e6 00 fc 00 00 and $0xfc00,%esi 0x0000000000196004 <+36>: 41 81 e1 e0 03 00 00 and $0x3e0,%r9d 0x000000000019600b <+43>: 49 89 ca mov %rcx,%r10 0x000000000019600e <+46>: 49 89 f0 mov %rsi,%r8 0x0000000000196011 <+49>: 4c 89 ce mov %r9,%rsi 0x0000000000196014 <+52>: 49 89 c1 mov %rax,%r9 0x0000000000196017 <+55>: 49 c1 e9 23 shr $0x23,%r9 0x000000000019601b <+59>: 4c 09 c6 or %r8,%rsi 0x000000000019601e <+62>: 49 c1 ea 26 shr $0x26,%r10 0x0000000000196022 <+66>: 41 83 e1 1f and $0x1f,%r9d 0x0000000000196026 <+70>: 41 81 e2 e0 03 00 00 and $0x3e0,%r10d 0x000000000019602d <+77>: 49 09 f1 or %rsi,%r9 0x0000000000196030 <+80>: 4c 89 de mov %r11,%rsi 0x0000000000196033 <+83>: 49 89 c3 mov %rax,%r11 0x0000000000196036 <+86>: 49 c1 eb 06 shr $0x6,%r11 0x000000000019603a <+90>: 81 e6 00 fc 00 00 and $0xfc00,%esi 0x0000000000196040 <+96>: 48 c1 e8 03 shr $0x3,%rax 0x0000000000196044 <+100>: 41 81 e3 e0 03 00 00 and $0x3e0,%r11d 0x000000000019604b <+107>: 83 e0 1f and $0x1f,%eax 0x000000000019604e <+110>: 49 09 f3 or %rsi,%r11 0x0000000000196051 <+113>: 49 09 c3 or %rax,%r11 0x0000000000196054 <+116>: 48 89 c8 mov %rcx,%rax 0x0000000000196057 <+119>: 48 c1 e8 29 shr $0x29,%rax 0x000000000019605b <+123>: 25 00 fc 00 00 and $0xfc00,%eax 0x0000000000196060 <+128>: 48 89 c6 mov %rax,%rsi 0x0000000000196063 <+131>: 4c 89 d0 mov %r10,%rax 0x0000000000196066 <+134>: 49 89 ca mov %rcx,%r10 0x0000000000196069 
<+137>: 49 c1 ea 23 shr $0x23,%r10 0x000000000019606d <+141>: 48 09 f0 or %rsi,%rax 0x0000000000196070 <+144>: 41 83 e2 1f and $0x1f,%r10d 0x0000000000196074 <+148>: 49 09 c2 or %rax,%r10 0x0000000000196077 <+151>: 48 8b 02 mov (%rdx),%rax 0x000000000019607a <+154>: 48 8b 52 08 mov 0x8(%rdx),%rdx 0x000000000019607e <+158>: 49 89 d0 mov %rdx,%r8 0x0000000000196081 <+161>: 48 89 d6 mov %rdx,%rsi 0x0000000000196084 <+164>: 49 c1 e8 26 shr $0x26,%r8 0x0000000000196088 <+168>: 48 c1 ee 29 shr $0x29,%rsi 0x000000000019608c <+172>: 41 81 e0 e0 03 00 00 and $0x3e0,%r8d 0x0000000000196093 <+179>: 48 89 f3 mov %rsi,%rbx 0x0000000000196096 <+182>: 4c 89 c6 mov %r8,%rsi 0x0000000000196099 <+185>: 49 89 d0 mov %rdx,%r8 0x000000000019609c <+188>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 0x00000000001960a2 <+194>: 49 c1 e8 23 shr $0x23,%r8 0x00000000001960a6 <+198>: 48 09 de or %rbx,%rsi 0x00000000001960a9 <+201>: 41 83 e0 1f and $0x1f,%r8d 0x00000000001960ad <+205>: 49 09 f0 or %rsi,%r8 0x00000000001960b0 <+208>: 48 89 d6 mov %rdx,%rsi 0x00000000001960b3 <+211>: 48 c1 ee 09 shr $0x9,%rsi 0x00000000001960b7 <+215>: 49 c1 e1 30 shl $0x30,%r9 0x00000000001960bb <+219>: 49 c1 e3 20 shl $0x20,%r11 0x00000000001960bf <+223>: 48 89 f3 mov %rsi,%rbx 0x00000000001960c2 <+226>: 48 89 d6 mov %rdx,%rsi 0x00000000001960c5 <+229>: 48 c1 ea 03 shr $0x3,%rdx 0x00000000001960c9 <+233>: 48 c1 ee 06 shr $0x6,%rsi 0x00000000001960cd <+237>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 0x00000000001960d3 <+243>: 83 e2 1f and $0x1f,%edx 0x00000000001960d6 <+246>: 81 e6 e0 03 00 00 and $0x3e0,%esi 0x00000000001960dc <+252>: 49 c1 e2 10 shl $0x10,%r10 0x00000000001960e0 <+256>: 48 09 de or %rbx,%rsi 0x00000000001960e3 <+259>: 48 89 c3 mov %rax,%rbx 0x00000000001960e6 <+262>: 48 09 f2 or %rsi,%rdx 0x00000000001960e9 <+265>: 48 89 c6 mov %rax,%rsi 0x00000000001960ec <+268>: 48 c1 eb 29 shr $0x29,%rbx 0x00000000001960f0 <+272>: 48 c1 ee 26 shr $0x26,%rsi 0x00000000001960f4 <+276>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 
0x00000000001960fa <+282>: 81 e6 e0 03 00 00 and $0x3e0,%esi 0x0000000000196100 <+288>: 48 89 dd mov %rbx,%rbp 0x0000000000196103 <+291>: 48 89 f3 mov %rsi,%rbx 0x0000000000196106 <+294>: 48 89 c6 mov %rax,%rsi 0x0000000000196109 <+297>: 48 c1 ee 23 shr $0x23,%rsi 0x000000000019610d <+301>: 48 09 eb or %rbp,%rbx 0x0000000000196110 <+304>: 83 e6 1f and $0x1f,%esi 0x0000000000196113 <+307>: 48 09 de or %rbx,%rsi 0x0000000000196116 <+310>: 48 89 cb mov %rcx,%rbx 0x0000000000196119 <+313>: 48 c1 eb 09 shr $0x9,%rbx 0x000000000019611d <+317>: 81 e3 00 fc 00 00 and $0xfc00,%ebx 0x0000000000196123 <+323>: 48 89 dd mov %rbx,%rbp 0x0000000000196126 <+326>: 48 89 cb mov %rcx,%rbx 0x0000000000196129 <+329>: 48 c1 e9 03 shr $0x3,%rcx 0x000000000019612d <+333>: 48 c1 eb 06 shr $0x6,%rbx 0x0000000000196131 <+337>: 83 e1 1f and $0x1f,%ecx 0x0000000000196134 <+340>: 81 e3 e0 03 00 00 and $0x3e0,%ebx 0x000000000019613a <+346>: 48 09 eb or %rbp,%rbx 0x000000000019613d <+349>: 48 09 d9 or %rbx,%rcx 0x0000000000196140 <+352>: 4c 09 c9 or %r9,%rcx 0x0000000000196143 <+355>: 4c 09 d9 or %r11,%rcx 0x0000000000196146 <+358>: 4c 09 d1 or %r10,%rcx 0x0000000000196149 <+361>: 48 89 4f 08 mov %rcx,0x8(%rdi) 0x000000000019614d <+365>: 48 89 c1 mov %rax,%rcx 0x0000000000196150 <+368>: 48 c1 e9 09 shr $0x9,%rcx 0x0000000000196154 <+372>: 81 e1 00 fc 00 00 and $0xfc00,%ecx 0x000000000019615a <+378>: 49 89 c9 mov %rcx,%r9 0x000000000019615d <+381>: 48 89 c1 mov %rax,%rcx 0x0000000000196160 <+384>: 48 c1 e9 06 shr $0x6,%rcx 0x0000000000196164 <+388>: 48 c1 e8 03 shr $0x3,%rax 0x0000000000196168 <+392>: 49 c1 e0 30 shl $0x30,%r8 0x000000000019616c <+396>: 81 e1 e0 03 00 00 and $0x3e0,%ecx 0x0000000000196172 <+402>: 83 e0 1f and $0x1f,%eax 0x0000000000196175 <+405>: 48 c1 e2 20 shl $0x20,%rdx 0x0000000000196179 <+409>: 4c 09 c9 or %r9,%rcx 0x000000000019617c <+412>: 48 09 c8 or %rcx,%rax 0x000000000019617f <+415>: 48 89 f1 mov %rsi,%rcx 0x0000000000196182 <+418>: 4c 09 c0 or %r8,%rax 
0x0000000000196185 <+421>: 48 c1 e1 10 shl $0x10,%rcx 0x0000000000196189 <+425>: 48 09 d0 or %rdx,%rax 0x000000000019618c <+428>: 48 09 c8 or %rcx,%rax 0x000000000019618f <+431>: 5b pop %rbx 0x0000000000196190 <+432>: 48 89 07 mov %rax,(%rdi) 0x0000000000196193 <+435>: 5d pop %rbp 0x0000000000196194 <+436>: c3 retq End of assembler dump.