[text] SASS code using nvdisasm

Viewer

copy | download | embed | print | Name: SASS code using nvdisasm
  1. .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM30 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM30)"
  2.         .elftype        @"ET_EXEC"
  3.  
  4.  
  5. //--------------------- .text._Z7testPtxPiPfPj    --------------------------
        // Kernel entry point. The mangled name _Z7testPtxPiPfPj demangles to
        // testPtx(int*, float*, unsigned int*).
        //
        // What the visible instructions do:
        //   1. Read SR_CLOCKLO into R0.
        //   2. Load one value through the 64-bit address held in
        //      c[0x0][0x140] / c[0x0][0x144].
        //   3. Read SR_CLOCKLO again into R3.
        //   4. Convert the loaded value U32 -> F32 and compute its reciprocal,
        //      either inline (.L_10) or through the rcp slowpath subroutine.
        //   5. Store the reciprocal through the address in c[0x0][0x148..0x14c]
        //      and the clock difference (R3 - R0) through the address in
        //      c[0x0][0x150..0x154].
        //
        // Presumably c[0x0][0x140], [0x148] and [0x150] are the three pointer
        // parameters in declaration order -- TODO confirm against the sm_30
        // parameter layout.
        //
        // NOTE(review): the second clock read does not consume the LD result,
        // so whether the measured interval includes the load latency cannot be
        // determined from this listing alone.
        //
        // Register roles: R0 = first clock sample, R3 = second clock sample
        // (after the load has been issued), R2 = loaded value / float operand,
        // R7 = reciprocal, R6 = clock delta.
  6.         .section        .text._Z7testPtxPiPfPj,"ax",@progbits
  7.         .sectioninfo    @"SHI_REGISTERS=12"
  8.         .align  64
  9.         .global         _Z7testPtxPiPfPj
  10.         .type           _Z7testPtxPiPfPj,@function
  11.         .size           _Z7testPtxPiPfPj,(.L_39 - _Z7testPtxPiPfPj)    // size runs to .L_39, so it also spans the slowpath subroutine below
  12.         .other          _Z7testPtxPiPfPj,@"STO_CUDA_ENTRY STV_DEFAULT"
  13. _Z7testPtxPiPfPj:
  14. .text._Z7testPtxPiPfPj:
  15.         /*0008*/                   MOV R1, c[0x0][0x44];    // presumably stack-pointer setup; R1 is not read anywhere else in this listing
  16.         /*0010*/                   S2R R0, SR_CLOCKLO;    // first clock sample
  17.         /*0018*/                   MOV R2, c[0x0][0x140];    // low word of the load address
  18.         /*0020*/                   MOV R3, c[0x0][0x144];    // high word of the load address
  19.         /*0028*/                   LD.E.CV R2, [R2];    // R2 = value at the 64-bit address in R2:R3
  20.         /*0030*/                   S2R R3, SR_CLOCKLO;    // second clock sample; reuses R3 now that the address has been consumed
  21.         /*0038*/                   I2F.F32.U32 R2, R2;    // unsigned integer -> float
  22.         /*0048*/                   IADD32I R4, R2, 0x1800000;    // add 3 << 23 to the float bit pattern (bumps the exponent field by 3)
  23.         /*0050*/                   LOP32I.AND R4, R4, 0x7f800000;    // keep only the exponent field
  24.         /*0058*/                   ISETP.GT.U32.AND P0, PT, R4, c[0x2][0x0], PT;    // exponent range test; threshold is in constant bank 2 (value not visible here)
  25.         /*0060*/               @P0 BRA `(.L_10);    // test passed: use the inline reciprocal
  26.         /*0068*/                   MOV R5, R2;    // slowpath takes its operand in R5
  27.         /*0070*/                   CAL `($_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath);
  28.         /*0078*/                   MOV R7, R4;    // slowpath returns its result in R4
  29.         /*0088*/                   BRA `(.L_11);
  30. .L_10:    // inline path: hardware approximation plus one refinement step
  31.         /*0090*/                   MUFU.RCP R7, R2;    // r = approx(1/x)
  32.         /*0098*/                   FFMA R2, R2, R7, c[0x2][0x4];    // x*r + K; K is presumably -1.0 (constant bank value not visible here)
  33.         /*00a0*/                   FADD.FTZ R2, -R2, -RZ;    // negate the residual
  34.         /*00a8*/                   FFMA R7, R7, R2, R7;    // r = r + r * (negated residual)
  35. .L_11:    // both reciprocal paths rejoin here with the result in R7
  36.         /*00b0*/                   IADD R6, -R0, R3;    // clock delta = second sample - first sample
  37.         /*00b8*/                   MOV R4, c[0x0][0x148];    // R4:R5 = destination address for the reciprocal
  38.         /*00c8*/                   MOV R5, c[0x0][0x14c];
  39.         /*00d0*/                   MOV R2, c[0x0][0x150];    // R2:R3 = destination address for the clock delta
  40.         /*00d8*/                   MOV R3, c[0x0][0x154];
  41.         /*00e0*/                   ST.E [R4], R7;    // store the reciprocal
  42.         /*00e8*/                   ST.E [R2], R6;    // store the clock delta
  43.         /*00f0*/                   EXIT;
        // Reciprocal slowpath subroutine, entered with CAL from the kernel
        // above when its inline exponent range test fails.
        //   In:  R5 = float operand (raw bits)
        //   Out: R4 = result (raw bits)
        //   Also writes: R2, R5, R6, R7, R8, R9, R10, R11, P0, P1
        //
        // Three cases are visible, selected on R4, which is derived from the
        // operand with SHL/PRMT and is presumably the 8-bit exponent field:
        //   R4 == 0         : all non-sign bits zero -> plain MUFU.RCP;
        //                     otherwise pre-scale by 2^64, take the refined
        //                     reciprocal, and scale the result by 2^64 again.
        //   R4 in 0xfd,0xfe : build the result bits by hand from a mantissa-only
        //                     reciprocal (appears to be round-to-nearest-even of
        //                     a result too small for the normal range --
        //                     TODO confirm).
        //   anything else   : plain MUFU.RCP (.L_13).
  44.         .weak           $_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath
  45.         .type           $_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath,@function
  46.         .size           $_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath,(.L_39 - $_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath)
  47. $_Z7testPtxPiPfPj$__cuda_sm20_rcp_rn_f32_slowpath:
  48.         /*00f8*/                   SHL R2, R5, 0x1;    // shift the sign bit out
  49.         /*0108*/                   PRMT R4, RZ, 0x7, R2;    // presumably R4 = top byte of R2, i.e. the exponent field -- verify PRMT selector semantics
  50.         /*0110*/                   MOV R2, R5;    // R2 = working copy of the operand
  51.         /*0118*/                   ISETP.NE.U32.AND P0, PT, R4, RZ, PT;
  52.         /*0120*/               @P0 BRA `(.L_12);    // R4 != 0: handled at .L_12
  53.         /*0128*/                   SHL R4, R2, 0x1;    // R4 = every bit except the sign
  54.         /*0130*/                   ISETP.NE.AND P0, PT, R4, RZ, PT;
  55.         /*0138*/              @!P0 MUFU.RCP R4, R2;    // all non-sign bits zero: take the hardware result directly
  56.         /*0148*/              @!P0 RET;
  57.         /*0150*/                   FFMA R2, R2, 1.84467440737095516160e+19, RZ;    // scale the operand up by 2^64
  58.         /*0158*/                   MUFU.RCP R4, R2;    // r = approx(1/scaled x)
  59.         /*0160*/                   FFMA R2, R2, R4, c[0x2][0x4];    // x*r + K; K is presumably -1.0 (constant bank value not visible here)
  60.         /*0168*/                   FADD.FTZ R2, -R2, -RZ;    // negate the residual
  61.         /*0170*/                   FFMA R4, R4, R2, R4;    // r = r + r * (negated residual)
  62.         /*0178*/                   FFMA R4, R4, 1.84467440737095516160e+19, RZ;    // multiply by 2^64 again to undo the operand scaling
  63.         /*0188*/                   RET;
  64. .L_12:    // R4 != 0
  65.         /*0190*/                   IADD32I R5, R4, -0xfd;    // R5 = R4 - 0xfd
  66.         /*0198*/                   ISETP.GT.U32.AND P0, PT, R5, 0x1, PT;    // unsigned compare: false only when R4 is 0xfd or 0xfe
  67.         /*01a0*/               @P0 BRA `(.L_13);
  68.         /*01a8*/                   LOP32I.AND R11, R2, 0x7fffff;    // R11 = low 23 bits of the operand (mantissa field)
  69.         /*01b0*/                   IADD32I R4, R4, -0xfc;    // R4 = 1 or 2; used below as the right-shift count
  70.         /*01b8*/                   MOV32I R10, 0x3;
  71.         /*01c8*/                   LOP32I.AND R2, R2, 0x80000000;    // R2 = sign bit only
  72.         /*01d0*/                   LOP32I.OR R6, R11, 0x3f800000;    // R6 = mantissa with the exponent bits of 1.0f
  73.         /*01d8*/                   ISETP.NE.U32.AND P1, PT, R11, RZ, PT;    // P1 = mantissa field is non-zero
  74.         /*01e0*/                   SHL R10, R10, R5;    // R10 = 3 << R5 (R5 is 0 or 1 here)
  75.         /*01e8*/                   MUFU.RCP R7, R6;    // r = approx(1/R6)
  76.         /*01f0*/                   FFMA R6, R6, R7, c[0x2][0x4];    // x*r + K, same pattern as the refinement above
  77.         /*01f8*/                   FADD.FTZ R6, -R6, -RZ;    // negate the residual
  78.         /*0208*/                   FFMA.RM R8, R7, R6, R7;    // refined result with the .RM rounding modifier
  79.         /*0210*/                   FFMA.RP R6, R7, R6, R7;    // same expression with the .RP rounding modifier
  80.         /*0218*/                   LOP32I.AND R9, R8, 0x7fffff;    // R9 = low 23 bits of the .RM result
  81.         /*0220*/                   FSET.NEU.FTZ.AND R6, R8, R6, PT;    // R6 = non-zero when the .RM and .RP results differ
  82.         /*0228*/                   LOP32I.OR R9, R9, 0x800000;    // set bit 23 on top of the 23 low bits
  83.         /*0230*/                   IADD R6, -R6, RZ;    // negate the FSET result (presumably all-ones -> 1; confirm FSET output encoding)
  84.         /*0238*/                   LOP.AND R10, R10, R9;    // pick the two R9 bits selected by the mask built in R10
  85.         /*0248*/                   SHR.U32 R4, R9, R4;    // R4 = R9 shifted right by 1 or 2
  86.         /*0250*/                   SHR.U32 R10, R10, R5;    // move the two selected bits down to bits 1:0
  87.         /*0258*/                   LOP.AND R5, R5, R9;    // R5 = bit 0 of R9 when R5 was 1, else 0
  88.         /*0260*/                   LOP32I.AND R7, R10, 0x2;    // R7 = bit 1 of the selected pair
  89.         /*0268*/                   LOP.OR R5, R6, R5;    // combine the differ flag with the bit kept in R5
  90.         /*0270*/                   LOP32I.AND R10, R10, 0x1;    // R10 = bit 0 of the selected pair
  91.         /*0278*/                   ISETP.NE.U32.AND P0, PT, R7, RZ, PT;    // P0 = (R7 != 0)
  92.         /*0288*/                   ISETP.NE.U32.OR P0, PT, R5, RZ, P0;    // P0 = (R5 != 0) || P0
  93.         /*0290*/                   ISETP.NE.U32.AND P0, PT, R10, RZ, P0;    // P0 = (R10 != 0) && P0
  94.         /*0298*/               @P0 IADD32I R4, R4, 0x1;    // increment the shifted value when P0 holds
  95.         /*02a0*/              @!P1 SHL R4, R4, 0x1;    // operand mantissa field was zero: double the value
  96.         /*02a8*/                   LOP.OR R4, R4, R2;    // OR the operand's sign bit back in
  97.         /*02b0*/                   RET;
  98. .L_13:    // R4 is neither 0, 0xfd nor 0xfe
  99.         /*02b8*/                   MUFU.RCP R4, R2;    // hardware reciprocal, returned as is
  100.         /*02c8*/                   RET;
  101. .L_14:    // only branch targeting this label in the listing is the one below
  102.         /*02d0*/                   BRA `(.L_14);    // branch-to-self after the final RET; presumably toolchain-emitted padding
  103. .L_39:    // end marker referenced by both .size directives
  104.  

Editor

You can edit this paste and save as new:


File Description
  • SASS code using nvdisasm
  • Paste Code
  • 29 Dec-2020
  • 5.55 Kb
You can Share it: