汇编代码片段

作者在 2012-08-05 10:56:14 发布以下内容
    #  xt-xcc::8.0.2

    #-----------------------------------------------------------
    # Compiling F4.c (/tmp/cc0M#4b60948d.ijEhEV)
    #-----------------------------------------------------------

    #-----------------------------------------------------------
    # Options:
    #-----------------------------------------------------------
    #  Target:xtensa, ISA:xtensa, Pointer Size:32
    #  -O2    (Optimization level)
    #  -g0    (Debug level)
    #  -m2    (Report advisories)
    #-----------------------------------------------------------

    .file    "F4.c"
    .file    1    "/../benchmark/huaweibench/8.case/F4.c"


    .text
    .align    8

    # Zero-initialised global objects (section flags "wa": writable, allocatable).
    # Eight objects are placed at explicit .org offsets inside .bss. Every offset
    # is a multiple of 16, matching the `.align 16` in front of each label.
    #
    #   offset    symbol       bytes
    #   0x00000   C00000684         4
    #   0x00010   C0000067F    153600
    #   0x25810   C00000686       400
    #   0x259a0   C00000680     76800
    #   0x385a0   C00000681      9600
    #   0x3ab20   C00000682       200
    #   0x3abf0   C00000683      1024
    #   0x3aff0   C00000685     12800
    #
    # Each entry follows the same pattern: .org (set offset), .align, .global,
    # .type @object, .size, label, .skip (reserve the bytes).
    # NOTE(review): the symbol names look machine-generated/obfuscated; what each
    # buffer holds in F4.c cannot be determined from this listing.
    .section    .bss, "wa"
    .org 0x0
    .align    16
    .global    C00000684
    .type    C00000684, @object
    .size    C00000684, 4
C00000684:    # 0x0
    .skip 4
    # 4 bytes used; the next object starts at 0x10 (12 bytes of padding).
    .org 0x10
    .align    16
    .global    C0000067F
    .type    C0000067F, @object
    .size    C0000067F, 153600
C0000067F:    # 0x10
    .skip 153600
    .org 0x25810
    .align    16
    .global    C00000686
    .type    C00000686, @object
    .size    C00000686, 400
C00000686:    # 0x25810
    .skip 400
    .org 0x259a0
    .align    16
    .global    C00000680
    .type    C00000680, @object
    .size    C00000680, 76800
C00000680:    # 0x259a0
    .skip 76800
    .org 0x385a0
    .align    16
    .global    C00000681
    .type    C00000681, @object
    .size    C00000681, 9600
C00000681:    # 0x385a0
    .skip 9600
    .org 0x3ab20
    .align    16
    .global    C00000682
    .type    C00000682, @object
    .size    C00000682, 200
C00000682:    # 0x3ab20
    .skip 200
    # 200 bytes end at 0x3abe8; the next object is placed at 0x3abf0.
    .org 0x3abf0
    .align    16
    .global    C00000683
    .type    C00000683, @object
    .size    C00000683, 1024
C00000683:    # 0x3abf0
    .skip 1024
    .org 0x3aff0
    .align    16
    .global    C00000685
    .type    C00000685, @object
    .size    C00000685, 12800
C00000685:    # 0x3aff0
    .skip 12800

    # Code section for TestCode. .LC0_1_32 is a literal-pool constant (4205)
    # that the function fetches with l32r further down.
    .text
    .literal_position
    .literal    .LC0_1_32, 4205

    # Program Unit: TestCode
    #
    # TestCode -- compiler output (xt-xcc -O2) for a routine in F4.c.
    #
    # Registers read before being written in the visible code: a2, a3, a4, a5.
    # NOTE(review): under the Xtensa windowed ABI these would be the first four
    # arguments; the C prototype is not part of this listing -- confirm.
    #   a2  base address: copied into a4, which the loop uses as a load base
    #   a3  address of a 32-bit word that the loop reads and writes back
    #   a4  tested against zero on entry, then shifted left by 8 and added to a2
    #   a5  copied into a11 and compared against 2 inside the loop
    #
    # Stack frame: `entry a1,176` reserves 176 bytes. The a1-relative slots the
    # compiler assigned are listed just below the label (wArrVar02 at +0,
    # uwArrVar01 at +64, uwArrVar03 at +80, spill slots at +112/+128/+132).
    #
    # Instructions grouped in { } form one FLIX bundle (format h64 / x64).
    # Mnemonics such as movvr40, lvs32.i, sshvl.sat40.4x40 are not core Xtensa
    # instructions; presumably they are HiDSP170 TIE extensions (a spill-slot
    # name mentions TIE_HiDSP170). Their exact semantics are not shown here, so
    # comments on them are hedged.
    .type    TestCode, @function
    .align     4
    .global    TestCode
TestCode:    # 0x4
    # uwArrVar01 = 64
    # wArrVar02 = 0
    # uwArrVar03 = 80
    # rtom_spill__TIE_HiDSP170_vec8x16_temp_0 = 112
    # gra_spill_temp_1 = 128
    # gra_spill_temp_2 = 132
.LBB1_TestCode:    # 0x4
#<freq> BB:1 => BB:16 probability = 0.02929
#<freq> BB:1 => BB:3 probability = 0.97071
    .frequency 1.000 0.000
    # Allocate the frame; if a4 == 0 skip the loop setup and branch to
    # .Lt_0_4866 (label not present in this excerpt).
    entry    a1,176                      #  
    beqz    a4,.Lt_0_4866                # [1]  

#.LBB3_TestCode:    # 0xa
    # Loop setup (runs once).
    # a9 = -a2 ; a10 = a4 << 8
{    # format h64
    neg    a9,a2                         # [0]  
    nop                               #  
    slli    a10,a4,8                     # [0]  
}
    # a12 = 16 ; a13 = 1 (scalar constants moved into v0 / v1 below)
{    # format h64
    movi    a12,16                       # [1]  
    nop                               #  
    movi    a13,1                        # [1]  
}
    # v1 <- a13 (1); presumably a scalar-to-vector register move
{    # format x64
    movvr40    v1,a13                    # [2]  
    nop                               #  
    nop                               #  
}
    # v0 <- a12 (16); later used as a vector shift count
{    # format x64
    movvr40    v0,a12                    # [3]  
    nop                               #  
    nop                               #  
}
    # a10 = a2 + (original a4 << 8) ; a4 = a2 (a4 becomes the load base)
{    # format h64
    add    a10,a10,a2                    # [4]  
    nop                               #  
    mov.n    a4,a2                       # [4]  
}
    # Spill -a2 to gra_spill_temp_2 (reloaded in the loop head) and the sum
    # a2 + (a4_in << 8) to gra_spill_temp_1 (presumably the loop end address;
    # its reload is outside this excerpt).
{    # format h64
    s32i    a9,a1,132                    # [5]  gra_spill_temp_2
    nop                               #  
    s32i    a10,a1,128                   # [5]  gra_spill_temp_1
}

.Lt_0_5378:    # 0x3a
#<loop> Loop body line 76, nesting depth: 1, estimated iterations: 100
#<swpf>  non-innermost loop
    .frequency 0.971 47.808
    # Loop-head block. Scalar data flow visible in this block:
    #   1. a14 = 32-bit word at [a3].
    #   2. Four words at [a4+0], [a4+20], [a4+40], [a4+60] are copied into
    #      uwArrVar01 (a1+64..76), run through vector ops, stored back to
    #      uwArrVar01 and reloaded into a12 / a15 / a13 / a8.
    #   3. The four reloaded words are ORed together; (nsau of that value) - 17
    #      is added to a14, which is written back to [a3]. The same value is
    #      moved into v2 for use as a shift count by the following block.
    #   4. a9 and a10 are set 16 bytes below a4 and below wArrVar02, so the
    #      `lvs16.iu ..,16` / `svs16.iu ..,16` accesses that follow start at
    #      offset 0 -- assuming ".iu" updates the address before the access
    #      (TODO confirm against the TIE definition).
    #   5. a11 = a5.
    # NOTE(review): comments on bundles assume all operations in one { } read
    # their source registers before any operation in that bundle writes.
    l32i.n    a14,a3,0                   # [0]  id:174
    # Gather four words spaced 20 bytes apart, starting at a4.
{    # format h64
    l32i    a13,a4,0                     # [1]  id:166
    nop                               #  
    l32i    a12,a4,20                    # [1]  id:168
}
{    # format h64
    l32i    a11,a4,40                    # [2]  id:170
    nop                               #  
    l32i    a10,a4,60                    # [2]  id:172
}
    # Pack them contiguously into uwArrVar01[0..3] so they can be loaded as
    # one vector.
{    # format h64
    s32i    a10,a1,76                    # [3]  uwArrVar01+12
    nop                               #  
    s32i    a11,a1,72                    # [3]  uwArrVar01+8
}
{    # format h64
    s32i    a12,a1,68                    # [4]  uwArrVar01+4
    nop                               #  
    s32i    a13,a1,64                    # [4]  uwArrVar01
}
    # v5 <- uwArrVar01 (vector load) ; a9 = a14 - 12
{    # format h64
    lvs32.i    v5,a1,64                  # [5]  uwArrVar01
    nop                               #  
    addi    a9,a14,-12                   # [5]  
}
    # v4 <- a9 ; v5 = v5 shifted right by v0 (v0 was loaded with 16).
    # Presumably a saturating shift on four 40-bit lanes, per the mnemonic.
{    # format x64
    movvr40    v4,a9                     # [6]  
    nop                               #  
    sshvr.sat40.4x40    v5,v5,v0         # [6]  
}
    # v4 = v1 shifted left by v4, i.e. presumably 1 << (a14 - 12) per lane.
{    # format x64
    nop                               #  
    nop                               #  
    sshvl.sat40.4x40    v4,v1,v4         # [7]  
}
    # v4 = v4 + v5
{    # format x64
    nop                               #  
    nop                               #  
    add40    v4,v4,v5                    # [8]  
}
    # Store the vector result back over uwArrVar01 ...
{    # format x64
    svs32.i    v4,a1,64                  # [10]  uwArrVar01
    nop                               #  
    nop                               #  
}
    # ... and read the four words back into scalar registers.
{    # format h64
    l32i    a15,a1,68                    # [11]  uwArrVar01+4
    nop                               #  
    l32i    a12,a1,64                    # [11]  uwArrVar01
}
{    # format h64
    l32i    a8,a1,76                     # [12]  uwArrVar01+12
    nop                               #  
    l32i    a13,a1,72                    # [12]  uwArrVar01+8
}
    # a9 = spilled value from gra_spill_temp_2 (-a2, stored by the setup code) ;
    # a11 = a5 (default value of the flag tested at .L_0_8706).
{    # format h64
    l32i    a9,a1,132                    # [14]  gra_spill_temp_2
    nop                               #  
    mov.n    a11,a5                      # [14]  
}
    # OR-reduce the four words: first pairwise ...
{    # format h64
    or    a13,a13,a8                     # [15]  
    nop                               #  
    or    a12,a12,a15                    # [15]  
}
    # ... then combine into a12. a10 = a1 - 16 (16 bytes below wArrVar02).
{    # format h64
    addi    a10,a1,-16                   # [16]  wArrVar02-16
    nop                               #  
    or    a12,a12,a13                    # [16]  
}
    # a12 = nsau(a12): number of leading zero bits of the ORed value.
    # a9 = a4 + (-a2).
{    # format h64
    nsau    a12,a12                      # [17]  
    nop                               #  
    add    a9,a4,a9                      # [17]  
}
    # a12 = nsau - 17 ; a14 = a14 + nsau (the add reads the pre-bundle a12;
    # the remaining -17 is applied to a14 two bundles below).
{    # format h64
    addi    a12,a12,-17                  # [18]  
    nop                               #  
    add    a14,a14,a12                   # [18]  
}
    # v2 <- a12 (shift count for the next block) ; v10 = v4 shifted left by v0.
{    # format x64
    movvr20    v2,a12                    # [19]  
    nop                               #  
    sshvl.sat40.4x40    v10,v4,v0        # [19]  
}
    # a14 = a14 - 17 (now a14_old + nsau - 17) ; a9 = (a4 - a2) + a2.
{    # format h64
    addi    a14,a14,-17                  # [20]  
    nop                               #  
    add    a9,a9,a2                      # [20]  
}
    # Write the updated a14 back to [a3] ; a9 = a9 - 16, numerically a4 - 16.
{    # format h64
    s32i    a14,a3,0                     # [21]  id:178
    nop                               #  
    addi    a9,a9,-16                    # [21]  
}

#.LBB21_TestCode:    # 0xd5
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
#<loop> unrolled 4 times (fully)
    # Four identical load / shift / store steps (the compiler fully unrolled
    # a 4-iteration loop). Each step:
    #   vN <- vector load via a9 with a 16-byte step
    #   vN  = vN shifted left by v2 (v2 was loaded from a12 in the loop head;
    #         presumably a saturating shift on eight 20-bit lanes)
    #   vector store of vN via a10 with a 16-byte step
    # a9 enters as a4 - 16 and a10 as wArrVar02 - 16 (set in the loop head).
    # Assuming ".iu" adds the 16 before accessing memory (TODO confirm), the
    # four steps read 64 bytes starting at a4 and fill wArrVar02 (a1+0..63).
    # Step 1
{    # format x64
    lvs16.iu    v9,a9,16                 # [0]  id:180
    nop                               #  
    nop                               #  
}
{    # format x64
    nop                               #  
    nop                               #  
    sshvl.sat20.8x20    v9,v9,v2         # [1]  
}
{    # format x64
    svs16.iu    v9,a10,16                # [3]  id:181 wArrVar02+0x0
    nop                               #  
    nop                               #  
}
    # Step 2
{    # format x64
    lvs16.iu    v8,a9,16                 # [4]  id:180
    nop                               #  
    nop                               #  
}
{    # format x64
    nop                               #  
    nop                               #  
    sshvl.sat20.8x20    v8,v8,v2         # [5]  
}
{    # format x64
    svs16.iu    v8,a10,16                # [7]  id:181 wArrVar02+0x0
    nop                               #  
    nop                               #  
}
    # Step 3
{    # format x64
    lvs16.iu    v7,a9,16                 # [8]  id:180
    nop                               #  
    nop                               #  
}
{    # format x64
    nop                               #  
    nop                               #  
    sshvl.sat20.8x20    v7,v7,v2         # [9]  
}
{    # format x64
    svs16.iu    v7,a10,16                # [11]  id:181 wArrVar02+0x0
    nop                               #  
    nop                               #  
}
    # Step 4
{    # format x64
    lvs16.iu    v6,a9,16                 # [12]  id:180
    nop                               #  
    nop                               #  
}
{    # format x64
    nop                               #  
    nop                               #  
    sshvl.sat20.8x20    v6,v6,v2         # [13]  
}
{    # format x64
    svs16.iu    v6,a10,16                # [15]  id:181 wArrVar02+0x0
    nop                               #  
    nop                               #  
}

#.LBB19_TestCode:    # 0x135
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
#<freq> BB:19 => BB:10 probability = 0.86517
#<freq> BB:19 => BB:8 probability = 0.13483
    # Store v10 (computed in the loop head) to uwArrVar01, then dispatch on a5:
    # if a5 != 2 jump to the join point .L_0_8706 with a11 still equal to a5;
    # if a5 == 2 fall through into the extra check in the next block.
{    # format x64
    svs32.i    v10,a1,64                 # [0]  uwArrVar01
    nop                               #  
    nop                               #  
}
    bnei    a5,2,.L_0_8706               # [1]  

#.LBB8_TestCode:    # 0x140
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
#<freq> BB:8 => BB:9 probability = 0.50000
#<freq> BB:8 => BB:10 probability = 0.50000
    # Reached only when a5 == 2. Outline of what the visible code does:
    #   1. Gather six words from wArrVar02 (byte offsets 16, 32, 36, 48, 52, 56)
    #      into uwArrVar03[0..5] and zero uwArrVar03[6..7].
    #   2. Reduce uwArrVar03 to a scalar a8: abs20 on both halves, add20,
    #      radd20, shift right by 4, movar16. (TIE ops; presumably
    #      "sum of absolute lane values >> 4" -- confirm.)
    #   3. Reduce uwArrVar01 to a scalar a15 the same way (abs20, radd20,
    #      shift right by 4, movar16).
    #   4. a14 = (4205 * a8) >> 11, where 4205 is the .LC0_1_32 literal.
    #   5. a11 was set to 0 here; if a14 < a15 (signed) branch to .L_0_8706 with
    #      a11 == 0, otherwise fall through to the block that restores a11 = a5.
{    # format h64
    l32i    a10,a1,48                    # [0]  wArrVar02+48
    nop                               #  
    l32i    a12,a1,56                    # [0]  wArrVar02+56
}
    # a11 = 0: flag value used if the comparison at the end of the block branches.
{    # format h64
    movi    a11,0                        # [1]  
    nop                               #  
    l32i    a13,a1,52                    # [1]  wArrVar02+52
}
{    # format h64
    l32i    a14,a1,36                    # [2]  wArrVar02+36
    nop                               #  
    l32i    a8,a1,16                     # [2]  wArrVar02+16
}
    # uwArrVar03[0] = word at wArrVar02+16
{    # format h64
    s32i    a8,a1,80                     # [3]  uwArrVar03
    nop                               #  
    l32i    a15,a1,32                    # [3]  wArrVar02+32
}
    # uwArrVar03[1] = word at wArrVar02+32 ; uwArrVar03[2] = word at wArrVar02+36
{    # format h64
    s32i    a15,a1,84                    # [4]  uwArrVar03+4
    nop                               #  
    s32i    a14,a1,88                    # [4]  uwArrVar03+8
}
    # uwArrVar03[6] = uwArrVar03[7] = 0
{    # format h64
    s32i    a11,a1,104                   # [5]  uwArrVar03+24
    nop                               #  
    s32i    a11,a1,108                   # [5]  uwArrVar03+28
}
    # uwArrVar03[4] = word at wArrVar02+52 ; uwArrVar03[5] = word at wArrVar02+56
{    # format h64
    s32i    a13,a1,96                    # [6]  uwArrVar03+16
    nop                               #  
    s32i    a12,a1,100                   # [6]  uwArrVar03+20
}
    # v13 <- vector at uwArrVar03+16 ; uwArrVar03[3] = word at wArrVar02+48
{    # format h64
    lvs16.i    v13,a1,96                 # [7]  uwArrVar03+16
    nop                               #  
    s32i    a10,a1,92                    # [7]  uwArrVar03+12
}
    # v12 <- vector at uwArrVar03 ; v13 = abs20(v13)
{    # format x64
    lvs16.i    v12,a1,80                 # [8]  uwArrVar03
    nop                               #  
    abs20    v13,v13                     # [8]  
}
{    # format x64
    nop                               #  
    nop                               #  
    abs20    v12,v12                     # [9]  
}
    # v11 <- vector at uwArrVar01 ; v12 = v12 + v13
{    # format x64
    lvs16.i    v11,a1,64                 # [10]  uwArrVar01
    nop                               #  
    add20    v12,v12,v13                 # [10]  
}
    # a9 = 4 (shift count) ; v11 = abs20(v11)
{    # format x64
    movi    a9,4                         # [11]  
    nop                               #  
    abs20    v11,v11                     # [11]  
}
    # v13 <- a9 (4) ; v12 = radd20(v12), presumably a sum across lanes
{    # format x64
    movvr20    v13,a9                    # [12]  
    nop                               #  
    radd20    v12,v12                    # [12]  
}
    # v12 = v12 shifted right by v13 (4)
{    # format x64
    nop                               #  
    nop                               #  
    sshvr.sat20.8x20    v12,v12,v13      # [13]  
}
    # a14 = 4205 (literal .LC0_1_32) ; v11 = radd20(v11)
{    # format x64
    l32r    a14,.LC0_1_32                # [14]  
    nop                               #  
    radd20    v11,v11                    # [14]  
}
    # a8 <- v12 (vector-to-scalar move) ; v11 = v11 shifted right by v13 (4)
{    # format x64
    movar16    a8,v12                    # [15]  
    nop                               #  
    sshvr.sat20.8x20    v11,v11,v13      # [15]  
}
    # a15 <- v11 (vector-to-scalar move)
{    # format x64
    movar16    a15,v11                   # [17]  
    nop                               #  
    nop                               #  
}
    # a14 = (4205 * a8) >> 11, i.e. a8 scaled by 4205/2048. mul16s is a signed
    # multiply of the low 16 bits of each operand; srai is an arithmetic shift.
    # Branch (a11 stays 0) when the scaled value is less than a15.
    mul16s    a14,a14,a8                 # [18]  
    srai    a14,a14,11                   # [20]  
    blt    a14,a15,.L_0_8706             # [21]  

#.LBB9_TestCode:    # 0x1d1
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
    # Fall-through from the a5 == 2 check when its final comparison did not
    # branch: restore the flag a11 = a5 (the preceding block had set it to 0).
    mov.n    a11,a5                      #  

.L_0_8706:    # 0x1d4
.Lt_0_6658:    # 0x1d4
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
#<freq> BB:10 => BB:11 probability = 0.13483
#<freq> BB:10 => BB:12 probability = 0.86517
    .frequency 3.288 45.491
    # Join point. a11 arrives as a5 (a5 != 2, or the a5 == 2 check fell
    # through) or as 0 (the a5 == 2 check branched here). If a11 != 0 branch to
    # .Lt_0_7170 (label not present in this excerpt); if a11 == 0 fall through.
    bnez    a11,.Lt_0_7170               #  

#.LBB11_TestCode:    # 0x1d7
#<loop> Part of loop body line 76, head labeled .Lt_0_5378
    movi.n    a9,0                       # [0]  
{    # format x64
    movvr20    v15,a9                    # [1]  
    nop                               #  
    nop                               #  
}
{    # format x64
    svs16.i    v15,a1,112                # [3]  rtom_spill__TIE_HiDSP170_vec8x16_temp_0
    nop                               #  
    nop                               #  
}
{    # format x64
    lvs16.i    v14,a1,112                # [4]  rtom_spill__TIE_HiDSP170_vec8x16_temp_0
    nop                               #  
 
实例 | 阅读 6422 次
文章评论,共1条
vfdff(作者)
2012-08-05 11:02
1
Audio DSP, Baseband DSP, or Your Own Customized Value-Add Engine – Tensilica’s IP Cores Excel in the SOC Dataplane<br />
Tensilica is the #1 supplier of audio DSP IP cores and 4G baseband DSP IP cores for the mobile, handset, and home entertainment markets.<br />
<br />
In fact, no matter what the function is, if your SOC design demands a highly-efficient, programmable computational engine for a data-intensive task, our innovative technology can provide a solution for you.<br />
<br />
For the most common and broadly applicable tasks in the dataplane, Tensilica has ready made solutions like our HiFi Audio DSPs, our ConnX Communications DSPs and our Diamond Standard controllers for deeply embedded dataplane control.<br />
<br />
For more specialized tasks, you can rapidly build your own customized dataplane processor for tasks like image signal processing, video processing, security protocol processing, or network packet processing using our Xtensa Processor Generator.<br />
<br />
Why do Tensilica’s customers keep coming back for more? Two main reasons:<br />
<br />
1. They like our comprehensive, total solutions for audio and baseband DSPs.<br />
2. They’ve discovered that it’s easy to develop their own programmable cores with Tensilica’s customized processors that provide much higher performance with lower area and lower power. This is essential for tasks in the SOC dataplane and is why we call our processor cores DPUs (dataplane processing units).<br />
<br />
<br />
DPUs Do the Hard Stuff – the Data Processing that the Control Processor Can’t Do<br />
<br />
Where Tensilica’s cores really shine is in the dataplane - the "other" part of the chip where designers typically use RTL blocks to do the "heavy lifting". The problem with those RTL blocks is that they take a long time to design, take even longer to verify, and are not programmable for making changes post-silicon.<br />
<br />
DPUs combine the best of CPUs and DSPs with 10-to-100x the performance. DPUs can do BOTH performance intensive DSP (audio, video, imaging, and baseband signal processing) and embedded RISC CPU processing functions (security, networking, and deeply embedded control).<br />
<br />
#1 in Audio DSP IP Cores<br />
Tensilica offers the best audio DSP IP cores in the industry. With a full range of audio codecs plus sound-enhancement software from industry leaders AM3D, Dolby, DTS, QSound, and SRS, Tensilica’s HiFi Audio is becoming the de facto standard for high-quality audio. In home entertainment, we were the first IP provider certified for DTS Master Audio and we offer outstanding support for Dolby codecs and HD Radio. Because they require so little power, Tensilica’s HiFi Audio DSPs have brought home entertainment quality to smartphones, digital cameras and other portable electronic devices.<br />
<br />
 <br />
<br />
#1 in LTE DSP IP Cores<br />
Why have so many companies picked Tensilica’s DSPs for their 4G LTE designs? With the widest range of DSP IP cores, Tensilica can provide the performance level you need for your design, from our basic ConnX D2 dual-MAC DSP all the way up to our newest ConnX BBE64, a 64 – 128 MAC DSP designed specifically to meet the throughput challenges of LTE Advanced communications. Our Atlas reference architecture implements the entire 3GPP LTE layer 1 PHY, including the computationally demanding Turbo decoder, in a completely processor-based, fully programmable DSP core reference architecture.<br />
<br />
Xtensa Customizable Processors<br />
You can get the best performance, lowest power and smallest size by customizing a processor for your exact application. There are three fundamental ways you can optimize Xtensa processors:<br />
<br />
1. You can stream data into and out of the processor directly, without going through the processor bus. This means no load/store overhead and RTL-like performance.<br />
2. You can use our check-box configuration options to pick just what you need in terms of memories, interfaces, and more.<br />
3. You can add custom instructions that merge several operations into one, do something unique for your product, and allow for fast parallel execution.<br />
Diamond Standard Controllers<br />
Tensilica offers a wide range of standard controller options from very small, 32-bit ultra-low-power, cache-less RISC controller up to a powerful high-performance 3-issue VLIW CPU.<br />
<br />
Backed by the Best Tools in the Industry<br />
Whether you're a hardware designer or a software developer, an inexperienced engineer or an experienced Tensilica user, Tensilica has a comprehensive set of tools to make your job much easier and more productive. Our tools that help you design a custom processor are unrivalled in our industry.<br />
<br />
Every processor you get from Tensilica, be it a standard DSP or a processor you customized yourself, comes with an automatically generated, matching software tool chain that includes an outstanding C/C++ compiler, ISS, debugger, and code generation and analysis tools that will speed the software development process.<br />
<br />
Base Architecture and Cool Technology<br />
In this section, you'll read about the architecture behind all of these products - Tensilica's efficient, low-power Xtensa architecture. You also can read about the complete software tool chain, third party ecosystem, EDA flows, and models available with every Tensilica processor.
游客请输入验证码