<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://dreamcast.wiki/wiki/index.php?action=history&amp;feed=atom&amp;title=SH4_FIPR_Optimizations</id>
	<title>SH4 FIPR Optimizations - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://dreamcast.wiki/wiki/index.php?action=history&amp;feed=atom&amp;title=SH4_FIPR_Optimizations"/>
	<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;action=history"/>
	<updated>2026-05-12T22:15:12Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.39.3</generator>
	<entry>
		<id>https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3853&amp;oldid=prev</id>
		<title>BBHoodsta at 18:42, 16 February 2026</title>
		<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3853&amp;oldid=prev"/>
		<updated>2026-02-16T18:42:47Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;amp;diff=3853&amp;amp;oldid=3852&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>BBHoodsta</name></author>
	</entry>
	<entry>
		<id>https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3852&amp;oldid=prev</id>
		<title>BBHoodsta: Updated the function because it crashed before</title>
		<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3852&amp;oldid=prev"/>
		<updated>2026-02-16T18:40:26Z</updated>

		<summary type="html">&lt;p&gt;Updated the function because it crashed before&lt;/p&gt;
&lt;a href=&quot;https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;amp;diff=3852&amp;amp;oldid=3789&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>BBHoodsta</name></author>
	</entry>
	<entry>
		<id>https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3789&amp;oldid=prev</id>
		<title>GyroVorbis at 14:25, 26 July 2025</title>
		<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3789&amp;oldid=prev"/>
		<updated>2025-07-26T14:25:29Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 14:25, 26 July 2025&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l44&quot;&gt;Line 44:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 44:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;fmov.s fr7, @%[result] ! PIPELINE STALL!!!!&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;fmov.s fr7, @%[result] ! PIPELINE STALL!!!!&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now this is very very bad. FIPR has 5&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;-8 &lt;/del&gt;cycles of latency, so every fucking call to FIPR, since the very next instruction tries to use the result before its been calculated, the entire pipeline must stall waiting for the result... FOR EVERY FIPR CALL.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now this is very very bad. FIPR has &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;4-&lt;/ins&gt;5 cycles of latency, so every fucking call to FIPR, since the very next instruction tries to use the result before its been calculated, the entire pipeline must stall waiting for the result... FOR EVERY FIPR CALL.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;So you&amp;#039;re losing MASSIVE perf benefits there.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;So you&amp;#039;re losing MASSIVE perf benefits there.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The solution? You have to pipeline your FIPRs so that while the previous FIPR call is still calculating, you&amp;#039;re loading up and issuing the next FIPR call.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The solution? You have to pipeline your FIPRs so that while the previous FIPR call is still calculating, you&amp;#039;re loading up and issuing the next FIPR call.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;

&lt;!-- diff cache key dcwiki:diff::1.12:old-3788:rev-3789 --&gt;
&lt;/table&gt;</summary>
		<author><name>GyroVorbis</name></author>
	</entry>
	<entry>
		<id>https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3788&amp;oldid=prev</id>
		<title>GyroVorbis at 14:22, 26 July 2025</title>
		<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3788&amp;oldid=prev"/>
		<updated>2025-07-26T14:22:56Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 14:22, 26 July 2025&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot;&gt;Line 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Yo, guys. At like 1AM @ian micheal got me looking at pl_mpeg&amp;#039;s audio decoder to see if I could see any potential gainz...&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Yo, guys. At like 1AM @ian micheal got me looking at pl_mpeg&amp;#039;s audio decoder to see if I could see any potential gainz... &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;So here is its innermost hottest audio synthesis loop:&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;

&lt;!-- diff cache key dcwiki:diff::1.12:old-3787:rev-3788 --&gt;
&lt;/table&gt;</summary>
		<author><name>GyroVorbis</name></author>
	</entry>
	<entry>
		<id>https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3787&amp;oldid=prev</id>
		<title>GyroVorbis: Created page with &quot;Yo, guys. At like 1AM @ian micheal got me looking at pl_mpeg&#039;s audio decoder to see if I could see any potential gainz...  &lt;pre&gt; for (int i = 32; i; --i) {     float u;     u = pl_fipr(d[0], d[1], d[2], d[3], v1[0], v2[0], v1[128], v2[128]);     u += pl_fipr(d[4], d[5], d[6], d[7], v1[256], v2[256], v1[384], v2[384]);     u += pl_fipr(d[8], d[9], d[10], d[11], v1[512], v2[512], v1[640], v2[640]);     u += pl_fipr(d[12], d[13], d[14], d[15], v1[768], v2[768], v1[896], v2[...&quot;</title>
		<link rel="alternate" type="text/html" href="https://dreamcast.wiki/wiki/index.php?title=SH4_FIPR_Optimizations&amp;diff=3787&amp;oldid=prev"/>
		<updated>2025-07-26T14:18:22Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Yo, guys. At like 1AM @ian micheal got me looking at pl_mpeg&amp;#039;s audio decoder to see if I could see any potential gainz...  &amp;lt;pre&amp;gt; for (int i = 32; i; --i) {     float u;     u = pl_fipr(d[0], d[1], d[2], d[3], v1[0], v2[0], v1[128], v2[128]);     u += pl_fipr(d[4], d[5], d[6], d[7], v1[256], v2[256], v1[384], v2[384]);     u += pl_fipr(d[8], d[9], d[10], d[11], v1[512], v2[512], v1[640], v2[640]);     u += pl_fipr(d[12], d[13], d[14], d[15], v1[768], v2[768], v1[896], v2[...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Yo, guys. At like 1AM @ian micheal got me looking at pl_mpeg&amp;#039;s audio decoder to see if I could see any potential gainz...&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
for (int i = 32; i; --i) {&lt;br /&gt;
    float u;&lt;br /&gt;
    u = pl_fipr(d[0], d[1], d[2], d[3], v1[0], v2[0], v1[128], v2[128]);&lt;br /&gt;
    u += pl_fipr(d[4], d[5], d[6], d[7], v1[256], v2[256], v1[384], v2[384]);&lt;br /&gt;
    u += pl_fipr(d[8], d[9], d[10], d[11], v1[512], v2[512], v1[640], v2[640]);&lt;br /&gt;
    u += pl_fipr(d[12], d[13], d[14], d[15], v1[768], v2[768], v1[896], v2[896]);&lt;br /&gt;
    d += 32;&lt;br /&gt;
    v1++;&lt;br /&gt;
    v2++;&lt;br /&gt;
    *out++ = (short)((int)u &amp;gt;&amp;gt; 16);&lt;br /&gt;
}&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
Which... you&amp;#039;d think would be preeeetty efficient, right? 4 back-to-back FIPRs? I mean, it is hella gainzy compared to not using FIPR.&lt;br /&gt;
&lt;br /&gt;
But there are two problems with back-to-back FIPR-y that I wanna teach anyone interested:&lt;br /&gt;
&lt;br /&gt;
1) Very often one of the vector arguments stays constant between FIPR calls, but unfortunately the compiler is too dumb to not reload all 8 registers between calls regardless.&lt;br /&gt;
* LUCKILY every argument to these FIPRs is unique so this is not applicable, but... very often that&amp;#039;s a perf destroyer.&lt;br /&gt;
&lt;br /&gt;
2) THE COMPILER CANNOT PIPELINE FIPR FOR SHIT.&lt;br /&gt;
* VERY applicable here. You know what the ASM looks like for these FIPR calls? Something like this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
! load first vector arg into fv0 (nothing wrong with this)&lt;br /&gt;
fmov.s @%[d]+, fr0&lt;br /&gt;
fmov.s @%[d]+, fr1&lt;br /&gt;
fmov.s @%[d]+, fr2&lt;br /&gt;
fmov.s @%[d]+, fr3&lt;br /&gt;
&lt;br /&gt;
! load second vector arg into fv4 (nothing wrong with this)&lt;br /&gt;
fmov.s @%[v1], fr4&lt;br /&gt;
add    %[offset], %[v1]&lt;br /&gt;
fmov.s @%[v2], fr5&lt;br /&gt;
add    %[offset], %[v2]&lt;br /&gt;
fmov.s @%[v1], fr6&lt;br /&gt;
fmov.s @%[v2], fr7&lt;br /&gt;
&lt;br /&gt;
! issue actual FIPR calculation&lt;br /&gt;
fipr fv0, fv4&lt;br /&gt;
&lt;br /&gt;
! VERY NEXT INSTRUCTION TRIES TO STORE THE RESULT&lt;br /&gt;
fmov.s fr7, @%[result] ! PIPELINE STALL!!!!&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
Now this is very very bad. FIPR has 5-8 cycles of latency, so on every fucking call to FIPR, since the very next instruction tries to use the result before it&amp;#039;s been calculated, the entire pipeline must stall waiting for the result... FOR EVERY FIPR CALL.&lt;br /&gt;
So you&amp;#039;re losing MASSIVE perf benefits there.&lt;br /&gt;
The solution? You have to pipeline your FIPRs so that while the previous FIPR call is still calculating, you&amp;#039;re loading up and issuing the next FIPR call.&lt;br /&gt;
&lt;br /&gt;
So I wrote a new routine that replaces that inner loop body doing manually pipelined FIPR calls... This should be way better:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
for (int i = 32; i; --i) {&lt;br /&gt;
#if 0 // Old FIPR path which didn&amp;#039;t pipeline for shit.&lt;br /&gt;
    float u;&lt;br /&gt;
    u = pl_fipr(d[0], d[1], d[2], d[3], v1[0], v2[0], v1[128], v2[128]);&lt;br /&gt;
    u += pl_fipr(d[4], d[5], d[6], d[7], v1[256], v2[256], v1[384], v2[384]);&lt;br /&gt;
    u += pl_fipr(d[8], d[9], d[10], d[11], v1[512], v2[512], v1[640], v2[640]);&lt;br /&gt;
    u += pl_fipr(d[12], d[13], d[14], d[15], v1[768], v2[768], v1[896], v2[896]);&lt;br /&gt;
#else // New hand-written FIPR path with manual pipelining&lt;br /&gt;
    float u = shz_pl_inner_loop(d, v1, v2);&lt;br /&gt;
#endif&lt;br /&gt;
    d += 32;&lt;br /&gt;
    v1++;&lt;br /&gt;
    v2++;&lt;br /&gt;
    *out++ = (short)((int)u &amp;gt;&amp;gt; 16);&lt;br /&gt;
}&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Where the new implementation is this inline ASM:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
__always_inline &lt;br /&gt;
float shz_pl_inner_loop(const float *d, const float *v1, const float *v2) {&lt;br /&gt;
    float fp_scratch[2];&lt;br /&gt;
    uint32_t int_scratch;&lt;br /&gt;
&lt;br /&gt;
    asm volatile(R&amp;quot;(&lt;br /&gt;
        ! Swap to back-bank so we don&amp;#039;t need to clobber any FP regs.&lt;br /&gt;
        frchg&lt;br /&gt;
&lt;br /&gt;
        ! Load first vector into fv0 for first FIPR.&lt;br /&gt;
        xor     %[s], %[s]&lt;br /&gt;
        fmov.s  @%[d]+, fr0&lt;br /&gt;
        add     #64, %[s]&lt;br /&gt;
        fmov.s  @%[d]+, fr1&lt;br /&gt;
        add     #64, %[s]&lt;br /&gt;
        fmov.s  @%[d]+, fr2&lt;br /&gt;
        add     #16, %[r]&lt;br /&gt;
        fmov.s  @%[d]+, fr3&lt;br /&gt;
&lt;br /&gt;
        ! Load second vector into fv4 for first FIPR&lt;br /&gt;
        fmov.s  @%[v1], fr4&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr5&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
        fmov.s  @%[v1], fr6&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr7&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
&lt;br /&gt;
        ! Issue first FIPR&lt;br /&gt;
        fipr    fv0, fv4&lt;br /&gt;
        ! DO NOT SAVE THE RESULT YET&lt;br /&gt;
&lt;br /&gt;
        ! Load first vector into fv8 for second FIPR.&lt;br /&gt;
        fmov.s  @%[d]+, fr8&lt;br /&gt;
        fmov.s  @%[d]+, fr9&lt;br /&gt;
        fmov.s  @%[d]+, fr10&lt;br /&gt;
        fmov.s  @%[d]+, fr11&lt;br /&gt;
&lt;br /&gt;
        ! Load second vector into fv12 for second FIPR.&lt;br /&gt;
        fmov.s  @%[v1], fr12&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr13&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
        fmov.s  @%[v1], fr14&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr15&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
&lt;br /&gt;
        ! Issue second FIPR&lt;br /&gt;
        fipr    fv8, fv12&lt;br /&gt;
        ! Store result from FIRST FIPR now that it&amp;#039;s ready&lt;br /&gt;
        fmov.s  fr7, @-%[r]&lt;br /&gt;
&lt;br /&gt;
        ! Load first vector into fv0 for third FIPR&lt;br /&gt;
        fmov.s  @%[d]+, fr0&lt;br /&gt;
        fmov.s  @%[d]+, fr1&lt;br /&gt;
        fmov.s  @%[d]+, fr2&lt;br /&gt;
        fmov.s  @%[d]+, fr3&lt;br /&gt;
&lt;br /&gt;
        ! Load second vector into fv4 for third FIPR&lt;br /&gt;
        fmov.s  @%[v1], fr4&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr5&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
        fmov.s  @%[v1], fr6&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr7&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
        &lt;br /&gt;
        ! Issue third FIPR&lt;br /&gt;
        fipr    fv0, fv4&lt;br /&gt;
        ! Store result from SECOND FIPR now that it&amp;#039;s ready.&lt;br /&gt;
        fmov.s  fr15, @-%[r]&lt;br /&gt;
&lt;br /&gt;
        ! Load first vector into fv8 for fourth FIPR&lt;br /&gt;
        fmov.s  @%[d]+, fr8&lt;br /&gt;
        fmov.s  @%[d]+, fr9&lt;br /&gt;
        fmov.s  @%[d]+, fr10&lt;br /&gt;
        fmov.s  @%[d]+, fr11&lt;br /&gt;
&lt;br /&gt;
        ! Load second vector into fv12 for fourth FIPR&lt;br /&gt;
        fmov.s  @%[v1], fr12&lt;br /&gt;
        add     %[s], %[v1]&lt;br /&gt;
        fmov.s  @%[v2], fr13&lt;br /&gt;
        add     %[s], %[v2]&lt;br /&gt;
        fmov.s  @%[v1], fr14&lt;br /&gt;
        fmov.s  @%[v2], fr15&lt;br /&gt;
&lt;br /&gt;
        ! Issue fourth FIPR&lt;br /&gt;
        fipr    fv8, fv12&lt;br /&gt;
&lt;br /&gt;
        ! Add up results from previous FIPRs while we wait&lt;br /&gt;
        fmov.s  @%[r]+, fr0&lt;br /&gt;
        fmov.s  @%[r]+, fr1&lt;br /&gt;
        fadd    fr1, fr0&lt;br /&gt;
        fadd    fr7, fr0&lt;br /&gt;
        add     #-8, %[r]&lt;br /&gt;
&lt;br /&gt;
        ! Add result from fourth FIPR now that it&amp;#039;s ready&lt;br /&gt;
        fadd    fr15, fr0&lt;br /&gt;
&lt;br /&gt;
        ! Store final result&lt;br /&gt;
        fmov.s  fr0, @%[r]&lt;br /&gt;
&lt;br /&gt;
        ! Swap back to primary FP register bank&lt;br /&gt;
        frchg&lt;br /&gt;
    )&amp;quot;&lt;br /&gt;
    : [d] &amp;quot;+&amp;amp;r&amp;quot; (d), [v1] &amp;quot;+r&amp;quot; (v1), [v2] &amp;quot;+r&amp;quot; (v2),&lt;br /&gt;
      [r] &amp;quot;+r&amp;quot; (fp_scratch), [s] &amp;quot;=&amp;amp;r&amp;quot; (int_scratch),&lt;br /&gt;
      &amp;quot;=m&amp;quot; (*fp_scratch));&lt;br /&gt;
&lt;br /&gt;
    return fp_scratch[0];&lt;br /&gt;
}&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>GyroVorbis</name></author>
	</entry>
</feed>