Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
PoroCYon
oidos
Commits
9131032b
Commit
9131032b
authored
Apr 21, 2017
by
Aske Simon Christensen
Browse files
SSE2 version of additive core
parent
45316802
Changes
2
Hide whitespace changes
Inline
Side-by-side
synth/src/additive.asm
View file @
9131032b
%if __BITS__ == 32
%define NAME
_additive_core
%define NAME
(n) _%+n
%define r(n) e%+n
%define PSIZE 4
%define STACK_OFFSET (4*4 + 4)
%else
%define NAME
additive_core
%define NAME
(n) n
%define r(n) r%+n
%define PSIZE 8
%define STACK_OFFSET (4*8 + 2*16 + 8)
default
rel
%endif
global
NAME
global
NAME
(
supports_avx
)
global
NAME
(
additive_core_sse2
)
global
NAME
(
additive_core_avx
)
section
sec
text
al
ign
=
1
NAME:
section
con
rdata
al
ign
=
16
c_zero:
dq
0.0
,
0.0
,
0.0
,
0.0
c_one:
dq
1.0
,
1.0
,
1.0
,
1.0
section
sup
text
;-----------------------------------------------------------------------
; bool supports_avx(void)
;
; Reports whether AVX instructions may be executed.  Three things must
; all hold:
;   * CPUID.1:ECX bit 28 (AVX)     - the CPU implements AVX
;   * CPUID.1:ECX bit 27 (OSXSAVE) - the OS enabled XSAVE/XGETBV
;   * XCR0 bits 1 and 2            - the OS saves XMM and YMM state
; Checking the AVX bit alone is not enough: on an OS that has not
; enabled YMM state, AVX instructions raise #UD.
;
; Out:   r(ax) = 1 if AVX is usable, 0 otherwise
; Clobb: ecx, edx, flags (r(bx) is preserved)
;-----------------------------------------------------------------------
%define CPUID1_ECX_OSXSAVE_AVX	0x18000000	; bits 27 (OSXSAVE) and 28 (AVX)
%define XCR0_SSE_AVX_STATE	0x6		; bits 1 (XMM state) and 2 (YMM state)

NAME(supports_avx):
	push	r(bx)				; CPUID overwrites ebx, which is callee-saved

	mov	eax, 1				; CPUID leaf 1: feature flags
	cpuid

	xor	eax, eax			; result = 0 unless every check passes
	and	ecx, CPUID1_ECX_OSXSAVE_AVX
	cmp	ecx, CPUID1_ECX_OSXSAVE_AVX
	jne	.done				; no AVX, or XGETBV is not available

	xor	ecx, ecx			; select XCR0
	xgetbv					; edx:eax = XCR0
	and	eax, XCR0_SSE_AVX_STATE
	cmp	eax, XCR0_SSE_AVX_STATE
	sete	al				; al = 1 when the OS saves XMM and YMM state
	movzx	eax, al				; widen to a clean 0/1 in r(ax)

.done:
	pop	r(bx)
	ret
section
ss
e2
text
;-----------------------------------------------------------------------
; double additive_core_sse2(double *state_re, double *state_im,
;                           const double *step_re, const double *step_im,
;                           double *filter_low, double *filter_high,
;                           double f_add_low, double f_add_high, size_t n)
;
; SSE2 additive synthesis core.  Each loop iteration handles two
; partials (one 128-bit vector of doubles):
;   re' = re*step_re - im*step_im        (complex rotation of the state)
;   im' = im*step_re + re*step_im
;   w   = clamp(min(filter_low, filter_high), 0, 1)
;   filter_low  += f_add_low
;   filter_high += f_add_high
;   sum += re' * w
;
; Args:  all arguments are read from the stack.  In the 64-bit build the
;        first four (rcx, rdx, r8, r9) are first spilled to the slots
;        just above the return address, and xmm6/xmm7 are saved and
;        restored.  NOTE(review): this matches the Microsoft x64
;        convention - confirm against the build target.
; Out:   the sum over all processed partials; xmm0 in the 64-bit build,
;        st0 in the 32-bit build.
; Notes: * n is consumed two at a time, so an odd n processes one extra
;          element; the arrays must have storage for the rounded-up
;          count (assumption about the caller - TODO confirm).
;        * n == 0 returns 0.0 without touching any array.
;        * Denormals are flushed (FTZ/DAZ) only while this function
;          runs; the caller's MXCSR is restored before returning.
; Clobb: r(ax), r(cx), r(dx), xmm0-xmm5, flags
;-----------------------------------------------------------------------
%define MXCSR_FTZ_DAZ	0x8040			; bit 15 flush-to-zero, bit 6 denormals-are-zero
%define SSE2_MXCSR_SLOT	PSIZE			; bytes kept on the stack for the caller's MXCSR
%define SSE2_ARGS	(STACK_OFFSET + SSE2_MXCSR_SLOT)

NAME(additive_core_sse2):
	; Disable denormals; the caller's MXCSR stays in a stack slot until exit
	push	r(ax)				; reserve the slot
	stmxcsr	[r(sp)]				; slot = caller's MXCSR
	mov	eax, [r(sp)]
	or	eax, MXCSR_FTZ_DAZ
	push	r(ax)
	ldmxcsr	[r(sp)]
	pop	r(ax)

%if __BITS__ == 64
	; Save register arguments to stack (the slots above the return address)
	mov	[rsp + SSE2_MXCSR_SLOT + 8], rcx
	mov	[rsp + SSE2_MXCSR_SLOT + 16], rdx
	mov	[rsp + SSE2_MXCSR_SLOT + 24], r8
	mov	[rsp + SSE2_MXCSR_SLOT + 32], r9

	; Save callee-save registers
	sub	rsp, 2*16
	movupd	[rsp + 0*16], xmm6
	movupd	[rsp + 1*16], xmm7
%endif
	push	r(bx)
	push	r(bp)
	push	r(si)
	push	r(di)

	; Initialize: xmm0 = running sum, xmm6/xmm7 = broadcast filter increments
	xorpd	xmm0, xmm0
	movsd	xmm6, [r(sp) + SSE2_ARGS + 6*PSIZE + 0*8]	; f_add_low
	unpcklpd xmm6, xmm6
	movsd	xmm7, [r(sp) + SSE2_ARGS + 6*PSIZE + 1*8]	; f_add_high
	unpcklpd xmm7, xmm7

	; Pointers
	mov	r(ax), [r(sp) + SSE2_ARGS + 0*PSIZE]	; state_re
	mov	r(dx), [r(sp) + SSE2_ARGS + 1*PSIZE]	; state_im
	mov	r(bx), [r(sp) + SSE2_ARGS + 2*PSIZE]	; step_re
	mov	r(bp), [r(sp) + SSE2_ARGS + 3*PSIZE]	; step_im
	mov	r(si), [r(sp) + SSE2_ARGS + 4*PSIZE]	; filter_low
	mov	r(di), [r(sp) + SSE2_ARGS + 5*PSIZE]	; filter_high

	; Count
	mov	r(cx), [r(sp) + SSE2_ARGS + 6*PSIZE + 2*8]
	test	r(cx), r(cx)
	jz	.done				; nothing to process: return the zero sum

.loop:
	; Update oscillator
	movupd	xmm2, [r(ax)]			; re
	movupd	xmm3, [r(dx)]			; im
	movapd	xmm4, xmm2
	movapd	xmm5, xmm3
	movupd	xmm1, [r(bx)]			; step_re
	mulpd	xmm2, xmm1			; re*step_re
	mulpd	xmm3, xmm1			; im*step_re
	movupd	xmm1, [r(bp)]			; step_im
	mulpd	xmm4, xmm1			; re*step_im
	mulpd	xmm5, xmm1			; im*step_im
	subpd	xmm2, xmm5			; re' = re*step_re - im*step_im
	addpd	xmm3, xmm4			; im' = im*step_re + re*step_im
	movupd	[r(ax)], xmm2
	movupd	[r(dx)], xmm3

	; Update filter
	movupd	xmm4, [r(si)]			; filter_low
	movupd	xmm5, [r(di)]			; filter_high
	movapd	xmm3, xmm4
	minpd	xmm3, xmm5			; weight uses the values from before the increment
	addpd	xmm4, xmm6
	addpd	xmm5, xmm7
	movupd	[r(si)], xmm4
	movupd	[r(di)], xmm5
	maxpd	xmm3, [c_zero]			; clamp the weight to [0, 1]
	minpd	xmm3, [c_one]

	; Accumulate filtered oscillator
	mulpd	xmm2, xmm3
	addpd	xmm0, xmm2

	; Advance pointers
	add	r(ax), 16
	add	r(dx), 16
	add	r(bx), 16
	add	r(bp), 16
	add	r(si), 16
	add	r(di), 16

	sub	r(cx), 2
	ja	.loop				; unsigned: stops when the count reaches zero or borrows

.done:
	; Final summation: add the two lanes of the accumulator
	movapd	xmm1, xmm0
	unpckhpd xmm1, xmm1
	addsd	xmm0, xmm1

	; Restore callee-save registers
	pop	r(di)
	pop	r(si)
	pop	r(bp)
	pop	r(bx)
%if __BITS__ == 64
	movupd	xmm6, [rsp + 0*16]
	movupd	xmm7, [rsp + 1*16]
	add	rsp, 2*16
%else
	; Return result on FP stack
	sub	esp, 8
	movsd	[esp], xmm0
	fld	qword [esp]
	add	esp, 8
%endif

	; Restore the caller's MXCSR and release its slot
	ldmxcsr	[r(sp)]
	add	r(sp), SSE2_MXCSR_SLOT
	ret
section
avx
text
NAME
(
additive_core_avx
):
; Disable denormals
push
r
(
ax
)
vstmxcsr
[
r
(
sp
)]
...
...
@@ -41,9 +179,6 @@ NAME:
; Initialize
vxorpd
ymm0
,
ymm0
mov
eax
,
1
vcvtsi2sd
xmm1
,
eax
vbroadcastsd
ymm1
,
xmm1
vbroadcastsd
ymm6
,
[
r
(
sp
)
+
STACK_OFFSET
+
6
*
PSIZE
+
0
*
8
]
vbroadcastsd
ymm7
,
[
r
(
sp
)
+
STACK_OFFSET
+
6
*
PSIZE
+
1
*
8
]
...
...
@@ -57,8 +192,6 @@ NAME:
; Count
mov
r
(
cx
),
[
r
(
sp
)
+
STACK_OFFSET
+
6
*
PSIZE
+
2
*
8
]
add
r
(
cx
),
3
shr
r
(
cx
),
2
.loop:
; Update oscillator
...
...
@@ -81,9 +214,8 @@ NAME:
vaddpd
ymm5
,
ymm5
,
ymm7
vmovupd
[
r
(
si
)],
ymm4
vmovupd
[
r
(
di
)],
ymm5
vxorpd
ymm4
,
ymm4
vminpd
ymm3
,
ymm3
,
ymm1
vmaxpd
ymm3
,
ymm3
,
ymm4
vmaxpd
ymm3
,
ymm3
,
[
c_zero
]
vminpd
ymm3
,
ymm3
,
[
c_one
]
; Accumulate filtered oscillator
vmulpd
ymm2
,
ymm2
,
ymm3
...
...
@@ -97,7 +229,8 @@ NAME:
add
r
(
si
),
32
add
r
(
di
),
32
loop
.loop
sub
r
(
cx
),
4
ja
.loop
; Final summation
vextractf128
xmm1
,
ymm0
,
1
...
...
synth/src/oidos_generate.rs
View file @
9131032b
...
...
@@ -321,7 +321,9 @@ pub struct OidosSoundGenerator {
f_add_low
:
f64
,
f_add_high
:
f64
,
gain
:
f64
gain
:
f64
,
avx_support
:
bool
}
impl
SoundGenerator
for
OidosSoundGenerator
{
...
...
@@ -345,7 +347,9 @@ impl SoundGenerator for OidosSoundGenerator {
f_add_low
:
(
-
param
.f_sweeplow
*
param
.f_slopelow
)
as
f64
,
f_add_high
:
(
param
.f_sweephigh
*
param
.f_slopehigh
)
as
f64
,
gain
:
param
.gain
as
f64
gain
:
param
.gain
as
f64
,
avx_support
:
unsafe
{
supports_avx
()
}
};
let
f_lowlimit
=
param
.f_low
as
f64
+
tone
as
f64
;
...
...
@@ -403,16 +407,26 @@ impl SoundGenerator for OidosSoundGenerator {
fn
produce_sample
(
&
mut
self
)
->
f32
{
let
s
=
unsafe
{
additive_core
(
self
.state_re
.as_mut_ptr
(),
self
.state_im
.as_mut_ptr
(),
self
.step_re
.as_ptr
(),
self
.step_im
.as_ptr
(),
self
.filter_low
.as_mut_ptr
(),
self
.filter_high
.as_mut_ptr
(),
self
.f_add_low
,
self
.f_add_high
,
self
.n_partials
)
if
self
.avx_support
{
additive_core_avx
(
self
.state_re
.as_mut_ptr
(),
self
.state_im
.as_mut_ptr
(),
self
.step_re
.as_ptr
(),
self
.step_im
.as_ptr
(),
self
.filter_low
.as_mut_ptr
(),
self
.filter_high
.as_mut_ptr
(),
self
.f_add_low
,
self
.f_add_high
,
self
.n_partials
)
}
else
{
additive_core_sse2
(
self
.state_re
.as_mut_ptr
(),
self
.state_im
.as_mut_ptr
(),
self
.step_re
.as_ptr
(),
self
.step_im
.as_ptr
(),
self
.filter_low
.as_mut_ptr
(),
self
.filter_high
.as_mut_ptr
(),
self
.f_add_low
,
self
.f_add_high
,
self
.n_partials
)
}
};
(
s
*
(
self
.gain
/
(
self
.n_partials
as
f64
+
(
self
.gain
-
1.0
)
*
s
*
s
))
.sqrt
())
as
f32
}
}
extern
"cdecl"
{
fn
additive_core
(
state_re
:
*
mut
f64
,
state_im
:
*
mut
f64
,
step_re
:
*
const
f64
,
step_im
:
*
const
f64
,
filter_low
:
*
mut
f64
,
filter_high
:
*
mut
f64
,
f_add_low
:
f64
,
f_add_high
:
f64
,
n
:
usize
)
->
f64
;
fn
supports_avx
()
->
bool
;
fn
additive_core_sse2
(
state_re
:
*
mut
f64
,
state_im
:
*
mut
f64
,
step_re
:
*
const
f64
,
step_im
:
*
const
f64
,
filter_low
:
*
mut
f64
,
filter_high
:
*
mut
f64
,
f_add_low
:
f64
,
f_add_high
:
f64
,
n
:
usize
)
->
f64
;
fn
additive_core_avx
(
state_re
:
*
mut
f64
,
state_im
:
*
mut
f64
,
step_re
:
*
const
f64
,
step_im
:
*
const
f64
,
filter_low
:
*
mut
f64
,
filter_high
:
*
mut
f64
,
f_add_low
:
f64
,
f_add_high
:
f64
,
n
:
usize
)
->
f64
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment