Commit 4a466198 authored by Erik Faye-Lund's avatar Erik Faye-Lund
Browse files

a lot of minor fixes and new features

git-svn-id: https://pimpmobile.svn.sourceforge.net/svnroot/pimpmobile/trunk@79 3d5ecaf0-f903-0410-b953-c2c1a4d75763
parent e6c27180
...@@ -4,16 +4,17 @@ ...@@ -4,16 +4,17 @@
#define CYCLES_PR_FRAME 280896 #define CYCLES_PR_FRAME 280896
/* 32 is the maximum amount of channels in fasttracker2. a nice default. */ /* 32 is the maximum amount of channels in fasttracker2. a nice default. */
#define CHANNELS 32 #define CHANNELS 8
/* check the sample-rate calculator at http://www.pineight.com/gba/samplerates/ for more glitch-free samplerates */ /* check the sample-rate calculator at http://www.pineight.com/gba/samplerates/ for more glitch-free samplerates */
#define SAMPLERATE (18157 * 2) /* 0x4000100 = 0xFFFF, 0x4000102 = 0x0083 */
#define SAMPLERATE (18157.16)
/* only 130 bytes big, quite damn pleasing results */ /* only 130 bytes big, quite damn pleasing results */
#define AMIGA_DELTA_LUT_LOG_SIZE 7 #define AMIGA_DELTA_LUT_LOG_SIZE 7
/* derived settings. don't touch. */ /* derived settings. don't touch. */
#define SOUND_BUFFER_SIZE (CYCLES_PR_FRAME / ((1 << 24) / SAMPLERATE)) #define SOUND_BUFFER_SIZE (CYCLES_PR_FRAME / ((int)((1 << 24) / SAMPLERATE)))
#define AMIGA_DELTA_LUT_SIZE (1 << AMIGA_DELTA_LUT_LOG_SIZE) #define AMIGA_DELTA_LUT_SIZE (1 << AMIGA_DELTA_LUT_LOG_SIZE)
#define AMIGA_DELTA_LUT_FRAC_BITS (15 - AMIGA_DELTA_LUT_LOG_SIZE) #define AMIGA_DELTA_LUT_FRAC_BITS (15 - AMIGA_DELTA_LUT_LOG_SIZE)
......
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
#include <gba_video.h> #include <gba_video.h>
#if 0 #if 0
#define DEBUG_COLOR(r, g, b) BG_COLORS[0] = RGB5((r), (g), (b)) #define PROFILE_COLOR(r, g, b) BG_COLORS[0] = RGB5((r), (g), (b))
#else #else
#define DEBUG_COLOR(r, g, b) #define PROFILE_COLOR(r, g, b)
#endif #endif
#endif /* DEBUG_H */ #endif /* DEBUG_H */
...@@ -96,6 +96,8 @@ typedef enum ...@@ -96,6 +96,8 @@ typedef enum
typedef enum typedef enum
{ {
EFF_AMIGA_FILTER = 0x0, EFF_AMIGA_FILTER = 0x0,
EFF_FINE_PORTA_UP = 0x1,
EFF_FINE_PORTA_DOWN = 0x2,
EFF_FINE_VOLUME_SLIDE_UP = 0xA, EFF_FINE_VOLUME_SLIDE_UP = 0xA,
EFF_FINE_VOLUME_SLIDE_DOWN = 0xB, EFF_FINE_VOLUME_SLIDE_DOWN = 0xB,
EFF_NOTE_DELAY = 0xD, EFF_NOTE_DELAY = 0xD,
...@@ -243,6 +245,7 @@ typedef struct ...@@ -243,6 +245,7 @@ typedef struct
s32 porta_target; s32 porta_target;
u16 porta_speed; u16 porta_speed;
s8 volume_slide_speed; s8 volume_slide_speed;
u8 note_delay;
s8 volume; s8 volume;
u8 pan; u8 pan;
......
...@@ -12,7 +12,7 @@ typedef signed int s32; ...@@ -12,7 +12,7 @@ typedef signed int s32;
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
const u8 clz_lut[256] = const u8 __pimp_clz_lut[256] =
{ {
0x8, 0x7, 0x6, 0x6, 0x5, 0x5, 0x5, 0x5, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x8, 0x7, 0x6, 0x6, 0x5, 0x5, 0x5, 0x5, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4,
0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "config.h" #include "config.h"
extern const unsigned char clz_lut[256]; extern const unsigned char __pimp_clz_lut[256];
static inline unsigned clz(unsigned input) static inline unsigned clz(unsigned input)
{ {
...@@ -16,7 +16,7 @@ static inline unsigned clz(unsigned input) ...@@ -16,7 +16,7 @@ static inline unsigned clz(unsigned input)
else c += 8; else c += 8;
/* a 256 entries lut ain't too bad... */ /* a 256 entries lut ain't too bad... */
return clz_lut[input] + c; return __pimp_clz_lut[input] + c;
} }
static inline unsigned clz16(unsigned input) static inline unsigned clz16(unsigned input)
...@@ -28,12 +28,12 @@ static inline unsigned clz16(unsigned input) ...@@ -28,12 +28,12 @@ static inline unsigned clz16(unsigned input)
else c += 8; else c += 8;
/* a 256 entries lut ain't too bad... */ /* a 256 entries lut ain't too bad... */
return clz_lut[input] + c; return __pimp_clz_lut[input] + c;
} }
static inline unsigned clz8(unsigned input) static inline unsigned clz8(unsigned input)
{ {
return clz_lut[input]; return __pimp_clz_lut[input];
} }
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include "debug.h" #include "debug.h"
#include <gba_systemcalls.h> #include <gba_systemcalls.h>
#include <gba_video.h>
#include <gba_dma.h> #include <gba_dma.h>
#include <gba_timers.h> #include <gba_timers.h>
...@@ -146,15 +145,18 @@ inline void timing_end() ...@@ -146,15 +145,18 @@ inline void timing_end()
u32 dc_offs = 0; u32 dc_offs = 0;
static inline void mix_channel(channel_t &chan, s32 *target, size_t samples) static inline void mix_channel(channel_t &chan, s32 *target, size_t samples)
{ {
if (chan.volume < 1) return;
dc_offs += chan.volume * 128; dc_offs += chan.volume * 128;
assert(samples > 0); assert(samples > 0);
PROFILE_COLOR(0, 31, 0);
while (samples > 0 && detect_loop_event(chan, samples) == true) while (samples > 0 && detect_loop_event(chan, samples) == true)
{ {
do do
{ {
assert((chan.sample_cursor >> 12) < chan.sample_length); assert((chan.sample_cursor >> 12) < chan.sample_length);
assert(chan.sample_data != 0);
s32 samp = ((u8*)chan.sample_data)[chan.sample_cursor >> 12]; s32 samp = ((u8*)chan.sample_data)[chan.sample_cursor >> 12];
chan.sample_cursor += chan.sample_cursor_delta; chan.sample_cursor += chan.sample_cursor_delta;
...@@ -169,18 +171,21 @@ static inline void mix_channel(channel_t &chan, s32 *target, size_t samples) ...@@ -169,18 +171,21 @@ static inline void mix_channel(channel_t &chan, s32 *target, size_t samples)
if (process_loop_event(chan) == false) if (process_loop_event(chan) == false)
{ {
// the sample has stopped, we need to fill the rest of the buffer with the dc-offset, so it doesn't ruin our unsigned mixing-thing
while (samples--) while (samples--)
{ {
*target++ += chan.volume * 128; *target++ += chan.volume * 128;
} }
// terminate sample
chan.sample_data = 0; chan.sample_data = 0;
PROFILE_COLOR(31, 0, 0);
return; return;
} }
} }
PROFILE_COLOR(31, 0, 31);
assert(chan.sample_data != 0); assert(chan.sample_data != 0);
chan.sample_cursor = mix_samples(target, samples, chan.sample_data, chan.volume, chan.sample_cursor, chan.sample_cursor_delta); chan.sample_cursor = mix_samples(target, samples, chan.sample_data, chan.volume, chan.sample_cursor, chan.sample_cursor_delta);
PROFILE_COLOR(31, 0, 0);
} }
void mixer::reset() void mixer::reset()
...@@ -193,7 +198,7 @@ void mixer::reset() ...@@ -193,7 +198,7 @@ void mixer::reset()
} }
} }
s32 sound_mix_buffer[SOUND_BUFFER_SIZE] IWRAM_DATA ALIGN(4); s32 sound_mix_buffer[SOUND_BUFFER_SIZE] IWRAM_DATA;
void mixer::mix(s8 *target, size_t samples) void mixer::mix(s8 *target, size_t samples)
{ {
...@@ -202,56 +207,14 @@ void mixer::mix(s8 *target, size_t samples) ...@@ -202,56 +207,14 @@ void mixer::mix(s8 *target, size_t samples)
// zero out the sample-buffer // zero out the sample-buffer
u32 zero = 0; u32 zero = 0;
CpuFastSet(&zero, sound_mix_buffer, DMA_SRC_FIXED | (samples)); CpuFastSet(&zero, sound_mix_buffer, DMA_SRC_FIXED | (samples));
dc_offs = 0; dc_offs = 0;
for (u32 c = 0; c < CHANNELS; ++c) for (u32 c = 0; c < CHANNELS; ++c)
{ {
channel_t &chan = (channel_t &)channels[c]; channel_t &chan = (channel_t &)channels[c];
if (0 != chan.sample_data && 0 != chan.sample_cursor_delta) mix_channel(chan, sound_mix_buffer, samples); if (0 != chan.sample_data) mix_channel(chan, sound_mix_buffer, samples);
} }
dc_offs >>= 8; dc_offs >>= 8;
register s32 *src = sound_mix_buffer; clip_samples(target, sound_mix_buffer, samples, dc_offs);
register s8 *dst = target;
register u32 dc_offs_local = dc_offs;
// the compiler is too smart -- we need to prevent it from doing some arm11-optimizations.
register s32 high_clamp = 127 + dc_offs;
register s32 low_clamp = -128 + dc_offs;
// consider optimizing this further
#define ITERATION \
{ \
s32 samp = (*src++) >> 8; \
if (samp > high_clamp) samp = high_clamp; \
if (samp < low_clamp) samp = low_clamp; \
samp -= dc_offs_local; \
*dst++ = samp; \
}
register u32 s = samples >> 4;
switch (samples & 15)
{
do
{
ITERATION;
case 15: ITERATION;
case 14: ITERATION;
case 13: ITERATION;
case 12: ITERATION;
case 11: ITERATION;
case 10: ITERATION;
case 9: ITERATION;
case 8: ITERATION;
case 7: ITERATION;
case 6: ITERATION;
case 5: ITERATION;
case 4: ITERATION;
case 3: ITERATION;
case 2: ITERATION;
case 1: ITERATION;
case 0:;
}
while (s--);
}
#undef ITERATION
} }
...@@ -30,8 +30,9 @@ namespace mixer ...@@ -30,8 +30,9 @@ namespace mixer
void reset(); void reset();
void mix(s8 *target, size_t samples); void mix(s8 *target, size_t samples);
u32 mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta);
u32 mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta);
void clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs);
} }
#endif /* MIXER_H */ #endif /* MIXER_H */
...@@ -6,19 +6,51 @@ ...@@ -6,19 +6,51 @@
#include <gba_base.h> #include <gba_base.h>
#include <gba_video.h> #include <gba_video.h>
int profile_counter = 0; #include <gba_interrupt.h>
/*
IDEA:
avoiding buffer clearing: self modifying code...
for the first channel mixed:
- replace the ldmia with a nop
- set bit #1 in byte #3 of the MLAs to 0 to make them MULs instead
- then for the next chan, set all stuff back
this way we don't have to clear the sample-buffer before mixing
to it, and we've saved some cycles on the first channel.
the advantage over having separate loops is less iwram-usage.
*/
/*
the magic bug: is it caused by interrupting while not having a stack-pointer set up? if so, try to disable interrupts while mixing...
helped on something, but not on all...
*/
static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta) static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{ {
assert(target != 0); assert(target != NULL);
assert(sample_data != 0); assert(sample_data != NULL);
assert((samples & 7) == 0); assert((samples & 7) == 0);
assert(samples != 0); assert(samples != 0);
u32 ime = REG_IME;
REG_IME = 0;
/*
ADD 1S
OR 1S
OR + shift 1S+1I
MUL 1S+mI
MLA 1S+(m+1)I
*/
// iprintf("%d...", samples);
asm( asm(
"\ "\
b .Ldataskip%= \n\ b .Ldataskip%= \n\
.Lstack_store%=: \n\ .Lstack_store%=: \n\
.align 4 \n\ .word 0 \n\
.Ldataskip%=: \n\ .Ldataskip%=: \n\
str sp, .Lstack_store%= \n\ str sp, .Lstack_store%= \n\
.Lloop%=: \n\ .Lloop%=: \n\
...@@ -64,31 +96,36 @@ static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol, ...@@ -64,31 +96,36 @@ static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
: "=r"(sample_cursor) : "=r"(sample_cursor)
: :
[cursor] "0"(sample_cursor), [cursor] "0"(sample_cursor),
[counter] "r"(samples >> 3), [counter] "r"(samples / 8),
[data] "r"(sample_data), [data] "r"(sample_data),
[target] "r"(target), [target] "r"(target),
[delta] "r"(sample_cursor_delta), [delta] "r"(sample_cursor_delta),
[vol] "r"(vol) [vol] "r"(vol)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "1", "2", "4", "cc" : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "1", "2", "4", "cc"
); );
REG_IME = ime;
// iprintf("ok\n");
return sample_cursor; return sample_cursor;
} }
/* bugs here sometimes for some strange reason... ?? (magic bug2k?) */
static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta) static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{ {
const u8 *old_sample_data = sample_data; const u8 *old_sample_data = sample_data;
sample_data += (sample_cursor >> 12); sample_data += (sample_cursor >> 12);
assert(target != 0); assert(target != NULL);
assert(sample_data != 0); assert(sample_data != NULL);
assert((samples & 7) == 0); assert((samples & 7) == 0);
assert(samples != 0); assert(samples != 0);
u32 ime = REG_IME;
REG_IME = 0;
asm( asm(
"\ "\
b .Ldataskip%= \n\ b .Ldataskip%= \n\
.Lstack_store%=: \n\ .Lstack_store%=: \n\
.align 4 \n\
.word \n\ .word \n\
.Ldataskip%=: \n\ .Ldataskip%=: \n\
str sp, .Lstack_store%= \n\ str sp, .Lstack_store%= \n\
...@@ -145,20 +182,24 @@ static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vo ...@@ -145,20 +182,24 @@ static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vo
: "=r"(sample_cursor), "=r"(sample_data) : "=r"(sample_cursor), "=r"(sample_data)
: :
[cursor] "0"(sample_cursor << 20), [cursor] "0"(sample_cursor << 20),
[counter] "r"(samples >> 3), [counter] "r"(samples / 8),
[data] "1"(sample_data), [data] "1"(sample_data),
[target] "r"(target), [target] "r"(target),
[delta] "r"(sample_cursor_delta << 20), [delta] "r"(sample_cursor_delta << 20),
[vol] "r"(vol) [vol] "r"(vol)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "cc" : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "cc"
); );
REG_IME = ime;
return ((sample_data - old_sample_data - 1) << 12) + (sample_cursor >> 20); return ((sample_data - old_sample_data - 1) << 12) + (sample_cursor >> 20);
} }
u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta) u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{ {
// PROFILE_COLOR(31, 0, 31);
assert(target != 0); assert(target != 0);
assert(sample_data != 0); assert(sample_data != 0);
// iprintf("-%d-", samples);
/* mix heading 0-7 samples (the innerloops are unrolled 8 times) */ /* mix heading 0-7 samples (the innerloops are unrolled 8 times) */
for (unsigned i = samples & 7; i; --i) for (unsigned i = samples & 7; i; --i)
...@@ -171,17 +212,80 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, ...@@ -171,17 +212,80 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
if (samples == 0) return sample_cursor; if (samples == 0) return sample_cursor;
// return mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta); #if 1
{
// if (samples < 32) samples = 32;
u32 ret = mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
return ret;
}
#endif
/* decide what innerloop to take */ /* decide what innerloop to take */
if (sample_cursor_delta > 0 && sample_cursor_delta < (1 << 12)) if (sample_cursor_delta > 0 && sample_cursor_delta < (1 << 12))
{ {
u32 ret = mix_bresenham(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta); u32 ret = mix_bresenham(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
// PROFILE_COLOR(31, 0, 0);
return ret; return ret;
} }
else else
{ {
u32 ret = mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta); u32 ret = mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
// PROFILE_COLOR(31, 0, 0);
return ret; return ret;
} }
} }
/*
	clip_samples: convert `samples` 32-bit mixed values from `source` into
	signed 8-bit output samples in `target`.

	for each sample: shift right by 8, clamp to the range
	[-128 + dc_offs, 127 + dc_offs], then subtract dc_offs so that the
	stored value fits in an s8.

	target  - destination buffer, must be non-NULL and hold `samples` bytes
	source  - mixed input buffer, must be non-NULL and hold `samples` words
	samples - number of samples to convert (any count, 0 included)
	dc_offs - dc-offset to remove from every sample after clamping
*/
void mixer::clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs)
{
assert(target != NULL);
assert(source != NULL);
/* working copies, declared `register` as a hint to keep them out of memory in the inner loop */
register s32 *src = source;
register s8 *dst = target;
register u32 dc_offs_local = dc_offs;
// the compiler is too smart -- we need to prevent it from doing some arm11-optimizations.
// (hence the clamp limits are precomputed with the dc-offset folded in,
// and the offset is subtracted only after clamping.)
register s32 high_clamp = 127 + dc_offs;
register s32 low_clamp = -128 + dc_offs;
/* TODO: do this separately, when _all_ mixing for an entire frame is done. makes profiling a lot easier. */
/* also consider optimizing this further */
/* ITERATION converts exactly one sample: scale, clamp, remove dc-offset, store. */
#define ITERATION \
{ \
s32 samp = (*src++) >> 8; \
if (samp > high_clamp) samp = high_clamp; \
if (samp < low_clamp) samp = low_clamp; \
samp -= dc_offs_local; \
*dst++ = samp; \
}
/* profiling marker (see PROFILE_COLOR in debug.h) */
PROFILE_COLOR(0, 0, 31);
/*
	duff's device, unrolled 16 times:
	the switch jumps into the middle of the do/while body so that the
	(samples & 15) left-over samples are converted first; every following
	pass through the loop converts a full group of 16. `s` counts those
	full groups. the final `s--` wraps `s` below zero, but it is not read
	afterwards.
*/
register u32 s = samples / 16;
switch (samples & 15)
{
do
{
ITERATION;
case 15: ITERATION;
case 14: ITERATION;
case 13: ITERATION;
case 12: ITERATION;
case 11: ITERATION;
case 10: ITERATION;
case 9: ITERATION;
case 8: ITERATION;
case 7: ITERATION;
case 6: ITERATION;
case 5: ITERATION;
case 4: ITERATION;
case 3: ITERATION;
case 2: ITERATION;
case 1: ITERATION;
case 0:; /* no left-over samples: fall straight through to the loop test */
}
while (s--);
}
#undef ITERATION
/* profiling marker: clipping done */
PROFILE_COLOR(31, 0, 0);
}
...@@ -19,3 +19,24 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, ...@@ -19,3 +19,24 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
return sample_cursor; return sample_cursor;
} }
/*
	clip_samples: plain-loop conversion of `samples` 32-bit mixed values
	from `source` into signed 8-bit output samples in `target`.

	for each sample: subtract the dc-offset (scaled up by 8 bits to match
	the mix-buffer format), shift right by 6, clamp to [-128, 127], store.

	target  - destination buffer, must be non-NULL and hold `samples` bytes
	source  - mixed input buffer, must be non-NULL and hold `samples` words
	samples - number of samples to convert (0 is allowed and does nothing)
	dc_offs - dc-offset, in output-sample units before the << 8 scaling

	NOTE(review): this variant shifts by 6 where the unrolled implementation
	of clip_samples shifts by 8 (i.e. 4x the output gain), and it removes the
	dc-offset before clamping instead of after. the shift-by-8 form used to
	live here as commented-out code -- confirm the difference is intended.
*/
void mixer::clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs)
{
	assert(target != NULL);
	assert(source != NULL);

	/* loop-invariant: dc-offset expressed in mix-buffer units */
	const u32 dc_bias = dc_offs << 8;

	for (unsigned i = samples; i; --i)
	{
		s32 samp = *source++;
		samp -= dc_bias;
		samp >>= 6;

		/* saturate to the range of a signed 8-bit sample */
		if (samp > 127) samp = 127;
		if (samp < -128) samp = -128;

		*target++ = samp;
	}
}
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#define PRINT_PATTERNS // #define PRINT_PATTERNS
...@@ -48,7 +48,19 @@ static u32 curr_bpm = 125; ...@@ -48,7 +48,19 @@ static u32 curr_bpm = 125;
static u32 curr_tempo = 5; static u32 curr_tempo = 5;
static u32 curr_tick = 0; static u32 curr_tick = 0;