Commit 4a466198 authored by Erik Faye-Lund's avatar Erik Faye-Lund
Browse files

a lot of minor fixes and new features

git-svn-id: https://pimpmobile.svn.sourceforge.net/svnroot/pimpmobile/trunk@79 3d5ecaf0-f903-0410-b953-c2c1a4d75763
parent e6c27180
......@@ -4,16 +4,17 @@
#define CYCLES_PR_FRAME 280896
/* 32 is the maximum amount of channels in fasttracker2. a nice default. */
#define CHANNELS 32
#define CHANNELS 8
/* check the sample-rate calculator at http://www.pineight.com/gba/samplerates/ for more glitch-free samplerates */
#define SAMPLERATE (18157 * 2)
/* 0x4000100 = 0xFFFF, 0x4000102 = 0x0083 */
#define SAMPLERATE (18157.16)
/* only 130 bytes big, quite damn pleasing results */
#define AMIGA_DELTA_LUT_LOG_SIZE 7
/* derived settings. don't touch. */
#define SOUND_BUFFER_SIZE (CYCLES_PR_FRAME / ((1 << 24) / SAMPLERATE))
#define SOUND_BUFFER_SIZE (CYCLES_PR_FRAME / ((int)((1 << 24) / SAMPLERATE)))
#define AMIGA_DELTA_LUT_SIZE (1 << AMIGA_DELTA_LUT_LOG_SIZE)
#define AMIGA_DELTA_LUT_FRAC_BITS (15 - AMIGA_DELTA_LUT_LOG_SIZE)
......
......@@ -4,9 +4,9 @@
#include <gba_video.h>
#if 0
#define DEBUG_COLOR(r, g, b) BG_COLORS[0] = RGB5((r), (g), (b))
#define PROFILE_COLOR(r, g, b) BG_COLORS[0] = RGB5((r), (g), (b))
#else
#define DEBUG_COLOR(r, g, b)
#define PROFILE_COLOR(r, g, b)
#endif
#endif /* DEBUG_H */
......@@ -96,6 +96,8 @@ typedef enum
typedef enum
{
EFF_AMIGA_FILTER = 0x0,
EFF_FINE_PORTA_UP = 0x1,
EFF_FINE_PORTA_DOWN = 0x2,
EFF_FINE_VOLUME_SLIDE_UP = 0xA,
EFF_FINE_VOLUME_SLIDE_DOWN = 0xB,
EFF_NOTE_DELAY = 0xD,
......@@ -243,6 +245,7 @@ typedef struct
s32 porta_target;
u16 porta_speed;
s8 volume_slide_speed;
u8 note_delay;
s8 volume;
u8 pan;
......
......@@ -12,7 +12,7 @@ typedef signed int s32;
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
const u8 clz_lut[256] =
const u8 __pimp_clz_lut[256] =
{
0x8, 0x7, 0x6, 0x6, 0x5, 0x5, 0x5, 0x5, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4,
0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
......
......@@ -3,7 +3,7 @@
#include "config.h"
extern const unsigned char clz_lut[256];
extern const unsigned char __pimp_clz_lut[256];
static inline unsigned clz(unsigned input)
{
......@@ -16,7 +16,7 @@ static inline unsigned clz(unsigned input)
else c += 8;
/* a 256 entries lut ain't too bad... */
return clz_lut[input] + c;
return __pimp_clz_lut[input] + c;
}
static inline unsigned clz16(unsigned input)
......@@ -28,12 +28,12 @@ static inline unsigned clz16(unsigned input)
else c += 8;
/* a 256 entries lut ain't too bad... */
return clz_lut[input] + c;
return __pimp_clz_lut[input] + c;
}
static inline unsigned clz8(unsigned input)
{
return clz_lut[input];
return __pimp_clz_lut[input];
}
......
......@@ -4,7 +4,6 @@
#include "debug.h"
#include <gba_systemcalls.h>
#include <gba_video.h>
#include <gba_dma.h>
#include <gba_timers.h>
......@@ -146,15 +145,18 @@ inline void timing_end()
u32 dc_offs = 0;
static inline void mix_channel(channel_t &chan, s32 *target, size_t samples)
{
if (chan.volume < 1) return;
dc_offs += chan.volume * 128;
assert(samples > 0);
PROFILE_COLOR(0, 31, 0);
while (samples > 0 && detect_loop_event(chan, samples) == true)
{
do
{
assert((chan.sample_cursor >> 12) < chan.sample_length);
assert(chan.sample_data != 0);
s32 samp = ((u8*)chan.sample_data)[chan.sample_cursor >> 12];
chan.sample_cursor += chan.sample_cursor_delta;
......@@ -169,18 +171,21 @@ static inline void mix_channel(channel_t &chan, s32 *target, size_t samples)
if (process_loop_event(chan) == false)
{
// the sample has stopped, we need to fill the rest of the buffer with the dc-offset, so it doesn't ruin our unsigned mixing-thing
while (samples--)
{
*target++ += chan.volume * 128;
}
// terminate sample
chan.sample_data = 0;
PROFILE_COLOR(31, 0, 0);
return;
}
}
PROFILE_COLOR(31, 0, 31);
assert(chan.sample_data != 0);
chan.sample_cursor = mix_samples(target, samples, chan.sample_data, chan.volume, chan.sample_cursor, chan.sample_cursor_delta);
PROFILE_COLOR(31, 0, 0);
}
void mixer::reset()
......@@ -193,7 +198,7 @@ void mixer::reset()
}
}
s32 sound_mix_buffer[SOUND_BUFFER_SIZE] IWRAM_DATA ALIGN(4);
s32 sound_mix_buffer[SOUND_BUFFER_SIZE] IWRAM_DATA;
void mixer::mix(s8 *target, size_t samples)
{
......@@ -207,51 +212,9 @@ void mixer::mix(s8 *target, size_t samples)
for (u32 c = 0; c < CHANNELS; ++c)
{
channel_t &chan = (channel_t &)channels[c];
if (0 != chan.sample_data && 0 != chan.sample_cursor_delta) mix_channel(chan, sound_mix_buffer, samples);
if (0 != chan.sample_data) mix_channel(chan, sound_mix_buffer, samples);
}
dc_offs >>= 8;
register s32 *src = sound_mix_buffer;
register s8 *dst = target;
register u32 dc_offs_local = dc_offs;
// the compiler is too smart -- we need to prevent it from doing some arm11-optimizations.
register s32 high_clamp = 127 + dc_offs;
register s32 low_clamp = -128 + dc_offs;
// consider optimizing this further
#define ITERATION \
{ \
s32 samp = (*src++) >> 8; \
if (samp > high_clamp) samp = high_clamp; \
if (samp < low_clamp) samp = low_clamp; \
samp -= dc_offs_local; \
*dst++ = samp; \
}
register u32 s = samples >> 4;
switch (samples & 15)
{
do
{
ITERATION;
case 15: ITERATION;
case 14: ITERATION;
case 13: ITERATION;
case 12: ITERATION;
case 11: ITERATION;
case 10: ITERATION;
case 9: ITERATION;
case 8: ITERATION;
case 7: ITERATION;
case 6: ITERATION;
case 5: ITERATION;
case 4: ITERATION;
case 3: ITERATION;
case 2: ITERATION;
case 1: ITERATION;
case 0:;
}
while (s--);
}
#undef ITERATION
clip_samples(target, sound_mix_buffer, samples, dc_offs);
}
......@@ -30,8 +30,9 @@ namespace mixer
void reset();
void mix(s8 *target, size_t samples);
u32 mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta);
u32 mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta);
void clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs);
}
#endif /* MIXER_H */
......@@ -6,19 +6,51 @@
#include <gba_base.h>
#include <gba_video.h>
int profile_counter = 0;
#include <gba_interrupt.h>
/*
IDEA:
avoiding buffer clearing: self modifying code...
for the first channel mixed:
- replace the ldmia with a nop
- set bit #1 in byte #3 of the MLAs to 0 to make them MULs instead
- then for the next chan, set all stuff back
this way we don't have to clear the sample-buffer before mixing
to it, and we've saved some cycles on the first channel.
the advantage over having separate loops is less iwram-usage.
*/
/*
the magic bug: is it caused by interrupting while not having a stack-pointer set up? if so, try to disable interrupts while mixing...
helped on something, but not on all...
*/
static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{
assert(target != 0);
assert(sample_data != 0);
assert(target != NULL);
assert(sample_data != NULL);
assert((samples & 7) == 0);
assert(samples != 0);
u32 ime = REG_IME;
REG_IME = 0;
/*
ADD 1S
OR 1S
OR + shift 1S+1I
MUL 1S+mI
MLA 1S+(m+1)I
*/
// iprintf("%d...", samples);
asm(
"\
b .Ldataskip%= \n\
.Lstack_store%=: \n\
.align 4 \n\
.word 0 \n\
.Ldataskip%=: \n\
str sp, .Lstack_store%= \n\
.Lloop%=: \n\
......@@ -64,31 +96,36 @@ static u32 mix_simple(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
: "=r"(sample_cursor)
:
[cursor] "0"(sample_cursor),
[counter] "r"(samples >> 3),
[counter] "r"(samples / 8),
[data] "r"(sample_data),
[target] "r"(target),
[delta] "r"(sample_cursor_delta),
[vol] "r"(vol)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "1", "2", "4", "cc"
);
REG_IME = ime;
// iprintf("ok\n");
return sample_cursor;
}
/* bugs here sometimes for some strange reason... ?? (magic bug2k?) */
/*
	mixes `samples` samples of `sample_data` into `target` using the
	bresenham-style inner loop.

	- `samples` must be a non-zero multiple of 8; the asm loop counter is samples / 8.
	- `sample_cursor` and `sample_cursor_delta` are handed to the asm shifted left by 20,
	  after the integer part of the cursor (cursor >> 12) has been added to `sample_data`.
	- returns the new cursor, rebuilt from how far the data pointer advanced
	  and the remaining fractional bits.

	sp is in the clobber list and is parked in a word embedded in the code,
	so REG_IME is cleared while the asm runs and restored afterwards.

	NOTE(review): the embedded word used to be declared as ".word" without an operand.
	GNU as reserves no storage for an operand-less .word, so depending on how the
	".align 4" padding fell out, .Lstack_store could alias the "str sp" instruction
	itself and the store would overwrite code. this is possibly the "sometimes" bug
	mentioned above; it now reads ".word 0" like mix_simple does -- please confirm on hardware.
*/
static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{
const u8 *old_sample_data = sample_data;
sample_data += (sample_cursor >> 12);
assert(target != 0);
assert(sample_data != 0);
assert(target != NULL);
assert(sample_data != NULL);
assert((samples & 7) == 0);
assert(samples != 0);
/* remember the interrupt-master state and mask interrupts while sp is in use by the asm */
u32 ime = REG_IME;
REG_IME = 0;
asm(
"\
b .Ldataskip%= \n\
.Lstack_store%=: \n\
.align 4 \n\
.word 0 \n\
.Ldataskip%=: \n\
str sp, .Lstack_store%= \n\
......@@ -145,21 +182,25 @@ static u32 mix_bresenham(s32 *target, u32 samples, const u8 *sample_data, u32 vo
: "=r"(sample_cursor), "=r"(sample_data)
:
[cursor] "0"(sample_cursor << 20),
[counter] "r"(samples >> 3),
[counter] "r"(samples / 8),
[data] "1"(sample_data),
[target] "r"(target),
[delta] "r"(sample_cursor_delta << 20),
[vol] "r"(vol)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "sp", "cc"
);
/* restore the saved interrupt-master state */
REG_IME = ime;
/* integer part comes from the advanced data pointer, fraction from the top bits of the asm cursor */
return ((sample_data - old_sample_data - 1) << 12) + (sample_cursor >> 20);
}
u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol, u32 sample_cursor, s32 sample_cursor_delta)
{
// PROFILE_COLOR(31, 0, 31);
assert(target != 0);
assert(sample_data != 0);
// iprintf("-%d-", samples);
/* mix heading 0-7 samples (the innerloops are unrolled 8 times) */
for (unsigned i = samples & 7; i; --i)
{
......@@ -171,17 +212,80 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
if (samples == 0) return sample_cursor;
// return mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
#if 1
{
// if (samples < 32) samples = 32;
u32 ret = mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
return ret;
}
#endif
/* decide what innerloop to take */
if (sample_cursor_delta > 0 && sample_cursor_delta < (1 << 12))
{
u32 ret = mix_bresenham(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
// PROFILE_COLOR(31, 0, 0);
return ret;
}
else
{
u32 ret = mix_simple(target, samples, sample_data, vol, sample_cursor, sample_cursor_delta);
// PROFILE_COLOR(31, 0, 0);
return ret;
}
}
void mixer::clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs)
{
	/*
		converts `samples` entries of the 32-bit mix buffer `source` into
		signed 8-bit output in `target`.

		each entry is shifted right by 8, clamped to
		[dc_offs - 128, dc_offs + 127], and then has dc_offs subtracted
		before it is stored.
	*/
	assert(target != NULL);
	assert(source != NULL);

	s32 *in = source;
	s8 *out = target;
	u32 bias = dc_offs;

	// the clamp limits are kept in their own variables on purpose: the compiler is
	// too smart, and this keeps it from doing some arm11-optimizations.
	s32 clamp_hi = 127 + dc_offs;
	s32 clamp_lo = -128 + dc_offs;

	/* TODO: do this separately, when _all_ mixing for an entire frame is done. makes profiling a lot easier. */
	/* also consider optimizing this further */
#define CLIP_ONE() \
	do \
	{ \
		s32 value = (*in++) >> 8; \
		if (value > clamp_hi) value = clamp_hi; \
		if (value < clamp_lo) value = clamp_lo; \
		value -= bias; \
		*out++ = value; \
	} \
	while (0)

	PROFILE_COLOR(0, 0, 31);

	/* the 0-15 samples that don't fill a whole unrolled block go first */
	for (u32 head = samples & 15; head != 0; --head)
	{
		CLIP_ONE();
	}

	/* then the remaining whole blocks, unrolled 16 times */
	for (u32 blocks = samples / 16; blocks != 0; --blocks)
	{
		CLIP_ONE(); CLIP_ONE(); CLIP_ONE(); CLIP_ONE();
		CLIP_ONE(); CLIP_ONE(); CLIP_ONE(); CLIP_ONE();
		CLIP_ONE(); CLIP_ONE(); CLIP_ONE(); CLIP_ONE();
		CLIP_ONE(); CLIP_ONE(); CLIP_ONE(); CLIP_ONE();
	}
#undef CLIP_ONE

	PROFILE_COLOR(31, 0, 0);
}
......@@ -19,3 +19,24 @@ u32 mixer::mix_samples(s32 *target, u32 samples, const u8 *sample_data, u32 vol,
return sample_cursor;
}
void mixer::clip_samples(s8 *target, s32 *source, u32 samples, u32 dc_offs)
{
	/*
		portable clipper: writes `samples` signed 8-bit values to `target`.

		every source entry has (dc_offs << 8) subtracted, is shifted right
		by 6 and is then clamped to [-128, 127].

		NOTE(review): the unrolled implementation of this function shifts by 8
		rather than 6 -- confirm which scaling is the intended one.
	*/
	assert(target != NULL);
	assert(source != NULL);

	s8 *const end = target + samples;
	while (target != end)
	{
		s32 value = *source++;
		value -= dc_offs << 8;
		value >>= 6;

		if (value > 127) value = 127;
		if (value < -128) value = -128;

		*target++ = value;
	}
}
......@@ -31,7 +31,7 @@
#define PRINT_PATTERNS
// #define PRINT_PATTERNS
......@@ -48,7 +48,19 @@ static u32 curr_bpm = 125;
static u32 curr_tempo = 5;
static u32 curr_tick = 0;
static s32 global_volume = 2 << 8; /* 24.8 fixed point */
/* returns the player's curr_row counter (presumably the row currently being played -- confirm against update_row) */
int pimp_get_row()
{
	const int row = curr_row;
	return row;
}
/* returns the player's curr_order counter (presumably the current position in the order list -- confirm against update_row) */
int pimp_get_order()
{
	const int order = curr_order;
	return order;
}
static s32 global_volume = 1 << 10; /* 24.8 fixed point */
static pimp_pattern_t *curr_pattern = 0;
......@@ -60,7 +72,7 @@ static void set_bpm(int bpm)
{
assert(bpm > 0);
/* the shift is because we're using 8 fractional-bits for the tick-length */
tick_len = ((SAMPLERATE * 5) << 8) / (bpm * 2);
tick_len = int((SAMPLERATE * 5) * 256) / (bpm * 2);
}
static int get_order(const pimp_module_t *mod, int i)
......@@ -194,7 +206,7 @@ extern "C" void pimp_init(const void *module, const void *sample_bank)
REG_SOUNDCNT_X = SOUND_ENABLE;
/* setup timer-shit */
REG_TM0CNT_L = (1 << 16) - ((1 << 24) / SAMPLERATE);
REG_TM0CNT_L = (1 << 16) - int((1 << 24) / SAMPLERATE);
REG_TM0CNT_H = TIMER_START;
}
......@@ -245,7 +257,7 @@ void update_row()
volume_dirty = true;
}
if (chan.instrument != 0 && note->note > 0 && chan.effect != EFF_PORTA_NOTE)
if (chan.instrument != 0 && note->note > 0 && chan.effect != EFF_PORTA_NOTE && !(chan.effect == EFF_MULTI_FX && chan.effect_param == EFF_NOTE_DELAY))
{
chan.sample = get_sample(mod, chan.instrument, chan.instrument->sample_map[note->note]);
mc.sample_cursor = 0;
......@@ -273,12 +285,28 @@ void update_row()
volume_dirty = true;
}
/* todo: switch here instead */
if (note->volume_command >= 0x10 && note->volume_command < 0x50)
switch (note->volume_command >> 4)
{
case 0x0: break;
case 0x1:
case 0x2:
case 0x3:
case 0x4:
case 0x5:
if (note->volume_command > 0x50)
{
/* something else */
}
else
{
chan.volume = note->volume_command - 0x10;
volume_dirty = true;
}
break;
default:
iprintf("unsupported volume-command %02X\n", chan.effect_param);
}
switch (chan.effect)
{
......@@ -316,7 +344,14 @@ void update_row()
case EFF_SAMPLE_OFFSET:
if (note->note > 0)
{
mixer::channels[c].sample_cursor = (chan.effect_param * 256) << 12;
mc.sample_cursor = (chan.effect_param * 256) << 12;
/*
if (mc.sample_cursor > mc.sample_length)
{
if (mod->flags & FLAG_SAMPLE_OFFSET_CLAMP) mc.sample_cursor = 0; //mc.sample_length;
// else mc.sample_data = NULL; // kill sample
}
*/
}
break;
......@@ -341,6 +376,18 @@ void update_row()
{
case EFF_AMIGA_FILTER: break;
case EFF_FINE_PORTA_UP:
chan.final_period -= chan.effect_param & 0xF;
if (chan.final_period < mod->period_low_clamp) chan.final_period = mod->period_low_clamp;
period_dirty = true;
break;
case EFF_FINE_PORTA_DOWN:
chan.final_period += chan.effect_param & 0xF;
if (chan.final_period > mod->period_high_clamp) chan.final_period = mod->period_high_clamp;
period_dirty = true;
break;
case EFF_FINE_VOLUME_SLIDE_UP:
chan.volume += chan.effect_param & 0xF;
if (chan.volume > 64) chan.volume = 64;
......@@ -352,13 +399,14 @@ void update_row()
if (chan.volume < 0) chan.volume = 0;
volume_dirty = true;
break;
/*
case EFF_NOTE_DELAY:
chan.note_delay = chan.effect_param & 0xF;
chan.note = note->note;
break;
*/
// default:
// iprintf("eek E%X\n", chan.effect_param >> 4);
default:
iprintf("unsupported effect E%X\n", chan.effect_param >> 4);
}
break;
......@@ -387,9 +435,9 @@ void update_row()
case EFF_SET_BPM: break;
*/
// default:
// iprintf("eek %02X!\n", chan.effect);
// assert(0);
default:
iprintf("unsupported effect %02X\n", chan.effect);
assert(0);
}
if (period_dirty)
......@@ -450,7 +498,6 @@ static void update_tick()
case EFF_PORTA_UP:
chan.final_period -= chan.porta_speed;
if (chan.final_period > mod->period_high_clamp) chan.final_period = mod->period_high_clamp;
if (chan.final_period < mod->period_low_clamp) chan.final_period = mod->period_low_clamp;
period_dirty = true;
break;
......@@ -458,7 +505,6 @@ static void update_tick()
case EFF_PORTA_DOWN:
chan.final_period += chan.porta_speed;
if (chan.final_period > mod->period_high_clamp) chan.final_period = mod->period_high_clamp;
if (chan.final_period < mod->period_low_clamp) chan.final_period = mod->period_low_clamp;
period_dirty = true;
break;
......@@ -502,6 +548,35 @@ static void update_tick()
case EFF_FINE_VOLUME_SLIDE_DOWN:
break; /* fine volume slide is only done on tick0 */
case EFF_NOTE_DELAY:
// note on
if (--chan.note_delay == 0)
{
// TODO: replace with a note_on-function
if (chan.instrument != 0)
{
chan.sample = get_sample(mod, chan.instrument, chan.instrument->sample_map[chan.note]);
mc.sample_cursor = 0;
mc.sample_data = pimp_sample_bank + chan.sample->data_ptr;
mc.sample_length = chan.sample->length;
mc.loop_type = (mixer::loop_type_t)chan.sample->loop_type;
mc.loop_start = chan.sample->loop_start;
mc.loop_end = chan.sample->loop_start + chan.sample->loop_length;