/*
 * (c) Copyright 1999, 2000 -- Anders Torger
 *
 * This software is free. You can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation.
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <unistd.h>
#include <errno.h>
#include <math.h>
#include <time.h>
#include <signal.h>

#include "sample.h"
#include "filter.h"
#include "defs.h"
#include "swap.h"
#include "nwfiir.h"
#include "fdrw.h"
#include "timestamp.h"

/*
 * struct to store filter statistics
 *
 * All values are distances, counted in fragments, between the positions of
 * the input, filter and output loops. The max fields start at zero and the
 * min fields at n_frags (see filter_new()); each loop then widens the range
 * with the distances it observes.
 */
struct filter_stats {
    int i_o_max;               /* largest input_pos - output_pos seen by the
				* output loop */
    int i_o_min;               /* smallest input_pos - output_pos seen by the
				* output loop */
    int f_o_max[MAX_CHANNELS]; /* per channel: largest filter_pos - output_pos
				* seen by the filter loop */
    int f_o_min[MAX_CHANNELS]; /* per channel: smallest filter_pos -
				* output_pos seen by the filter loop */
    int f_i_max[MAX_CHANNELS]; /* per channel: largest input_pos - filter_pos
				* seen by the input loop */
    int f_i_min[MAX_CHANNELS]; /* per channel: smallest input_pos -
				* filter_pos seen by the input loop */
};

/*
 * structure mapped to shared memory to communicate between the processes
 *
 * The positions are running fragment counters; the loops reduce them modulo
 * the number of fragments to find the slot in the ring buffers.
 */
struct filter_comarea {
    bool_t active;     /* true if filter processing is active; inverted by
			* filter_toggle_processing() and polled by each
			* filter loop */
    int last_size;     /* -1 while input is still arriving; once the input
			* loop gets a short read, the size of that final
			* fragment converted to output bytes */
    int last_pos;      /* value of input_pos when the short read happened,
			* i.e. the index of the final fragment (only
			* meaningful when last_size != -1) */
    int input_pos;     /* number of complete fragments read by the input
			* loop */
    int output_pos;    /* number of complete fragments written by the output
			* loop */
    int filter_pos[MAX_CHANNELS]; /* per channel: number of fragments
				   * processed by that filter loop */
    struct filter_stats stats;
    /* Semaphores for process synchronisation. Implemented as pipes, since
       they are simple and fast enough, and System V semaphores suck.
       Each pair is filled in by pipe(): [0] is the read end (semaphore
       decrease), [1] the write end (semaphore increase); one byte is one
       count. */
    int sem_input[2];                /* increased by the output loop,
				      * decreased by the input loop */
    int sem_output[MAX_CHANNELS][2]; /* increased by the filter loop of the
				      * channel, decreased by the output
				      * loop */
    int sem_filter[MAX_CHANNELS][2]; /* increased by the input loop,
				      * decreased by the filter loop of the
				      * channel */
};

/*
 * Per-filter-group state kept in the creating process. filter_new() fills it
 * in and filter_run() hands most of it on to the input loop.
 */
struct filter_t_ {
    /* input loop parameters */
    int (*fillbuf)(void *); /* reads one input fragment into the given
			     * address; its return value is compared with
			     * infragsize by the input loop */
    void *inbuffer;         /* shared memory input buffer */
    int infragsize;         /* size of one input fragment in bytes (all
			     * channels) */
    int n_frags;            /* number of fragments per buffer */
    int in_sbytes;          /* bytes per sample in the input format */
    int out_sbytes;         /* bytes per sample in the output format */
    
    int n_channels;  /* the number of channels, which is equal to the number of
		      * processes working on the input samples */
    /* pids of the processes in the group: [0 .. n_channels - 1] are the
     * per-channel filter processes, [n_channels] is the output process and
     * [n_channels + 1] is the process running the input loop (set by
     * filter_run()) */
    pid_t pids[MAX_CHANNELS + 2];
    struct filter_comarea *comarea; /* shared memory communication area */
};

/*
static void
reset_stats(struct filter_stats *filter_stats,
	    int n_channels,
	    int n_frags)
{
    int n;
    
    bzero(filter_stats, sizeof(struct filter_stats));
    filter_stats->i_o_min = n_frags;
    for (n = 0; n < n_channels; n++) {
	filter_stats->f_o_min[n] = n_frags;
	filter_stats->f_i_min[n] = n_frags;
    }
}

static void
print_stats(struct filter_stats *filter_stats,
	    int n_channels)
{
    struct filter_stats s;
    int n;
    
    memcpy(&s, filter_stats, sizeof(struct filter_stats));

    fprintf(stderr, "input - output max distance: %u\n", s.i_o_max);
    fprintf(stderr, "input - output min distance: %u\n", s.i_o_min);
    for (n = 0; n < n_channels; n++) {
        fprintf(stderr, "filter[%d] - output max distance: %u\n",
		n, s.f_o_max[n]);
	fprintf(stderr, "filter[%d] - output min distance: %u\n",
		n, s.f_o_min[n]);
	fprintf(stderr, "filter[%d] - input max distance: %u\n",
		n, s.f_i_max[n]);
	fprintf(stderr, "filter[%d] - input min distance: %u\n",
		n, s.f_i_max[n]);
    }
}
*/

/*
 * Park the calling process for good. sleep() may return early (for instance
 * when a signal handler has run), so it is simply called again, endlessly.
 * This function never returns.
 */
static inline void
sleep_forever(void)
{
    for (;;) {
	(void)sleep(100000);
    }
}

/*
 * Scale a filter output sample and reduce it to the output word length.
 *
 * insample  the sample as delivered by the filter processor
 * bits      word length of the output format; the result is the clamped
 *           32 bit value shifted right by 32 - bits
 * gaindiv   the divisor, or the right-shift count when shiftdiv is true
 * shiftdiv  true to scale by shifting, false to scale by dividing
 * channel   zero-based channel index, only used in the overflow message
 *
 * Values outside the 32 bit range are clamped to INT32_MIN / INT32_MAX and
 * counted. The count lives in function-static state, and a message is
 * printed to stderr no more often than once per second as reported by
 * time().
 */
static inline int32_t
bit_reduce(int64_t insample,
	   int bits,
	   int32_t gaindiv,
	   bool_t shiftdiv,
	   int channel)
{
    static int overflow = 0;
    static bool_t print_overflow = false;
    static time_t last_print = 0;
    int32_t sample;
    time_t now;
    
    /* Should contain a good dither algorithm sometime in the future (?) */
    if (shiftdiv) {
	insample >>= gaindiv;
    } else {
	insample /= gaindiv;
    }

    /* Clamp while the value is still 64 bits wide, so that the narrowing
       conversion below is always value preserving. INT32_MIN itself is a
       legal sample and is not an overflow. */
    if (insample < INT32_MIN) {
	overflow++;
	print_overflow = true;
	insample = INT32_MIN;
    } else if (insample > INT32_MAX) {
	overflow++;
	print_overflow = true;
	insample = INT32_MAX;
    }
    sample = (int32_t)insample;

    if (print_overflow) {
	now = time(NULL);
	if (now != last_print) {
	    fprintf(stderr, "sample overflow on channel %d! "
		    "(%d accumulated overflows)\n", channel + 1, overflow);
	    last_print = now;
	    print_overflow = false;
	}
    }

    /* Keep the most significant bits only */
    sample >>= 32 - bits;    
    return sample;
}

/*
 * Filter loop, for processing the input samples and writing the result to the
 * output buffers. Should be run in a separate process for each channel.
 *
 * Each iteration waits on this channel's filter semaphore, runs one fragment
 * of this channel's (interleaved) samples from inbuf through the filter
 * processor into outbuf, and then increases this channel's output semaphore.
 * Output increases are held back until delay_frags (n_frags - 2) fragments
 * have been filtered; at that point three are issued at once, and one per
 * fragment from then on.
 *
 * The function never returns: after the final fragment has been processed it
 * parks the process in sleep_forever().
 */
static void
filter_loop(void *inbuf,      /* the input buffer */
	    void *outbuf,     /* the output buffer */
	    struct filter_comarea *ca,
	    int fragsize,     /* number of samples in each fragment (per
				 channel) */
	    int n_frags,      /* number of fragments per buffer */
	    int n_channels,   /* number of channels */
	    struct sample_format *insf,  /* input sample format */
	    struct sample_format *outsf, /* output sample format */
	    filterproc_t *filterproc, /* the filter processor to be used */
	    int channel)      /* channel index */
{
    char dummy[n_frags]; /* semaphore payload, the content is irrelevant */
    int n, infragsize, outfragsize, delay_frags;
    int32_t sample, gaindiv;
    int64_t bigsample;
    uint8_t *infrag, *outfrag;
    bool_t active_state, do_inc = false, is_last_fragment = false, sd = false;
    volatile struct filter_comarea *comarea;
    /*int32_t divtest;*/

    /* Do not push indeterminate stack bytes through the pipes */
    memset(dummy, 0, (size_t)n_frags);

    delay_frags = n_frags - 2;
    comarea = ca;
    active_state = comarea->active;
    if (!comarea->active) {
	filterproc->toggle_processing(filterproc->private);
    }
    gaindiv = (int32_t)rint(filterproc->multiplier *
			    pow(10, -filterproc->db_gain / 20));
    /*
    for (n = 0, divtest = 1; n < 31; divtest <<= 1, n++) {
	if (fabs((double)gaindiv / (double)divtest - 1.0) < 0.01) {
	    sd = true;
	    gaindiv = n;
	    break;
	}
    }
    */
    /* Fragment sizes in bytes, all channels included */
    infragsize = fragsize * insf->bytes * n_channels;
    outfragsize = fragsize * outsf->bytes * n_channels;

    /*
     * Get buffer alignment for the given channel. When there are more than
     * one channel, samples are stored in interleaved order. For two
     * channels:
     * <ch1 sample 1><ch2 sample 1><ch1 sample 2><ch2 sample 2>...
     */
    inbuf = &((int8_t *)inbuf)[insf->bytes * channel];
    outbuf = &((int8_t *)outbuf)[outsf->bytes * channel];

    while (true) {

	/* Filter semaphore decrease */
	readfd(comarea->sem_filter[channel][0], dummy, 1);

	/* Check if processing should be toggled */
	if (comarea->active != active_state) {
	    filterproc->toggle_processing(filterproc->private);
	    active_state = !active_state;	    
	}
	
	/* Initialise fragment pointers */
	n = comarea->filter_pos[channel] % n_frags;
	infrag = (uint8_t *)inbuf + infragsize * n;
	outfrag = (uint8_t *)outbuf + outfragsize * n;
	
	/* Check if this is the last fragment, if so, fit the fragsize
	   (last_size is given in output bytes for all channels) */
	is_last_fragment = (comarea->last_size != -1 &&
			    comarea->filter_pos[channel] == comarea->last_pos);
	if (is_last_fragment) {
	    fragsize = comarea->last_size / (outsf->bytes * n_channels);
	}
	
	/* Process the buffer; n steps over this channel's samples only */
	for (n = 0; n < n_channels * fragsize; n += n_channels) {	    
	    sample = sample_raw2int(insf, &infrag[n*insf->bytes]);
	    bigsample = filterproc->process_sample(filterproc->private, sample);
	    sample = bit_reduce(bigsample, outsf->bits, gaindiv, sd, channel);
	    sample_int2raw(outsf, &outfrag[n*outsf->bytes], sample);
        }
	
	if (is_last_fragment) {
	    /*
	     * Hand the output loop the increases it still needs to reach
	     * the final fragment. It consumes one per fragment, that is
	     * last_pos + 1 in total. Once the steady-state increasing has
	     * started, filter_pos - delay_frags + 3 have been issued, which
	     * leaves delay_frags - 2. If it never started (the input was
	     * shorter than delay_frags fragments) none have been issued and
	     * all filter_pos + 1 are still owed; that count is at most
	     * delay_frags and thus fits in dummy.
	     */
	    if (do_inc) {
		n = delay_frags - 2;
	    } else {
		n = comarea->filter_pos[channel] + 1;
	    }
	    if (n > 0) {
		writefd(comarea->sem_output[channel][1], dummy, n);
	    }
	    sleep_forever();
	}

	/* Update the filter - output distance statistics */
	n = comarea->filter_pos[channel] - comarea->output_pos;
	if (comarea->stats.f_o_max[channel] < n) {
	    comarea->stats.f_o_max[channel] = n;
	}
	if (comarea->stats.f_o_min[channel] > n) {
	    comarea->stats.f_o_min[channel] = n;
	}
	comarea->filter_pos[channel]++;
	if (comarea->filter_pos[channel] == delay_frags) {
	    if (!do_inc) {
		/* Output semaphore increase */
		writefd(comarea->sem_output[channel][1], dummy, 2);
	    }
	    do_inc = true;
	}
	if (do_inc) {
	    /* Output semaphore increase */
	    writefd(comarea->sem_output[channel][1], dummy, 1);
	}
    }
}

/*
 * Output loop: waits until every channel's filter loop has released a
 * fragment, then hands that fragment to flushbuf and increases the input
 * semaphore. When the fragment marked as the last one is reached, any partial
 * data is flushed, stop_io is called and nwfiir_abort_all_processes() is
 * invoked.
 *
 * bufsize is expected to be a whole number of fragments; fragment offsets are
 * computed from the fragment count bufsize / fragsize.
 */
void
output_loop(void (*flushbuf)(void *, int),
	    void (*stop_io)(void),
	    void *buffer,       /* the buffer containing processed samples */
	    int bufsize,        /* size of the buffer in bytes */
	    int fragsize,       /* fragment size in bytes */
	    int n_channels,
	    filter_t *filter)
{
    volatile struct filter_comarea *comarea;
    uint8_t *frag;
    int n, n_frags;
    char dummy = 0;
    /*time_t now, when_printstats;*/

    n_frags = bufsize / fragsize;
    /*when_printstats = time(NULL) + 10;*/
    comarea = filter->comarea;    
    while (true) {
	
	/* output semaphore decrease */
	for (n = 0; n < n_channels; n++) {
	    readfd(comarea->sem_output[n][0], &dummy, 1);
	}

	/* Locate the fragment in the ring buffer. The position is reduced
	   modulo the fragment count before it is multiplied, so that the
	   byte offset cannot overflow an int however long the loop runs. */
	frag = (uint8_t *)buffer + (comarea->output_pos % n_frags) * fragsize;

	/* check if this is the last buffer to be written */
	if (comarea->last_size != -1 &&
	    comarea->output_pos == comarea->last_pos)
	{
	    if (comarea->last_size != 0) {
		/* a partial fragment left at the end */
		flushbuf(frag, comarea->last_size);
	    }	    
	    stop_io();
	    fprintf(stderr, "\nnwfiir: finished!\n");
	    nwfiir_abort_all_processes();
	} else {
	    flushbuf(frag, fragsize);
	    /* Update the input - output distance statistics */
	    n = comarea->input_pos - comarea->output_pos;
	    if (comarea->stats.i_o_max < n) {
		comarea->stats.i_o_max = n;
	    }
	    if (comarea->stats.i_o_min > n) {
		comarea->stats.i_o_min = n;
	    }
	    comarea->output_pos++;
	}

	/* input semaphore increase */
	writefd(comarea->sem_input[1], &dummy, 1);
        /*
	now = time(NULL);
	if (now == when_printstats) {
	    when_printstats = now + 10;
	    print_stats((struct filter_stats *)&comarea->stats, n_channels);
	    reset_stats((struct filter_stats *)&comarea->stats, n_channels,
			n_frags);
	}
	*/
    }
}

/*
 * Input loop: repeatedly asks fillbuf for one fragment, publishes it by
 * advancing input_pos and increasing every channel's filter semaphore, and,
 * once n_frags fragments have been queued, waits on the input semaphore
 * before each further fragment.
 *
 * When fillbuf returns anything other than fragsize the input is considered
 * finished: the position and size of the final fragment are recorded in the
 * communication area, the filter loops are released one last time and the
 * process is parked in sleep_forever(). The function never returns.
 */
static void
input_loop(int (*fillbuf)(void *),
	   void *buffer,       /* the buffer */
	   int bufsize,        /* buffer size in bytes */
	   int fragsize,       /* fragment size in bytes */
	   int n_frags,
	   int n_channels,
	   int in_sbytes,
	   int out_sbytes,
	   struct filter_comarea *ca,
	   bool_t show_progress)
{
    volatile struct filter_comarea *comarea;
    int n, i, bufoffset;
    bool_t do_dec = false;
    char dummy = 0; /* semaphore payload, the content is irrelevant */

    /* The ring buffer offset is derived from n_frags, see below */
    (void)bufsize;
    comarea = ca;
    
    while (true) {
	/* Reduce the position modulo the fragment count before multiplying,
	   so that the byte offset cannot overflow an int however long the
	   loop runs. */
	bufoffset = (comarea->input_pos % n_frags) * fragsize;
	if ((n = fillbuf((char *)buffer + bufoffset)) != fragsize) {
	    if (show_progress) {
		fprintf(stderr, ".");
	    }
	    /*
	     * Program has lost input, but there may still be data in
	     * output buffers that ought to be played.	     
	     */
	    if (n < 0) {
		/* Presumably an error indication from fillbuf; treat it as
		   "no data" so that last_size can never become negative and
		   be mistaken for the -1 "input still running" marker. */
		n = 0;
	    }
	    comarea->last_pos = comarea->input_pos;
	    /* Convert the byte count to the output sample format */
	    comarea->last_size = n / in_sbytes * out_sbytes;
	    /* filter semaphore increase */
	    for (n = 0; n < n_channels; n++) {
		writefd(comarea->sem_filter[n][1], &dummy, 1);
	    }
	    sleep_forever();
	}
	/* Update the input - filter distance statistics */
	for (n = 0; n < n_channels; n++) {
	    i = comarea->input_pos - comarea->filter_pos[n];
	    if (comarea->stats.f_i_max[n] < i) {
		comarea->stats.f_i_max[n] = i;
	    }
	    if (comarea->stats.f_i_min[n] > i) {
		comarea->stats.f_i_min[n] = i;
	    }
	}
	comarea->input_pos++;
	if (show_progress) {
	    fprintf(stderr, ".");
	}	
	/* filter semaphore increase */
	for (n = 0; n < n_channels; n++) {
	    writefd(comarea->sem_filter[n][1], &dummy, 1);
	}
	
	/* The first n_frags fragments are read without waiting; after that
	   every fragment waits for the output loop. */
	if (comarea->input_pos == n_frags) {
	    do_dec = true;
	}
	if (do_dec) {
	    /* input semaphore decrease */
	    readfd(comarea->sem_input[0], &dummy, 1);
	}
    }
}


/*
 * Create a new filter group of filter processors to work on a set of
 * I/O buffers.
 *
 * Allocates the shared input buffer, output buffer and communication area,
 * creates the pipes used as semaphores, and forks one output process plus one
 * filter process per channel. The input loop is not started here; the caller
 * does that with filter_run().
 *
 * Returns the new filter, or NULL after printing a message to stderr if a
 * parameter is out of range or a resource could not be obtained. Resources
 * acquired before a failure (including already forked processes) are not
 * released.
 */
filter_t *
filter_new(int fragsize,               /* fragment size in samples
					* (per channel) */
	   int n_frags,                /* number of fragments per buffer */
	   int n_channels,             /* number of channels */
	   struct sample_format *insf, /* input sample format */
	   struct sample_format *outsf,/* output sample format */
	   filterproc_t *filterproc[], /* filter processors mapped to
				        * the channels */
	   struct io_func *io_func)
{
    int n;
    filter_t *filter;
    void *inbuf, *outbuf;
    struct filter_comarea *comarea;
    struct shmid_ds shmid_ds;

    /* Allocated resources are not freed on failure */
    
    /* n_channels indexes arrays of MAX_CHANNELS elements in the
       communication area and in the pid table, so it needs an upper bound
       as well. */
    if (n_channels < 1 || n_channels > MAX_CHANNELS ||
	n_frags < 4 || fragsize < 128)
    {
	fprintf(stderr, "filter_new: at least one input parameter is "
		"invalid\n");
	return NULL;
    }

    /*
     * Allocate shared memory segments for I/O buffers, and interprocess
     * communication. Each segment is marked for removal as soon as it is
     * attached, so that no segment id is left to clean up later.
     */    
    if ((n = shmget(IPC_PRIVATE, n_frags * fragsize * n_channels * insf->bytes,
		    IPC_CREAT | SHM_R | SHM_W)) == -1 ||
	(inbuf = shmat(n, NULL, 0)) == (void *)-1 ||
	shmctl(n, IPC_RMID, &shmid_ds) == -1 ||
	
	(n = shmget(IPC_PRIVATE, n_frags * fragsize * n_channels * outsf->bytes,
		    IPC_CREAT | SHM_R | SHM_W)) == -1 ||
	(outbuf = shmat(n, NULL, 0)) == (void *)-1 ||
	shmctl(n, IPC_RMID, &shmid_ds) == -1 ||
	
	(n = shmget(IPC_PRIVATE, sizeof(struct filter_comarea),
		    IPC_CREAT | SHM_R | SHM_W)) == -1 ||
	(comarea = shmat(n, NULL, 0)) == (void *)-1 ||
	shmctl(n, IPC_RMID, &shmid_ds) == -1)
    {
	fprintf(stderr, "filter_new: failed to create shared memory "
		"segments: %s\n", strerror(errno));
	return NULL;
    }
    
    /* Create filter data structure and fill it with data. It is zero
       filled so that the pid table has a defined content from the start. */
    if ((filter = calloc(1, sizeof *filter)) == NULL) {
	fprintf(stderr, "filter_new: could not allocate memory\n");
	return NULL;
    }
    filter->fillbuf = io_func->fillbuf;
    filter->inbuffer = inbuf;
    filter->infragsize = fragsize * n_channels * insf->bytes;
    filter->n_frags = n_frags;
    filter->in_sbytes = insf->bytes;
    filter->out_sbytes = outsf->bytes;
    filter->n_channels = n_channels;
    filter->comarea = comarea;

    /* Communication area: all positions and max statistics start at zero,
       last_size == -1 means that the input has not ended, and the min
       statistics start at their upper bound. */
    memset(comarea, 0, sizeof *comarea);
    comarea->last_size = -1;
    comarea->active = true;
    comarea->stats.i_o_min = n_frags;
    for (n = 0; n < n_channels; n++) {
	comarea->stats.f_o_min[n] = n_frags;
	comarea->stats.f_i_min[n] = n_frags;
    }

    /* Initialise semaphores for interprocess synchronisation */
    if (pipe(comarea->sem_input) == -1) {
	fprintf(stderr, "filter_new: failed to create pipe: %s\n",
		strerror(errno));
	return NULL;
    }
    for (n = 0; n < n_channels; n++) {
	if (pipe(comarea->sem_filter[n]) == -1 ||
	    pipe(comarea->sem_output[n]) == -1)
	{
	    fprintf(stderr, "filter_new: failed to create pipe: %s\n",
		    strerror(errno));
	    return NULL;
	}
    }

    /* Start output process; its pid goes in the slot after the filters */
    switch (filter->pids[n_channels] = fork()) {
    case 0:
	output_loop(io_func->flushbuf, io_func->stop_io, outbuf,
		    fragsize * n_channels * outsf->bytes * n_frags,
		    fragsize * n_channels * outsf->bytes, n_channels, filter);
	exit(0);
    case -1:
	fprintf(stderr, "filter_new: fork failed: %s\n", strerror(errno));
	return NULL;
    default:
	break;
    }
    
    /* Start a process for each channel */
    for (n = 0; n < n_channels; n++) {
	switch (filter->pids[n] = fork()) {
	case 0:
	    filter_loop(inbuf, outbuf, comarea, fragsize, n_frags, n_channels,
			insf, outsf, filterproc[n], n);
	    exit(0);
	case -1:
	    fprintf(stderr, "filter_new: fork failed: %s\n", strerror(errno));
	    return NULL;
	default:
	    break;
	}
    }        
    
    return filter;
}

/*
 * Run the input loop of the filter group in the calling process. The pid of
 * the caller is stored in the last used slot of the pid table first. The
 * input loop takes its parameters from the filter structure; the total input
 * buffer size is the fragment count times the fragment size.
 */
void
filter_run(filter_t *filter,
	   bool_t show_progress)
{
    const int channels = filter->n_channels;
    const int frags = filter->n_frags;
    const int frag_bytes = filter->infragsize;

    filter->pids[channels + 1] = getpid();

    input_loop(filter->fillbuf,
	       filter->inbuffer,
	       frags * frag_bytes,
	       frag_bytes,
	       frags,
	       channels,
	       filter->in_sbytes,
	       filter->out_sbytes,
	       filter->comarea,
	       show_progress);
}

/*
 * Invert the "processing active" flag in the shared communication area.
 * The filter loops compare the flag with their own state before each
 * fragment and toggle their filter processor when it has changed.
 */
void
filter_toggle_processing(filter_t *filter)
{
    struct filter_comarea *shared = filter->comarea;
    const bool_t was_active = shared->active;

    shared->active = !was_active;
}

/*
 * Stop the processes of a filter group and release the filter structure.
 *
 * SIGINT is sent to the per-channel filter processes and to the output
 * process (pid table slots 0 .. n_channels), except to the calling process
 * itself. A NULL filter is accepted and ignored.
 */
void
filter_delete(filter_t *filter)
{
    pid_t self, pid;
    int n;

    if (filter == NULL) {
	return;
    }

    self = getpid();
    for (n = 0; n < filter->n_channels + 1; n++) {
	pid = filter->pids[n];
	/* kill() treats a pid of zero or below as a request to signal whole
	   process groups, so only real process ids are signalled. */
	if (pid > 0 && pid != self) {
	    kill(pid, SIGINT);
	}
    }
    free(filter);
}
