#include <common.h>
#include <command.h>
#include <cpu.h>

/* Signature of the OS entry point. In this file it is called with the four
   values stored in slave_env[0..3] (see _5281_text_before_os). */
typedef void (*os_t)(int, char **, char **, int *);

/* Boot image descriptor, defined elsewhere. The visible code only reads
   images.ep (the entry point) from it. */
extern bootm_headers_t images;

/* Context handed over from board_boot_slave() to the slave core, stored as
   32-bit words:
   0 ~ 3: a0 ~ a3 (the four arguments for the OS entry point)
   4: entry point */
static uint32_t slave_env[5] = {0};

/* OR 0x20000000 into a 32-bit address. The argument is parenthesized so the
   cast to uint32_t applies to the whole expression, not just its first
   operand.
   NOTE(review): presumably converts a MIPS KSEG0 (cached) address into its
   KSEG1 (uncached) alias -- confirm against the SoC memory map. */
#define UCADDR(addr) (((uint32_t)(addr)) | 0x20000000)

/* Last stage executed before jumping into the OS: pulse two bits in
   coprocessor-0 register $20 (icache, then dcache invalidate according to the
   comments below) and call the OS entry point with the saved arguments.

   Keep this function trivial: per the original author it runs WITHOUT a
   stack, so nothing here may need stack space.

   Attributes: noinline keeps it a real, separately addressable function
   (its address is taken via UCADDR in _5281_text_bir); far is the GCC MIPS
   synonym for long_call, i.e. calls go through a register.

   NOTE(review): both asm statements write $4 and $5 but declare no outputs
   or clobbers, so the compiler is free to assume those registers are
   preserved -- confirm the generated code keeps nothing live in them. */
__attribute__ ((noinline, far)) static void
_5281_text_before_os(void) {
	/* OS entry point published by board_boot_slave() in slave_env[4]. */
	const os_t os = (os_t)(slave_env[4]);

	/* 5281 icache invalidate: read CP0 $20, write it back with bit 1
	   cleared, then write it back again with bit 1 set (0 -> 1 pulse). */
	__asm__ __volatile__ ("mfc0 $4, $20;\n\t"
												"li   $5, 0x2;\n\t"
												"not  $5, $5;\n\t"
												"and  $4, $4, $5;\n\t"
												"mtc0 $4, $20;\n\t"
												"li   $5, 0x2;\n\t"
												"or   $4, $4, $5;\n\t"
												"mtc0 $4, $20;\n\t"
	                      "nop;\n\t");

	/* 5281 dcache invalidate: same 0 -> 1 pulse sequence, on bit 0 of
	   CP0 $20. */
	__asm__ __volatile__ ("mfc0 $4, $20;\n\t"
												"li   $5, 1;\n\t"
												"not  $5, $5;\n\t"
												"and  $4, $4, $5;\n\t"
												"mtc0 $4, $20;\n\t"
												"li   $5, 1;\n\t"
												"or   $4, $4, $5;\n\t"
												"mtc0 $4, $20;\n\t"
	                      "nop;\n\t");

	/* No further CPU init here: CPU init will be done in Linux. */

	/* Enter the OS with a0..a3 as saved in slave_env[0..3]. */
	os((int)slave_env[0],
	   (char **)slave_env[1],
	   (char **)slave_env[2],
	   (int *)slave_env[3]);

	/* The OS entry point is expected never to return. */
	return;
}

/* Trampoline whose machine code is copied by board_boot_slave() to the BIR
   area at 0xb8004100; it only jumps to _5281_text_before_os through its
   UCADDR() alias.

   Keep this function trivial: it is relocated (copied) to BIR and runs
   without a stack. Limit its code to 28 bytes: board_boot_slave() copies
   8 words starting here and then stores the DCR value at 0xb800411c, i.e.
   over the word at byte offset 28 of the copy. */
static void
_5281_text_bir(void) {
	typedef void (*prep_func_t)(void);
	/* Call through a pointer holding an absolute address, formed by OR-ing
	   0x20000000 into the function's link address (see UCADDR). */
	prep_func_t uc_5281_text_before_os = (prep_func_t)UCADDR(_5281_text_before_os);

	uc_5281_text_before_os();

	return;
}

/* Copy len_4b 32-bit words from src to dst, one word at a time, in
   ascending address order.

   dst    - destination, must have room for len_4b words
   src    - source words; never written through
   len_4b - number of 32-bit words (not bytes); zero or a negative value
            copies nothing

   The regions are assumed not to overlap in a way that matters for a
   forward copy. */
__attribute__((unused)) static void
_memcpy32(uint32_t *dst, const uint32_t *src, int len_4b) {
	/* Test "> 0" instead of "!= 0" so a negative count cannot run away
	   through memory. */
	for (; len_4b > 0; len_4b--) {
		*dst++ = *src++;
	}
	return;
}

/* Prepare the slave core's boot context, start the slave core, and park the
   calling core forever.

   a0..a3 are stored unchanged in slave_env[0..3]; together with images.ep
   (slave_env[4]) they become the OS entry point call made by
   _5281_text_before_os(). This function never returns. */
void
board_boot_slave(int a0, char **a1, char **a2, int *a3) {
	const uint32_t bir_addr = 0xb8004100;	/* BIR: trampoline destination */
	const uint32_t dcr_copy_addr = 0xb800411c;	/* DCR value left for the slave */
	const uint32_t dcr_addr = 0xb8001004;	/* DCR source register */
	const uint32_t ctrl_addr = 0xb8000044;	/* flash routing / slave enable */

	/* Install the trampoline: 8 words of _5281_text_bir go to BIR. */
	_memcpy32((uint32_t *)bir_addr, (uint32_t *)_5281_text_bir, 8);

	/* Slave context: a0..a3 followed by the OS entry point. */
	slave_env[0] = (uint32_t)a0;
	slave_env[1] = (uint32_t)a1;
	slave_env[2] = (uint32_t)a2;
	slave_env[3] = (uint32_t)a3;
	slave_env[4] = (uint32_t)images.ep;

	/* Leave the DCR value where it can be read to get the DRAM size. */
	REG32(dcr_copy_addr) = REG32(dcr_addr);

	/* Write back and invalidate the whole dcache before the slave starts. */
	writeback_invalidate_dcache_all();

	/* Direct snof to oc1 (bit 30); from now on, oc0 can't access spif. */
	REG32(ctrl_addr) = REG32(ctrl_addr) | (1 << 30);

	/* Enable the slave core with jtag chain mode: clear bits 7:5, then
	   set bit 5, as two separate register updates. */
	REG32(ctrl_addr) = REG32(ctrl_addr) & (~(7 << 5));
	REG32(ctrl_addr) = REG32(ctrl_addr) | (1 << 5);

	/* Nothing left to do on this core: spin forever. */
	for (;;) {
	}
}
