I am loading textures in my code, which moves graphics data from main memory (SH4) to video memory (PVR), so I am using "pvr_mem_malloc" and "pvr_txr_load". Because I move quite a large amount of data, it takes time, and I want to try to reduce it. I dug into KOS and saw that the function "pvr_txr_load" uses the store queues via the "sq_cpy" function. Is there a way to speed this up? I have heard about DMA transfers too ...
Thanks.
Code: Select all
/*
 * Copy raw texture data from an SH-4 buffer into PVR RAM.
 *
 * src   - source buffer holding the texture data
 * dst   - destination pointer in PVR memory
 * count - number of bytes to copy; if it is not a multiple of 4 it is
 *         padded up to the next multiple of 4 before the copy
 *
 * The transfer itself is delegated to sq_cpy().
 * NOTE(review): the sq_cpy() shown alongside this function moves data in
 * whole 32-byte blocks (it shifts the count right by 5), so a count that
 * is not a multiple of 32 presumably leaves its trailing bytes
 * untransferred - confirm against callers.
 */
void pvr_txr_load(void * src, pvr_ptr_t dst, uint32 count) {
    const uint32 tail = count % 4;

    /* Pad the byte count so it covers a whole number of 4-byte words. */
    if(tail != 0) {
        count = (count - tail) + 4;
    }

    sq_cpy((uint32 *)dst, (uint32 *)src, count);
}
Code: Select all
/*
 * Copies n bytes from src to dest; dest must be 32-byte aligned.
 *
 * The data is not stored to dest directly. It is written through an
 * address window based at 0xe0000000 (the "store queue memory area"
 * referred to below), with QACR0/QACR1 loaded from the upper bits of dest.
 *
 * Only whole 32-byte blocks are transferred: n is shifted right by 5, so
 * any remainder (n % 32 bytes) is not copied.
 *
 * dest - destination address; its low 5 bits are masked off when the
 *        window address is formed, hence the alignment requirement
 * src  - source buffer, read through an unsigned int pointer
 * n    - byte count
 *        NOTE(review): a negative n is not guarded against and would
 *        make the while(n--) loop run far longer than intended -
 *        confirm callers never pass one.
 *
 * Returns dest unchanged.
 */
void * sq_cpy(void *dest, void *src, int n) {
/* Window pointer: 0xe0000000 ORed with bits 5..25 of dest. */
unsigned int *d = (unsigned int *)(void *)
(0xe0000000 | (((unsigned long)dest) & 0x03ffffe0));
unsigned int *s = src;
/* Set store queue memory area as desired: bits 26..28 of dest are
   placed in bits 2..4 of both QACR registers. */
QACR0 = ((((unsigned int)dest) >> 26) << 2) & 0x1c;
QACR1 = ((((unsigned int)dest) >> 26) << 2) & 0x1c;
/* fill/write queues as many times necessary */
/* Convert the byte count into a count of 32-byte blocks. */
n >>= 5;
while(n--) {
/* s + 8 is eight words past the current read position, i.e. the block
   the next iteration will read. On the final iteration this address
   lies just past the data being copied.
   NOTE(review): presumably harmless because pref is only a hint -
   confirm. */
asm("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
/* Move one block of eight words into the window. */
d[0] = *(s++);
d[1] = *(s++);
d[2] = *(s++);
d[3] = *(s++);
d[4] = *(s++);
d[5] = *(s++);
d[6] = *(s++);
d[7] = *(s++);
/* NOTE(review): pref on the window address presumably triggers the
   write-out of the eight words just stored - confirm against the SH-4
   hardware manual. */
asm("pref @%0" : : "r"(d));
d += 8;
}
/* Wait for both store queues to complete */
/* One dummy write at word 0 and one at word 8 of the window base.
   NOTE(review): presumably one write per queue - confirm. */
d = (unsigned int *)0xe0000000;
d[0] = d[8] = 0;
return dest;
}