Channel: credentiality
Viewing all articles
Browse latest Browse all 94

Beaglebone PRU DDR memory access

Here's some C and PRU assembly code I wrote to see how fast the PRU can write to system (DDR) memory.

 // Loads a .bin file into a BeagleBone PRU and then interacts with it  
// in shared PRU memory and (system-wide) DDR memory.
// Pass in the filename of the .bin file on the command line, eg:
// $ ./pru_loader foo.bin
// Compile with:
// gcc -std=gnu99 -o pru_loader pru_loader.c -lprussdrv

#include <unistd.h>
#include <stdio.h>
#include <inttypes.h>
#include <prussdrv.h>
#include <pruss_intc_mapping.h>

/*
 * Loads a PRU program, hands it the location and size of the shared DDR
 * segment via PRU shared RAM, then samples its progress once per second.
 *
 * argv[1] is the path of the assembled PRU .bin file.
 * Returns 0 on success, 1 on a usage error or if the PRU driver could not be
 * opened, the memories could not be mapped, or the program could not be loaded.
 */
int main(int argc, char **argv) {
  if (argc != 2) {
    printf("Usage: %s pru_code.bin\n", argv[0]);
    return 1;
  }

  // If this segfaults, make sure you're executing as root.
  prussdrv_init();
  if (prussdrv_open(PRU_EVTOUT_0) == -1) {
    printf("prussdrv_open() failed\n");
    return 1;
  }

  // Set up the interrupt controller so the PRU's "done" event reaches us.
  tpruss_intc_initdata pruss_intc_initdata = PRUSS_INTC_INITDATA;
  prussdrv_pruintc_init(&pruss_intc_initdata);

  // Pointer into the shared PRU data RAM. Mapped through a plain void * so
  // no qualifier has to be cast away; accessed as volatile 4-byte words
  // because the PRU updates it behind the compiler's back.
  void *shared_memory_void = NULL;
  prussdrv_map_prumem(PRUSS0_SHARED_DATARAM, &shared_memory_void);
  if (shared_memory_void == NULL) {
    printf("prussdrv_map_prumem() failed\n");
    prussdrv_exit();
    return 1;
  }
  volatile uint32_t *shared_memory = shared_memory_void;

  // Pointer into the DDR RAM mapped by the uio_pruss kernel module.
  void *shared_ddr = NULL;
  prussdrv_map_extmem(&shared_ddr);
  if (shared_ddr == NULL) {
    printf("prussdrv_map_extmem() failed\n");
    prussdrv_exit();
    return 1;
  }
  unsigned int shared_ddr_len = prussdrv_extmem_size();
  unsigned int physical_address = prussdrv_get_phys_addr(shared_ddr);

  printf("%u bytes of shared DDR available.\n Physical (PRU-side) address:%x\n",
         shared_ddr_len, physical_address);
  printf("Virtual (linux-side) address: %p\n\n", shared_ddr);

  // We'll use the first 8 bytes of PRU memory to tell it where the
  // shared segment of system memory is.
  shared_memory[0] = physical_address;
  shared_memory[1] = shared_ddr_len;

  // Change to 0 to use PRU0
  int which_pru = 1;
  if (prussdrv_exec_program(which_pru, argv[1]) < 0) {
    // Without this check a bad path would leave us waiting forever below.
    printf("prussdrv_exec_program() failed for %s\n", argv[1]);
    prussdrv_exit();
    return 1;
  }

  // The PRU writes this memory concurrently, so read it as volatile.
  volatile uint32_t *ddr_words = shared_ddr;

  for (int i = 0; i < 10; i++) {
    // See if it's successfully writing the physical address of each word at
    // the (virtual, from our viewpoint) address
    printf("DDR[%d] is: %p / 0x%" PRIx32 "\n", i,
           (void *) ((uint32_t *) shared_ddr + i), ddr_words[i]);

    // Sample once per second so successive counts read as bytes/second.
    sleep(1);

    // The PRU overwrites shared_memory[0] with its completed-pass count.
    // Use 64-bit math: passes * segment size exceeds 32 bits on long runs.
    uint64_t passes = shared_memory[0];
    uint64_t bytes_written = passes * shared_ddr_len;
    printf("Bytes written: %" PRIu64 "\n", bytes_written);
  }

  // Wait for the PRU to let us know it's done
  prussdrv_pru_wait_event(PRU_EVTOUT_0);
  printf("All done\n");

  prussdrv_pru_disable(which_pru);
  prussdrv_exit();

  return 0;
}
And here's the assembly:
 .origin 0
.entrypoint TOP

// PRU program: repeatedly fills the shared DDR segment with the physical
// address of each word and publishes the completed-pass count in shared RAM.
//
// Host contract (first 8 bytes of PRU shared RAM, written before start):
//   SHARED_RAM + 0 : physical address of the shared DDR segment
//   SHARED_RAM + 4 : size of the segment in bytes
// While running, SHARED_RAM + 0 is overwritten with the pass count.
//
// Register use:
//   r10 = current write address, r11 = end address of the segment,
//   r12 = passes completed,      r14 = total passes to run.

#define DDR r29
#define DDR_SIZE r28
#define SHARED_RAM r27

#define SHARED_RAM_ADDRESS 0x10000

TOP:
  // Enable OCP master ports in SYSCFG register
  LBCO r0, C4, 4, 4
  CLR r0, r0, 4
  SBCO r0, C4, 4, 4

  MOV SHARED_RAM, SHARED_RAM_ADDRESS

  // From shared RAM, grab the address of the shared DDR segment
  LBBO DDR, SHARED_RAM, 0, 4
  // And the size of the segment from SHARED_RAM + 4
  LBBO DDR_SIZE, SHARED_RAM, 4, 4

  // BIGLOOP is one pass overwriting the shared DDR memory segment
  mov r12, 0
  mov r14, 10000

BIGLOOP:
  // Start at the beginning of the segment
  MOV r10, DDR
  // r11 marks one past the last byte of the segment
  ADD r11, DDR, DDR_SIZE

  // Tight loop writing the physical address of each word into that word
LOOP0:
  SBBO r10, r10, 0, 4
  ADD r10, r10, 4
  // XXX: This means r10 < r11, opposite what I expected!
  // (QBLT branches when the last operand is less than the middle one.)
  QBLT LOOP0, r11, r10

  // One full pass done: bump the counter and publish it for the host
  ADD r12, r12, 1
  SBBO r12, SHARED_RAM, 0, 4
  QBGT BIGLOOP, r12, r14

  // Interrupt the host so it knows we're done
  MOV r31.b0, 19 + 16

  // Don't forget to halt!
  HALT
Here's the output I get, about 200MB/sec:

 262144 bytes of shared DDR available.  
Physical (PRU-side) address:9e6c0000
Virtual (linux-side) address: 0xb6d78000

DDR[0] is: 0xb6d78000 / 0x9e6c0000
Bytes written: 200540160
DDR[1] is: 0xb6d78004 / 0x9e6c0004
Bytes written: 401342464
DDR[2] is: 0xb6d78008 / 0x9e6c0008
Bytes written: 601882624
DDR[3] is: 0xb6d7800c / 0x9e6c000c
Bytes written: 802160640
DDR[4] is: 0xb6d78010 / 0x9e6c0010
Bytes written: 1002176512
DDR[5] is: 0xb6d78014 / 0x9e6c0014
Bytes written: 1202454528
DDR[6] is: 0xb6d78018 / 0x9e6c0018
Bytes written: 1402470400
DDR[7] is: 0xb6d7801c / 0x9e6c001c
Bytes written: 1602748416
DDR[8] is: 0xb6d78020 / 0x9e6c0020
Bytes written: 1802764288
DDR[9] is: 0xb6d78024 / 0x9e6c0024
Bytes written: 2003042304
All done

If I crank up the number of bytes written by SBBO from 4 to 8 (in the SBBO and ADD after LOOP0), then I think it ends up writing the contents of r10 and r11 into memory, and I get 320MB/sec.  If I crank it up to 16 bytes per write, I get 450MB/sec.

So the PRU really can write very quickly to system RAM.

Viewing all articles
Browse latest Browse all 94

Latest Images

Trending Articles

click here for Latest and Popular articles on Mesothelioma and Asbestos

Latest Images