This section describes the Field Programmable Gate Arrays (FPGA) core services device driver and covers the following topics:
All FPGAs connected to a TIO in a RASC-brick or a RASC-blade will contain a fixed set of services or core logic from SGI that control data movement, function initialization and initiation. These services are spelled out in detail in Chapter 3, “RASC Algorithm FPGA Hardware Design Guide”.
The FPGA core services device driver is implemented as a character special device driver. It provides open, close, read, write, ioctl and mmap entry points that allow access to the core services direct memory access (DMA) engines and registers.
Specific knowledge of the core services memory-mapped registers is not required to use the core services device driver. The layout of those registers is provided in Chapter 3, “RASC Algorithm FPGA Hardware Design Guide”.
The device driver API implements these system calls (see Table A-1):
Table A-1. Device Driver API System Calls
| System Call | Description |
|---|---|
| open() | Opens the character-special file. Only one application may have the file opened at any one time. |
| close() | Closes the character-special file. |
| read() | Reads from the FPGA SDRAM to host memory. |
| write() | Writes from host memory to the FPGA SDRAM. |
| lseek() | Seeks to a specific location in FPGA SDRAM. |
| mmap() | Maps the core services registers or SDRAM into user space. |
The character special files used with the device driver are as follows:
/dev/RASC/acs/<nasid>/gscr
Used for memory mapping core services memory mapped registers.
/dev/RASC/acs/<nasid>/sram
Used for all other system calls, for example, reading, writing, and memory mapping SRAM.
The input direct memory access (DMA) block is comprised of four stream DMA engines that target the algorithm for the data coming in from main memory and two block DMA read engines that target SRAM for the data coming in from main memory. For more information, see “RASC Core Services Overview” in Chapter 3.
The output DMA block is comprised of four stream DMA engines that take data directly from the algorithm and target main memory and two block DMA write engines that take data from SRAM and target main memory. For more information, see “RASC Core Services Overview” on page 22.
The ioctl command COP_IOCTL_ALGO_START is used to start the algorithm in the FPGA. Use COP_IOCTL_ALGO_STEP instead of COP_IOCTL_ALGO_START to start the algorithm but only run a given number of clocks. Successive COP_IOCTL_ALGO_STEP ioctl calls will move the clock. The COP_IOCTL_ALGO_CONT call is used to drive the algorithm to completion after stepping.
Upon successful completion, zero is returned. Otherwise -1 is returned and errno is set.
This section provides an example of using the FPGA core services device driver.
#include "sys/types.h"
#include "stdio.h"
#include "stdlib.h"
#include "unistd.h"
#include "string.h"
#include "fcntl.h"
#include "sys/ioctl.h"
#include "acs.h"
/*
* FPGA algorithm info.
*
* (This is a simple test-only algorithm built largely for software testing.)
*
* FPGA algorithm is 'a = a & b | c'.
* The maximum count for each operand is 512 64-bit words.
* Operand 'a' input starts at word zero of sram0
* Operand 'b' input starts at word zero of sram1
* Operand 'c' input starts at word zero of sram2
* The results are placed at word zero of sram0 (overwriting operand a)
*/
/*
 * Local 64-bit unsigned word type. Values of this type are printed with
 * "%llx" elsewhere in this file, which matches unsigned long long.
 * NOTE(review): defining uint64_t here will conflict with <stdint.h> if that
 * header is ever included and defines it differently -- confirm before
 * adding it.
 */
typedef unsigned long long uint64_t;
/* Offsets passed to lseek() to select each SRAM; they are RAM_SIZE apart. */
#define RAM_SIZE (2*1024*1024) // individual SRAM size
#define SRAM0 0
#define SRAM1 RAM_SIZE
#define SRAM2 (RAM_SIZE*2)
/* Capacity, in 64-bit words, of each operand and result buffer. */
#define OPERAND_COUNT 512
/*
 * Convert a count of 64-bit words to a byte count.
 * The argument and the whole expansion are parenthesized so that an
 * expression argument such as W2B(n + 1) multiplies the entire expression.
 */
#define W2B(wc) ((wc) * sizeof(uint64_t))
/* Input operand buffers for the algorithm, OPERAND_COUNT 64-bit words each. */
uint64_t operand_a[OPERAND_COUNT];
uint64_t operand_b[OPERAND_COUNT];
uint64_t operand_c[OPERAND_COUNT];
/* Result buffer filled by hard_algo() (FPGA version). */
uint64_t hard_results[OPERAND_COUNT];
/* Result buffer filled by soft_algo() (host version) when verifying. */
uint64_t soft_results[OPERAND_COUNT];
/*
 * Host-processor reference implementation of the algorithm.
 *
 * For each of the first 'count' elements, stores (a & b) | c into r.
 * Each element's inputs are read before its result is stored.
 * A count of zero or less stores nothing.
 */
void
soft_algo(uint64_t *r, uint64_t *a, uint64_t *b, uint64_t *c, int count)
{
	int remaining = count;

	while (remaining > 0) {
		uint64_t masked = *a & *b;

		*r = masked | *c;
		r++;
		a++;
		b++;
		c++;
		remaining--;
	}
}
/*
 * Seek to 'offset' on the SRAM device 'fd' and write 'len' bytes from 'buf'.
 *
 * Returns 0 when all 'len' bytes were written, -1 otherwise. Failures are
 * reported on stderr: perror() is used only when the call itself failed
 * (so errno is meaningful); a short write gets its own message.
 */
static int
sram_write_at(int fd, off_t offset, const void *buf, size_t len)
{
	ssize_t n;

	if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
		perror("lseek");
		return -1;
	}
	n = write(fd, buf, len);
	if (n < 0) {
		perror("write");
		return -1;
	}
	if ((size_t)n != len) {
		fprintf(stderr, "wrote %zd of %zu bytes\n", n, len);
		return -1;
	}
	return 0;
}

/*
 * Seek to 'offset' on the SRAM device 'fd' and read 'len' bytes into 'buf'.
 *
 * Returns 0 when all 'len' bytes were read, -1 otherwise. Failures are
 * reported on stderr in the same way as sram_write_at().
 */
static int
sram_read_at(int fd, off_t offset, void *buf, size_t len)
{
	ssize_t n;

	if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
		perror("lseek");
		return -1;
	}
	n = read(fd, buf, len);
	if (n < 0) {
		perror("read");
		return -1;
	}
	if ((size_t)n != len) {
		fprintf(stderr, "read %zd of %zu bytes\n", n, len);
		return -1;
	}
	return 0;
}

/*
 * FPGA version of algorithm
 *
 * Writes 'count' 64-bit words of operands a, b and c to offsets SRAM0,
 * SRAM1 and SRAM2 of the core services SRAM device, issues
 * COP_IOCTL_ALGO_START, then reads 'count' words of results back from
 * SRAM0 into r.
 *
 * 'count' is expected to be non-negative; the caller validates it.
 *
 * Returns 0 on success, or a negative code identifying the failed step:
 *   -1 open, -2 operand 'a' transfer, -3 operand 'b' transfer,
 *   -4 operand 'c' transfer, -5 ioctl, -6 result transfer.
 * A failed seek is reported with the code of the transfer it precedes.
 */
int
hard_algo(uint64_t *r, uint64_t *a, uint64_t *b, uint64_t *c, int count)
{
	const char *path = "/dev/RASC/acs/1/sram";
	size_t bufsize = W2B(count);
	int rv = 0;
	int fd;

	/*
	 * open core services
	 */
	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror("open");
		return -1;
	}

	/*
	 * move operands 'a', 'b' and 'c' into their SRAMs
	 */
	if (sram_write_at(fd, SRAM0, a, bufsize) < 0) {
		rv = -2;
		goto out;
	}
	if (sram_write_at(fd, SRAM1, b, bufsize) < 0) {
		rv = -3;
		goto out;
	}
	if (sram_write_at(fd, SRAM2, c, bufsize) < 0) {
		rv = -4;
		goto out;
	}

	/*
	 * start the FPGA algorithm and wait for it to complete
	 */
	if (ioctl(fd, COP_IOCTL_ALGO_START, 0) < 0) {
		perror("ioctl");
		rv = -5;
		goto out;
	}

	/*
	 * get the results (they overwrite operand 'a' in SRAM0)
	 */
	if (sram_read_at(fd, SRAM0, r, bufsize) < 0) {
		rv = -6;
		goto out;
	}

out:
	close(fd);
	return rv;
}
/*
 * Compare the first 'count' elements of the two result arrays.
 *
 * Prints the index and both values for the first mismatch only, then
 * prints the total number of mismatching elements.
 */
void
compare_results(uint64_t *hard_results, uint64_t *soft_results, int count)
{
	int mismatches = 0;
	int idx = 0;

	while (idx < count) {
		if (hard_results[idx] != soft_results[idx]) {
			/* only the first difference is reported in detail */
			if (mismatches == 0) {
				printf("miscompare at %d: got: 0x%llx expected: 0x%llx\n",
				    idx, hard_results[idx], soft_results[idx]);
			}
			mismatches++;
		}
		idx++;
	}

	printf("%d result miscompares between algorithms\n", mismatches);
}
/*
 * Fill the first 'operand_count' elements of 'operand' with 'value'.
 * A count of zero or less leaves the buffer untouched.
 */
void
operand_set(uint64_t *operand, int operand_count, uint64_t value)
{
	int idx;

	for (idx = 0; idx < operand_count; idx++) {
		operand[idx] = value;
	}
}
/*
 * Test driver for the FPGA algorithm example.
 *
 * Options:
 *   -a val    value used to fill operand 'a' (default 'a')
 *   -b val    value used to fill operand 'b' (default 'b')
 *   -c val    value used to fill operand 'c' (default 'c')
 *   -s count  number of 64-bit words per operand, 1..OPERAND_COUNT
 *   -v        also run the host version and compare the results
 *
 * Returns 0 on success and -1 on a bad option, an invalid operand count,
 * or a hard_algo() failure.
 */
int
main(int argc, char **argv)
{
	long count_arg = OPERAND_COUNT;
	int operand_count;
	size_t bufsize;
	int n, c;
	uint64_t val_a = 'a';
	uint64_t val_b = 'b';
	uint64_t val_c = 'c';
	int verify = 0;
	char *end;

	/* getopt() returns -1 once all options have been consumed */
	while ((c = getopt(argc, argv, "a:b:c:s:v")) != -1) {
		switch (c) {
		case 'a':
			val_a = strtoull(optarg, 0, 0);
			break;
		case 'b':
			val_b = strtoull(optarg, 0, 0);
			break;
		case 'c':
			val_c = strtoull(optarg, 0, 0);
			break;
		case 's':
			/*
			 * Keep the parsed value in a long so the range check
			 * below sees it before any narrowing to int; an
			 * out-of-range strtol() result (LONG_MIN/LONG_MAX)
			 * is rejected by that same check.
			 */
			count_arg = strtol(optarg, &end, 0);
			if (end == optarg || *end != '\0')
				count_arg = 0;	/* not a number: force rejection */
			break;
		case 'v':
			verify++;
			break;
		default:
			/* getopt() has already reported the offending option */
			fprintf(stderr,
			    "usage: %s [-a val] [-b val] [-c val] [-s count] [-v]\n",
			    argv[0]);
			return -1;
		}
	}

	if (count_arg <= 0 || count_arg > OPERAND_COUNT) {
		printf("invalid operand_count. ");
		printf("must be: 0 < operand_count <= OPERAND_COUNT\n");
		return -1;
	}
	operand_count = (int)count_arg;

	/* syscalls & lib functions usually require a byte count */
	bufsize = W2B(operand_count);

	/*
	 * initialize input and output buffers
	 */
	operand_set(operand_b, operand_count, val_b);
	operand_set(operand_c, operand_count, val_c);
	operand_set(operand_a, operand_count, val_a);
	memset(hard_results, 0x0, bufsize);

	/*
	 * optionally generate results on host processor
	 */
	if (verify) {
		printf("Starting the software algorithm\n");
		soft_algo(soft_results, operand_a, operand_b, operand_c,
		    operand_count);
	}

	/*
	 * run the FPGA algorithm
	 */
	printf("Starting the hardware algorithm\n");
	n = hard_algo(hard_results, operand_a, operand_b, operand_c,
	    operand_count);
	if (n < 0) {
		printf("hard_algo failed: %d\n", n);
		return -1;
	}

	printf("example results:\n 0x%llx & 0x%llx | 0x%llx = 0x%llx\n",
	    operand_a[0], operand_b[0], operand_c[0], hard_results[0]);

	if (verify) {
		compare_results(hard_results, soft_results, operand_count);
	}

	printf("done\n");
	return 0;
}
|