diff options
Diffstat (limited to 'ipedma_test.c')
-rw-r--r-- | ipedma_test.c | 657 |
1 files changed, 657 insertions, 0 deletions
diff --git a/ipedma_test.c b/ipedma_test.c new file mode 100644 index 0000000..a99a40b --- /dev/null +++ b/ipedma_test.c @@ -0,0 +1,657 @@ +#define _POSIX_C_SOURCE 200809L +#define _BSD_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <time.h> +#include <sched.h> +#include <sys/time.h> +#include <sys/types.h> +#include <arpa/inet.h> +#include <sched.h> +#include <errno.h> + +#include <pcilib.h> +#include <pcilib/kmem.h> + +//#include <sys/ipc.h> +//#include <sys/shm.h> + + +#define DEVICE "/dev/fpga0" + +#define BAR PCILIB_BAR0 +#define USE_RING PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1) +#define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 2) +//#define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters + +#define BUFFERS 128 +#define ITERATIONS 1000 +#define DESC_THRESHOLD BUFFERS/8 // Lorenzo: after how many desc the FPGA must update the "written descriptor counter" in PC mem + // if set to 0, the update only happens when INT is received + +#define HUGE_PAGE 1 // number of pages per huge page +#define TLP_SIZE 32 // TLP SIZE = 64 for 256B payload, 32 for 128B payload +#define PAGE_SIZE 4096 // other values are not supported in the kernel + +#define USE_64 // Lorenzo: use 64bit addressing + +//#define DUAL_CORE // Lorenzo: DUAL Core + +//#define SHARED_MEMORY // Lorenzo: Test for fast GUI + +#define CHECK_READY // Lorenzo: Check if PCI-Express is ready by reading 0x0 +#define MEM_COPY // Lorenzo: CPY data +//#define CHECK_RESULTS // Lorenzo: Check if data received is ok (only for counter!) +//#define CHECK_RESULTS_LOG // Lorenzo: Check if data received is ok (only for counter!) +#define PRINT_RESULTS // Lorenzo: Save the received data in "data.out" +//#define EXIT_ON_EMPTY // Lorenzo: Exit if an "empty_detected" signal is received + +#define TIMEOUT 1000000 + + + +/* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help, + otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite + much extra time */ + +//#define USE_IRQ +//#define REALTIME +//#define ADD_DELAYS + + +#define FPGA_CLOCK 250 // Lorenzo: in MHz ! + +//#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); } +//#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; } +#define WR(addr, value) { *(uint32_t*)(bar + addr + offset) = value; } +#define RD(addr, value) { value = *(uint32_t*)(bar + addr + offset); } + +// ************************************************************************************** +// Progress BAR +// Process has done x out of n rounds, +// and we want a bar of width w and resolution r. + static inline void loadBar(int x, int n, int r, int w) + { + // Only update r times. + if ( x % (n/r +1) != 0 ) return; + + // Calculuate the ratio of complete-to-incomplete. + float ratio = x/(float)n; + int c = ratio * w; + + // Show the percentage complete. + printf("%3d%% [", (int)(ratio*100) ); + + // Show the load bar. + for (x=0; x<c; x++) + printf("="); + + for (x=c; x<w; x++) + printf(" "); + + // ANSI Control codes to go back to the + // previous line and clear it. + printf("]\n\033[F\033[J"); + } +// ************************************************************************************** + + + static void fail(const char *msg, ...) { + va_list va; + + va_start(va, msg); + vprintf(msg, va); + va_end(va); + printf("\n"); + + exit(-1); +} + +void hpsleep(size_t ns) { + struct timespec wait, tv; + + clock_gettime(CLOCK_REALTIME, &wait); + + wait.tv_nsec += ns; + if (wait.tv_nsec > 999999999) { + wait.tv_sec += 1; + wait.tv_nsec = 1000000000 - wait.tv_nsec; + } + + do { + clock_gettime(CLOCK_REALTIME, &tv); + } while ((wait.tv_sec > tv.tv_sec)||((wait.tv_sec == tv.tv_sec)&&(wait.tv_nsec > tv.tv_nsec))); +} + + +// ************************************************************************************** +int main() { + + + + int err; + long i, j; + pcilib_t *pci; + pcilib_kmem_handle_t *kdesc; + pcilib_kmem_handle_t *kbuf; + struct timeval start, end; + size_t run_time; + long long int size_mb; + void* volatile bar; + uintptr_t bus_addr[BUFFERS]; + uintptr_t kdesc_bus; + volatile uint32_t *desc; + typedef volatile uint32_t *Tbuf; + Tbuf ptr[BUFFERS]; + +#ifdef SWITCH_GENERATOR + int switch_generator = 0; +#endif /* SWITCH_GENERATOR */ +#if defined(CHECK_RESULTS)||defined(CHECK_RESULTS_LOG) + long k; + int mem_diff; +#endif /* CHECK_RESULTS */ + + + float performance, perf_counter; + pcilib_bar_t bar_tmp = BAR; + uintptr_t offset = 0; + + unsigned int temp; + int iterations_completed, buffers_filled; + + +// int shmid; + + + printf("\n\n**** **** **** KIT-DMA TEST **** **** ****\n\n"); + + //size = ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE; + size_mb = ITERATIONS * BUFFERS * HUGE_PAGE * 4 / 1024; + printf("Total size of memory buffer: \t %.3lf GBytes\n", (float)size_mb/1024 ); + printf("Using %d Buffers with %d iterations\n\n", BUFFERS, ITERATIONS ); + +#ifdef ADD_DELAYS + long rpt = 0, rpt2 = 0; + size_t best_time; + best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024); +#endif /* ADD_DELAYS */ + + + pcilib_kmem_flags_t flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE/*|PCILIB_KMEM_FLAG_REUSE*/; // Lorenzo: if REUSE = 1, the re-allocation fails! + pcilib_kmem_flags_t free_flags = PCILIB_KMEM_FLAG_HARDWARE/*|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_REUSE*/; + pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE; + + pci = pcilib_open(DEVICE, "pci"); + if (!pci) fail("pcilib_open"); + + bar = pcilib_map_bar(pci, BAR); + if (!bar) { + pcilib_close(pci); + fail("map bar"); + } + + pcilib_detect_address(pci, &bar_tmp, &offset, 1); + + pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0); + pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT); + + pcilib_clean_kernel_memory(pci, USE, clean_flags); + pcilib_clean_kernel_memory(pci, USE_RING, clean_flags); + + kdesc = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, USE_RING, flags); + kdesc_bus = pcilib_kmem_get_block_ba(pci, kdesc, 0); + desc = (uint32_t*)pcilib_kmem_get_block_ua(pci, kdesc, 0); + memset((void*)desc, 0, 5*sizeof(uint32_t)); + +#ifdef REALTIME + pid_t pid; + struct sched_param sched = {0}; + + pid = getpid(); + sched.sched_priority = sched_get_priority_min(SCHED_FIFO); + if (sched_setscheduler(pid, SCHED_FIFO, &sched)) + printf("Warning: not able to get real-time priority\n"); +#endif /* REALTIME */ + + // ****************************************************************** + // **** MEM: check 4k boundary ***** + // ****************************************************************** + + do { + printf("* Allocating KMem, "); +#ifdef STATIC_REGION + kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, flags); +#else + kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, flags); +#endif + + if (!kbuf) { + printf("KMem allocation failed\n"); + exit(0); + } + + // Pointers for Virtualized Mem + for (j = 0; j < BUFFERS; j++) { + ptr[j] = (volatile uint32_t*)pcilib_kmem_get_block_ua(pci, kbuf, j); + memset((void*)(ptr[j]), 0, HUGE_PAGE * PAGE_SIZE); + } + + err = 0; + + // Check if HW addresses satisfy 4k boundary condition, if not -> free (!!) and reallocate memory + printf("4k boundary test: "); + for (j = 0; j < BUFFERS; j++) { + temp = (((unsigned int)pcilib_kmem_get_block_ba(pci, kbuf, j)) % 4096); + //printf("%u", temp); + if (temp != 0) { + err = 1; + } + } + if (err == 1) { + pcilib_clean_kernel_memory(pci, USE, clean_flags); + pcilib_clean_kernel_memory(pci, USE_RING, clean_flags); + pcilib_free_kernel_memory(pci, kbuf, free_flags); + printf("failed \xE2\x9C\x98\n"); + } + else printf("passed \xE2\x9C\x93\n"); + + } while (err == 1); + + + // ****************************************************************** + // **** Allocate RAM buffer Memory ***** + // ****************************************************************** + + FILE * Output; + FILE * error_log; + +#ifdef MEM_COPY + + uint32_t *temp_data[ITERATIONS][BUFFERS]; + + for (j=0; j < ITERATIONS; j++) { + for (i=0; i < BUFFERS; i++) { + temp_data[j][i] = (uint32_t *)malloc(HUGE_PAGE*PAGE_SIZE); + if (temp_data[j][i] == 0) { + printf("******* Error: could not allocate memory! ********\n"); + exit(0); + } + memset((void*)(temp_data[j][i]), 0, HUGE_PAGE * PAGE_SIZE); + } + } +#endif + +#ifdef SHARED_MEMORY + // give your shared memory an id, anything will do + key_t key = 123456; + char *shared_memory; + + // Setup shared memory, 11 is the size +/* if ((shmid = shmget(key, HUGE_PAGE*PAGE_SIZE, IPC_CREAT | 0666)) < 0) + { + printf("Error getting shared memory id"); + exit(1); + } + + // Attached shared memory + if ((shared_memory = shmat(shmid, NULL, 0)) == (char *) -1) + { + printf("Error attaching shared memory id"); + exit(1); + } + printf("* Shared memory created... Id:\t %d\n", key); + //////////////// SHARED MEMORY TEST */ +#endif + + Output = fopen ("data.out", "w"); + fclose(Output); + + error_log = fopen ("error_log.txt", "w"); + fclose(error_log); + + // ****************************************************************** + // **** PCIe TEST ***** + // ****************************************************************** + + // Reset DMA + printf("* DMA: Reset...\n"); + WR(0x00, 0x1); + usleep(100000); + WR(0x00, 0x0); + usleep(100000); + +#ifdef CHECK_READY + printf("* PCIe: Testing..."); + RD(0x0, err); + if (err == 335746816 || err == 335681280) { + printf("\xE2\x9C\x93 \n"); + } else { + printf("\xE2\x9C\x98\n PCIe not ready!\n"); + exit(0); + } +#endif + + + // ****************************************************************** + // **** DMA CONFIGURATION ***** + // ****************************************************************** + + printf("* DMA: Send Data Amount\n"); +#ifdef DUAL_CORE + WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))/2); +#else + WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))); +#endif + + printf("* DMA: Running mode: "); + +#ifdef USE_64 + if (TLP_SIZE == 64) + { + WR(0x0C, 0x80040); + printf ("64bit - 256B Payload\n"); + } + else if (TLP_SIZE == 32) + { + WR(0x0C, 0x80020); + printf ("64bit - 128B Payload\n"); + } +#else + if (TLP_SIZE == 64) + { + WR(0x0C, 0x0040); + printf ("32bit - 256B Payload\n"); + } + else if (TLP_SIZE == 32) + { + WR(0x0C, 0x0020); + printf ("32bit - 128B Payload\n"); + } +#endif + + printf("* DMA: Reset Desc Memory...\n"); + WR(0x5C, 0x00); // RST Desc Memory + + printf("Writing SW Read Descriptor\n"); + WR(0x58, BUFFERS-1); + //WR(0x58, 0x01); + + printf("Writing the Descriptor Threshold\n"); + WR(0x60, DESC_THRESHOLD); + + printf("Writing HW write Descriptor Address: %lx\n", kdesc_bus); + WR(0x54, kdesc_bus); + usleep(100000); + + printf("* DMA: Writing Descriptors\n"); + for (j = 0; j < BUFFERS; j++ ) { + bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j); + // LEAVE THIS DELAY???!?!?!?! + usleep(1000); + //printf("Writing descriptor num. %ld: \t %08lx \n", j, bus_addr[j]); + WR(0x50, bus_addr[j]); + } + + // ****************************************************************** + // **** START DMA ***** + // ****************************************************************** + + //printf ("\n ---- Press ENTER to start DMA ---- \n"); + //getchar(); + + printf("* DMA: Start \n"); + WR(0x04, 0x1); + gettimeofday(&start, NULL); + + // ****************************************************************** + // **** Handshaking DMA ***** + // ****************************************************************** + + uint32_t curptr = 0, hwptr; + uint32_t curbuf = 0; + int empty = 0; + i = 0; + + + while (i < ITERATIONS) { + j = 0; + //printf("\ndesc0: %lx", desc[0]); + //printf("\ndesc1: %lx", desc[1]); + //printf("\ndesc2: %lx", desc[2]); + //printf("\ndesc3: %lx", desc[3]); + //printf("\ndesc4: %lx", desc[4]); + // printf("\ndesc5: %lx", htonl(desc[5])); + //printf("Iteration: %li of %li \r", i+1, ITERATIONS); + //getchar(); + //loadBar(i+1, ITERATIONS, ITERATIONS, 30); + // printf("\nhwptr: %zu", hwptr); + // printf("\ncurptr: %zu", curptr); + + do { +#ifdef USE_64 + hwptr = desc[3]; +#else // 32-bit + hwptr = desc[4]; +#endif + j++; + //printf("\rcurptr: %lx \t \t hwptr: %lx", curptr, hwptr); + } while (hwptr == curptr); + + do { + pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, curbuf); +#ifdef MEM_COPY + memcpy(temp_data[i][curbuf], (void*)ptr[curbuf], 4096); +#endif +#ifdef CHECK_RESULTS +for (k = 0; k < 1024 ; k++) + { + mem_diff = (ptr[curbuf][k] - ptr[curbuf][k]); + //if ((mem_diff == 1) || (mem_diff == (-7)) || (k == 1023) ) + if (mem_diff == -1) + {;} + else { + //fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j][k], temp_data[i][j][k+1], mem_diff); + err++; + } + } +#endif +#ifdef SHARED_MEMORY + memcpy(shared_memory, ptr[curbuf], 4096); +#endif + //printf("\ncurbuf: %08x", curbuf); + //printf("\nbus_addr[curbuf]\n: %08x",bus_addr[curbuf]); + // for (k = 0; k < 63; k++){ + // if (k%16 == 0) printf("\n# %d # :", k); + // printf(" %08x", ptr[curbuf][k]); + // } + //pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_TODEVICE, curbuf); + curbuf++; + if (curbuf == BUFFERS) { + i++; + curbuf = 0; +#ifdef SWITCH_GENERATOR + if (switch_generator == 1) { + switch_generator = 0; + WR(0x9040, 0x100007F0); + } else { + WR(0x9040, 0x180007F0); + switch_generator = 1; + } +#endif + if (i >= ITERATIONS) break; + //if (i >= (ITERATIONS - 4) ) WR(0x04, 0x0f); + } + } while (bus_addr[curbuf] != hwptr); + +#ifdef EXIT_ON_EMPTY +#ifdef USE_64 + if (desc[1] != 0) +#else // 32bit + if (desc[2] != 0) +#endif + { + if (bus_addr[curbuf] == hwptr) { + empty = 1; + break; + } + } +#endif + + WR(0x58, curbuf + 1); + //printf("WR %d\n", curbuf + 1); + //printf("%u (%lu)\n", curbuf, j); + curptr = hwptr; + } + + + // ****************************************************************** + // **** Read performance and stop DMA ******* + // ****************************************************************** + + gettimeofday(&end, NULL); + WR(0x04, 0x00); + usleep(100); + RD(0x28, perf_counter); + usleep(100); + WR(0x00, 0x01); + + + + + iterations_completed = i; + buffers_filled = curbuf; + if (empty) printf("* DMA: Empty FIFO! Last iteration: %li of %i\n", i+1, ITERATIONS); + printf ("* DMA: Stop\n\n"); + +#ifdef MEM_COPY + printf ("First value:\t %08x\n", temp_data[0][0][0]); + printf ("Last value:\t %08x\n\n", temp_data[ITERATIONS-1][BUFFERS-1][(PAGE_SIZE/4)-4]); +#endif + + // ****************************************************************** + // **** Performance ******* + // ****************************************************************** + printf("Iterations done: %d\n", iterations_completed); + printf("Buffers filled on last iteration: %d\n", buffers_filled); + + + run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); + //size = (long long int) (( BUFFERS * (iterations_completed) + buffers_filled) * HUGE_PAGE * PAGE_SIZE); + size_mb = (long long int) (( BUFFERS * (iterations_completed) + buffers_filled) * HUGE_PAGE * 4 / 1024); + printf("Performance: transfered %llu Mbytes in %zu us using %d buffers\n", (size_mb), run_time, BUFFERS); + //printf("Buffers: \t %d \n", BUFFERS); + //printf("Buf_Size: \t %d \n", PAGE_SIZE); + //printf("Perf_counter: \t %f \n", perf_counter); + performance = ((size_mb * FPGA_CLOCK * 1000000)/(perf_counter*256)); + printf("DMA perf counter:\t%d\n", (int)perf_counter); + printf("DMA side:\t\t%.3lf MB/s\n", performance); + printf("PC side:\t\t%.3lf MB/s\n\n", 1000000. * size_mb / run_time ); + + // ****************************************************************** + // **** Read Data ******* + // ****************************************************************** + + + #ifdef PRINT_RESULTS + printf("Writing Data to HDD... \n"); + for (i=0; i < iterations_completed; i++) { + for (j=0; j < BUFFERS; j++) + { + Output = fopen("data.out", "a"); + fwrite(temp_data[i][j], 4096, 1, Output); + fclose(Output); + } + loadBar(i+1, ITERATIONS, ITERATIONS, 30); + } + // Save last partially filled iteration + for (j=0; j < buffers_filled; j++) + { + Output = fopen("data.out", "a"); + fwrite(temp_data[iterations_completed][j], 4096, 1, Output); + fclose(Output); + } + printf("Data saved in data.out. \n"); + #endif + + #ifdef CHECK_RESULTS_LOG + err = 0; + error_log = fopen ("error_log.txt", "a"); + printf("\nChecking data ...\n"); + for (i=0; i < iterations_completed; i++) { + for (j = 0; j < BUFFERS; j++) { + for (k = 0; k < 1024 ; k++) + { + mem_diff = ((uint32_t)temp_data[i][j][k] - (uint32_t)temp_data[i][j][k+1]); + //if ((mem_diff == 1) || (mem_diff == (-7)) || (k == 1023) ) + if ((mem_diff == -1) || (k == 1023) ) + {;} + else { + fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j][k], temp_data[i][j][k+1], mem_diff); + err++; + } + } + if (j != BUFFERS-1) { + // Check first and Last + mem_diff = (uint32_t)(temp_data[i][j+1][0] - temp_data[i][j][1023]); + if (mem_diff == (1)) + {;} + else { + fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j+1][0], temp_data[i][j][1023], mem_diff); + err++; + } + } + + } + loadBar(i+1, ITERATIONS, ITERATIONS, 30); + } + for (j = 0; j < buffers_filled; j++) { + for (k = 0; k < 1024 ; k++) + { + mem_diff = ((uint32_t)temp_data[iterations_completed][j][k] - (uint32_t)temp_data[iterations_completed][j][k+1]); + if ((mem_diff == -1) || (k == 1023) ) + {;} + else { + fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", iterations_completed, j, k, temp_data[iterations_completed][j][k], temp_data[iterations_completed][j][k+1], mem_diff); + err++; + } + } + if (j != buffers_filled-1) { + // Check first and Last + mem_diff = (uint32_t)(temp_data[i][j+1][0] - temp_data[i][j][1023]); + if (mem_diff == (1)) + {;} + else { + fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", iterations_completed, j, k, temp_data[iterations_completed][j+1][0], temp_data[iterations_completed][j][1023], mem_diff); + err++; + } + } + } + if (err != 0) printf("\rChecking data: \xE2\x9C\x98 %d errors found \n See \"error_log.txt\" for details \n\n", err); + else printf("\rChecking data: \xE2\x9C\x93 no errors found \n\n"); + fclose(error_log); + #endif + + // *********** Free Memory +#ifdef MEM_COPY + for (i=0; i < ITERATIONS; i++) { + for (j=0; j < BUFFERS; j++) + { + free(temp_data[i][j]); + } + } +#endif + + pcilib_free_kernel_memory(pci, kbuf, free_flags); + pcilib_free_kernel_memory(pci, kdesc, free_flags); + pcilib_disable_irq(pci, 0); + pcilib_unmap_bar(pci, BAR, bar); + pcilib_close(pci); + +// shmdt(shmid); +// shmctl(shmid, IPC_RMID, NULL); + +} |