From 40c61a3b927ce31e05a6eb0d7b36e09e67c6ca97 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@dside.dyndns.org>
Date: Mon, 11 Jul 2011 03:48:13 +0200
Subject: Wait for the completion of DMA operations during writes

---
 dma/nwl.c         | 358 ++++++------------------------------------------------
 dma/nwl_buffers.h | 294 ++++++++++++++++++++++++++++++++++++++++++++
 dma/nwl_dma.h     |   4 +-
 3 files changed, 335 insertions(+), 321 deletions(-)
 create mode 100644 dma/nwl_buffers.h

(limited to 'dma')

diff --git a/dma/nwl.c b/dma/nwl.c
index 0ef69a4..61de953 100644
--- a/dma/nwl.c
+++ b/dma/nwl.c
@@ -23,6 +23,10 @@
 #define NWL_FIX_EOP_FOR_BIG_PACKETS		// requires precise sizes in read requests
 #define NWL_GENERATE_DMA_IRQ
 
+#define PCILIB_NWL_ALIGNMENT 			64  // in bytes
+#define PCILIB_NWL_DMA_DESCRIPTOR_SIZE		64  // in bytes
+#define PCILIB_NWL_DMA_PAGES			512 // 1024
+
 
 static int nwl_read_engine_config(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, char *base) {
     uint32_t val;
@@ -200,15 +204,6 @@ void  dma_nwl_free(pcilib_dma_context_t *vctx) {
     }
 }
 
-#define PCILIB_NWL_ALIGNMENT 			64  // in bytes
-#define PCILIB_NWL_DMA_DESCRIPTOR_SIZE		64  // in bytes
-#define PCILIB_NWL_DMA_PAGES			512 // 1024
-
-#define NWL_RING_GET(data, offset)  *(uint32_t*)(((char*)(data)) + (offset))
-#define NWL_RING_SET(data, offset, val)  *(uint32_t*)(((char*)(data)) + (offset)) = (val)
-#define NWL_RING_UPDATE(data, offset, mask, val) *(uint32_t*)(((char*)(data)) + (offset)) = ((*(uint32_t*)(((char*)(data)) + (offset)))&(mask))|(val)
-
-
 int dma_nwl_sync_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, pcilib_kmem_handle_t *kmem) {
     switch (info->desc.direction) {
      case PCILIB_DMA_FROM_DEVICE:
@@ -220,298 +215,9 @@ int dma_nwl_sync_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info,
     return 0;
 }
 
-int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
-    int err = 0;
+#include "nwl_buffers.h"    
 
-    int i;
-    uint32_t val;
-    uint32_t buf_sz;
-    uint64_t buf_pa;
-
-    char *base = info->base_addr;
-    
-    if (info->pages) return 0;
-    
-    pcilib_kmem_handle_t *ring = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
-    pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_PAGE, PCILIB_NWL_DMA_PAGES, 0, 0, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
-
-    if ((ring)&&(pages)) err = dma_nwl_sync_buffers(ctx, info, pages);
-    else err = PCILIB_ERROR_FAILED;
-
-
-    if (err) {
-	if (pages) pcilib_free_kernel_memory(ctx->pcilib, pages);
-	if (ring) pcilib_free_kernel_memory(ctx->pcilib, ring);    
-	return err;
-    }
-    
-    unsigned char *data = (unsigned char*)pcilib_kmem_get_ua(ctx->pcilib, ring);
-    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, ring);
-    
-    memset(data, 0, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
-
-    for (i = 0; i < PCILIB_NWL_DMA_PAGES; i++, data += PCILIB_NWL_DMA_DESCRIPTOR_SIZE) {
-	buf_pa = pcilib_kmem_get_block_pa(ctx->pcilib, pages, i);
-	buf_sz = pcilib_kmem_get_block_size(ctx->pcilib, pages, i);
-
-	NWL_RING_SET(data, DMA_BD_NDESC_OFFSET, ring_pa + ((i + 1) % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
-	NWL_RING_SET(data, DMA_BD_BUFAL_OFFSET, buf_pa&0xFFFFFFFF);
-	NWL_RING_SET(data, DMA_BD_BUFAH_OFFSET, buf_pa>>32);
-#ifdef NWL_GENERATE_DMA_IRQ
-        NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
-#else /* NWL_GENERATE_DMA_IRQ */
-        NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz);
-#endif /* NWL_GENERATE_DMA_IRQ */
-    }
-
-    val = ring_pa;
-    nwl_write_register(val, ctx, base, REG_DMA_ENG_NEXT_BD);
-    nwl_write_register(val, ctx, base, REG_SW_NEXT_BD);
-    
-    info->ring = ring;
-    info->pages = pages;
-    info->page_size = buf_sz;
-    info->ring_size = PCILIB_NWL_DMA_PAGES;
-    
-    info->head = 0;
-    info->tail = 0;
-    
-    return 0;
-}
-
-static int dma_nwl_start(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
-    int err;
-    uint32_t ring_pa;
-    uint32_t val;
-
-    if (info->started) return 0;
-    
-    err = dma_nwl_allocate_engine_buffers(ctx, info);
-    if (err) return err;
-    
-    ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
-    nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);
-    nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
-
-    __sync_synchronize();
-
-    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
-    val |= (DMA_ENG_ENABLE);
-    nwl_write_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
-
-    __sync_synchronize();
-
-    if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) {
-	ring_pa += (info->ring_size - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-    	nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
-//	nwl_read_register(val, ctx, info->base_addr, 0x18);
-
-	info->tail = 0;
-	info->head = (info->ring_size - 1);
-    } else {
-	info->tail = 0;
-	info->head = 0;
-    }
-    
-    info->started = 1;
-    
-    return 0;
-}
-
-static size_t dma_nwl_clean_buffers(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info) {
-    size_t res = 0;
-    uint32_t status, control;
-
-    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
-    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-
-next_buffer:
-    status = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET)&DMA_BD_STATUS_MASK;
-//  control = NWL_RING_GET(ring, DMA_BD_BUFL_CTRL_OFFSET)&DMA_BD_CTRL_MASK;
-    
-    if (status & DMA_BD_ERROR_MASK) {
-        pcilib_error("NWL DMA Engine reported error in ring descriptor");
-        return (size_t)-1;
-    }
-	
-    if (status & DMA_BD_SHORT_MASK) {
-        pcilib_error("NWL DMA Engine reported short error");
-        return (size_t)-1;
-    }
-	
-    if (status & DMA_BD_COMP_MASK) {
-	info->tail++;
-	if (info->tail == info->ring_size) {
-	    ring -= (info->tail - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-	    info->tail = 0;
-	} else {
-	    ring += PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-	}
-	
-	res++;
-
-	if (info->tail != info->head) goto next_buffer;
-    }
-    
-//    printf("====> Cleaned: %i\n", res);
-    return res;
-}
-
-
-static size_t dma_nwl_get_next_buffer(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info, size_t n_buffers, size_t timeout) {
-    struct timeval start, cur;
-
-    size_t res, n = 0;
-    size_t head;
-
-    for (head = info->head; (((head + 1)%info->ring_size) != info->tail)&&(n < n_buffers); head++, n++);
-    if (n == n_buffers) return info->head;
-
-    gettimeofday(&start, NULL);
-
-    res = dma_nwl_clean_buffers(ctx, info);
-    if (res == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
-    else n += res;
-
-    
-    while (n < n_buffers) {
-	if (timeout != PCILIB_TIMEOUT_INFINITE) {
-	    gettimeofday(&cur, NULL);
-	    if  (((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) > timeout) break;
-	}
-	
-	usleep (10);	
-
-        res = dma_nwl_clean_buffers(ctx, info);
-        if (res == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
-	else if (res > 0) {
-	    gettimeofday(&start, NULL);
-	    n += res;
-	}
-    }
-    
-    if (n < n_buffers) return PCILIB_DMA_BUFFER_INVALID;
-    
-    return info->head;
-}
-
-static int dma_nwl_push_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t size, int eop, size_t timeout) {
-    int flags;
-    
-    uint32_t val;
-    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
-    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
-
-    ring += info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-
-    
-    if (!info->writting) {
-	flags |= DMA_BD_SOP_MASK;
-	info->writting = 1;
-    }
-    if (eop) {
-	flags |= DMA_BD_EOP_MASK;
-	info->writting = 0;
-    }
-
-    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, size|flags);
-    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, size);
-
-    info->head++;
-    if (info->head == info->ring_size) info->head = 0;
-    
-    val = ring_pa + info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
-//    nwl_read_register(val, ctx, info->base_addr, 0x18);
-
-//    usleep(10000);
-
-//    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_LAST_BD);
-//    printf("Last BD(Write): %lx %lx\n", ring, val);
-    
-    
-    return 0;
-}
-
-
-static size_t dma_nwl_wait_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t *size, int *eop, size_t timeout) {
-    uint32_t val;
-    struct timeval start, cur;
-    uint32_t status_size, status, control;
-
-//    usleep(10000);
-    
-    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
-    
-//    status_size = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET);
-//    printf("Status0: %lx\n", status_size);
-
-    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-
-    gettimeofday(&start, NULL);
-    
-//    printf("Waiting %li\n", info->tail);
-//    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_LAST_BD);
-//    printf("Last BD(Read): %lx %lx\n", ring, val);
-
-    do {
-	status_size = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET);
-	status = status_size & DMA_BD_STATUS_MASK;
-	
-//	printf("%i: %lx\n", info->tail, status_size);
-    
-	if (status & DMA_BD_ERROR_MASK) {
-    	    pcilib_error("NWL DMA Engine reported error in ring descriptor");
-    	    return (size_t)-1;
-	}	
-	
-	if (status & DMA_BD_COMP_MASK) {
-	    if (status & DMA_BD_EOP_MASK) *eop = 1;
-	    else *eop = 0;
-        
-	    *size = status_size & DMA_BD_BUFL_MASK;
-	
-//	    printf("Status: %lx\n", status_size);
-	    return info->tail;
-	}
-	
-	usleep(10);
-        gettimeofday(&cur, NULL);
-    } while ((timeout == PCILIB_TIMEOUT_INFINITE)||(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < timeout));
-
-//    printf("Final status: %lx\n", status_size);
-    
-    return (size_t)-1;
-}
-
-static int dma_nwl_return_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
-    uint32_t val;
-
-    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
-    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
-    size_t bufsz = pcilib_kmem_get_block_size(ctx->pcilib, info->pages, info->tail);
-
-    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-//    printf("Returning: %i\n", info->tail);
-
-#ifdef NWL_GENERATE_DMA_IRQ    
-    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, bufsz | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
-#else /* NWL_GENERATE_DMA_IRQ */
-    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, bufsz);
-#endif /* NWL_GENERATE_DMA_IRQ */
-
-    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, 0);
-
-    val = ring_pa + info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
-//    nwl_read_register(val, ctx, info->base_addr, 0x18);
-    
-    info->tail++;
-    if (info->tail == info->ring_size) info->tail = 0;
-}
-    
-
-size_t dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data) {
+int dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data, size_t *written) {
     int err;
     size_t pos;
     size_t bufnum;
@@ -520,26 +226,40 @@ size_t dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dm
     pcilib_nwl_engine_description_t *info = ctx->engines + dma;
 
     err = dma_nwl_start(ctx, info);
-    if (err) return 0;
+    if (err) return err;
 
-    for (pos = 0; pos < size; pos += info->page_size) {
-	int block_size = min2(size - pos, info->page_size);
+    if (data) {
+	for (pos = 0; pos < size; pos += info->page_size) {
+	    int block_size = min2(size - pos, info->page_size);
     
-        bufnum = dma_nwl_get_next_buffer(ctx, info, 1, timeout);
-	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return pos;
+    	    bufnum = dma_nwl_get_next_buffer(ctx, info, 1, timeout);
+	    if (bufnum == PCILIB_DMA_BUFFER_INVALID) {
+		if (written) *written = pos;
+		return PCILIB_ERROR_TIMEOUT;
+	    }
 	
-	    //sync
-        void *buf = pcilib_kmem_get_block_ua(ctx->pcilib, info->pages, bufnum);
-	memcpy(buf, data, block_size);
+    	    void *buf = pcilib_kmem_get_block_ua(ctx->pcilib, info->pages, bufnum);
+	    memcpy(buf, (char*)data + pos, block_size);	// each page takes the next slice of data, not the first one again
 
-	err = dma_nwl_push_buffer(ctx, info, block_size, (flags&PCILIB_DMA_FLAG_EOP)&&((pos + block_size) == size), timeout);
-	if (err) return pos;
-    }    
+	    err = dma_nwl_push_buffer(ctx, info, block_size, (flags&PCILIB_DMA_FLAG_EOP)&&((pos + block_size) == size), timeout);
+	    if (err) {
+		if (written) *written = pos;
+		return err;
+	    }
+	}    
+    }
+    // all pages are queued: report the full size, then optionally wait until PCILIB_NWL_DMA_PAGES - 1 buffers are available again
+    if (written) *written = size;
+    
+    if (flags&PCILIB_DMA_FLAG_WAIT) {
+	bufnum =  dma_nwl_get_next_buffer(ctx, info, PCILIB_NWL_DMA_PAGES - 1, timeout);
+	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return PCILIB_ERROR_TIMEOUT;
+    }
     
-    return size;
+    return 0;	// the return value is an error code now; the byte count is delivered through *written
 }
 
-size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr) {
+int dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr) {
     int err, ret;
     size_t res = 0;
     size_t bufnum;
@@ -552,11 +272,11 @@ size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma,
     pcilib_nwl_engine_description_t *info = ctx->engines + dma;
 
     err = dma_nwl_start(ctx, info);
-    if (err) return 0;
+    if (err) return err;
 
     do {
         bufnum = dma_nwl_wait_buffer(ctx, info, &bufsize, &eop, timeout);
-	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return 0;
+	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return PCILIB_ERROR_TIMEOUT;
 
 #ifdef NWL_FIX_EOP_FOR_BIG_PACKETS
 	if (size > 65536) {
@@ -576,7 +296,7 @@ size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma,
 //	printf("%i %i %i (%li)\n", ret, res, eop, size);
     } while (ret);
     
-    return res;
+    return 0;
 }
 
 
@@ -670,8 +390,8 @@ double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dm
 	if (direction&PCILIB_DMA_TO_DEVICE) {
 	    memcpy(buf, cmp, size * sizeof(uint32_t));
 
-	    bytes = pcilib_write_dma(ctx->pcilib, writeid, addr, size * sizeof(uint32_t), buf);
-	    if (bytes != size * sizeof(uint32_t)) {
+	    err = pcilib_write_dma(ctx->pcilib, writeid, addr, size * sizeof(uint32_t), buf, &bytes);
+	    if ((err)||(bytes != size * sizeof(uint32_t))) {
 		error = "Write failed";
 	        break;
 	    }
@@ -679,12 +399,12 @@ double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dm
 
 	memset(buf, 0, size * sizeof(uint32_t));
         
-	bytes = pcilib_read_dma(ctx->pcilib, readid, addr, size * sizeof(uint32_t), buf);
+	err = pcilib_read_dma(ctx->pcilib, readid, addr, size * sizeof(uint32_t), buf, &bytes);
         gettimeofday(&cur, NULL);
 	us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
 
 
-	if (bytes != size * sizeof(uint32_t)) {
+	if ((err)||(bytes != size * sizeof(uint32_t))) {
 //	     printf("RF: %li %li\n", bytes, size * 4);
 	     error = "Read failed";
 	     break;
diff --git a/dma/nwl_buffers.h b/dma/nwl_buffers.h
new file mode 100644
index 0000000..8d01650
--- /dev/null
+++ b/dma/nwl_buffers.h
@@ -0,0 +1,294 @@
+/* Raw 32-bit access to the descriptor field at byte `offset` from `data`; every expansion is parenthesized as a whole so it composes safely inside larger expressions. */
+#define NWL_RING_GET(data, offset)  (*(uint32_t*)(((char*)(data)) + (offset)))
+#define NWL_RING_SET(data, offset, val)  (*(uint32_t*)(((char*)(data)) + (offset)) = (val))
+#define NWL_RING_UPDATE(data, offset, mask, val) (*(uint32_t*)(((char*)(data)) + (offset)) = ((*(uint32_t*)(((char*)(data)) + (offset)))&(mask))|(val))
+/*
+ * Allocates the descriptor ring and the data pages of one engine and points each
+ * descriptor at its page. Returns 0 at once if pages are already assigned; on a
+ * failed allocation or sync both handles are released and the error is returned.
+ */
+int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    int status;
+    int page;
+    uint32_t block_size;
+    uint64_t block_pa;
+    char *regs = info->base_addr;
+
+    if (info->pages) return 0;
+
+    pcilib_kmem_handle_t *ring = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
+    pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_PAGE, PCILIB_NWL_DMA_PAGES, 0, 0, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
+
+    if ((!ring)||(!pages)) status = PCILIB_ERROR_FAILED;
+    else status = dma_nwl_sync_buffers(ctx, info, pages);
+
+    if (status) {
+	if (pages) pcilib_free_kernel_memory(ctx->pcilib, pages);
+	if (ring) pcilib_free_kernel_memory(ctx->pcilib, ring);
+	return status;
+    }
+
+    unsigned char *desc = (unsigned char*)pcilib_kmem_get_ua(ctx->pcilib, ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, ring);
+
+    memset(desc, 0, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+
+    page = 0;
+    while (page < PCILIB_NWL_DMA_PAGES) {
+	block_pa = pcilib_kmem_get_block_pa(ctx->pcilib, pages, page);
+	block_size = pcilib_kmem_get_block_size(ctx->pcilib, pages, page);
+	    // the modulo makes the last descriptor link back to the first one
+	NWL_RING_SET(desc, DMA_BD_NDESC_OFFSET, ring_pa + ((page + 1) % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+	NWL_RING_SET(desc, DMA_BD_BUFAL_OFFSET, block_pa&0xFFFFFFFF);
+	NWL_RING_SET(desc, DMA_BD_BUFAH_OFFSET, block_pa>>32);
+#ifdef NWL_GENERATE_DMA_IRQ
+	NWL_RING_SET(desc, DMA_BD_BUFL_CTRL_OFFSET, block_size | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
+#else /* NWL_GENERATE_DMA_IRQ */
+	NWL_RING_SET(desc, DMA_BD_BUFL_CTRL_OFFSET, block_size);
+#endif /* NWL_GENERATE_DMA_IRQ */
+	desc += PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+	page++;
+    }
+
+    nwl_write_register(ring_pa, ctx, regs, REG_DMA_ENG_NEXT_BD);
+    nwl_write_register(ring_pa, ctx, regs, REG_SW_NEXT_BD);
+    info->ring = ring;
+    info->pages = pages;
+    info->page_size = block_size;
+    info->ring_size = PCILIB_NWL_DMA_PAGES;
+    info->head = 0;
+    info->tail = 0;
+    return 0;
+}
+
+/* Starts an engine once: allocates its buffers, loads the ring address into both BD registers and sets DMA_ENG_ENABLE. Returns 0, or the allocation error. */
+static int dma_nwl_start(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    int err;
+    uint32_t ring_pa;
+    uint32_t val;
+
+    if (info->started) return 0;
+    
+    err = dma_nwl_allocate_engine_buffers(ctx, info);
+    if (err) return err;
+    
+    ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+    nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);
+    nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+    // full memory barrier between the ring-address writes above and the enable below
+    __sync_synchronize();
+
+    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+    val |= (DMA_ENG_ENABLE);
+    nwl_write_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+
+    __sync_synchronize();
+
+    if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) {
+	ring_pa += (info->ring_size - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    	nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+	// REG_SW_NEXT_BD now holds the last descriptor's address; head is set to the same index
+
+	info->tail = 0;
+	info->head = (info->ring_size - 1);
+    } else {
+	info->tail = 0;
+	info->head = 0;
+    }
+    
+    info->started = 1;
+    
+    return 0;
+}
+
+/*
+ * Moves info->tail forward over descriptors whose status has DMA_BD_COMP_MASK
+ * set, stopping at the first one without it or as soon as tail reaches head.
+ * Returns the number of descriptors passed, or (size_t)-1 after pcilib_error()
+ * when a descriptor has DMA_BD_ERROR_MASK or DMA_BD_SHORT_MASK set.
+ */
+static size_t dma_nwl_clean_buffers(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info) {
+    size_t cleaned = 0;
+    unsigned char *first = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+
+    do {
+	    // the descriptor is located from the current tail on every pass
+	unsigned char *desc = first + info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+	uint32_t status = NWL_RING_GET(desc, DMA_BD_BUFL_STATUS_OFFSET)&DMA_BD_STATUS_MASK;
+
+	if (status & DMA_BD_ERROR_MASK) {
+	    pcilib_error("NWL DMA Engine reported error in ring descriptor");
+	    return (size_t)-1;
+	}
+
+	if (status & DMA_BD_SHORT_MASK) {
+	    pcilib_error("NWL DMA Engine reported short error");
+	    return (size_t)-1;
+	}
+
+	    // not completed yet: leave tail where it is
+	if (!(status & DMA_BD_COMP_MASK)) break;
+
+	info->tail++;
+	if (info->tail == info->ring_size) {
+	    info->tail = 0;
+	}
+
+	cleaned++;
+    } while (info->tail != info->head);
+
+    return cleaned;
+}
+
+
+/*
+ * Waits until n_buffers ring slots are available and returns info->head. Slots
+ * free ahead of head are counted first; then completed descriptors are reclaimed,
+ * sleeping 10 us between attempts. The timeout (microseconds) restarts whenever a
+ * reclaim makes progress. Returns PCILIB_DMA_BUFFER_INVALID on timeout or error.
+ */
+static size_t dma_nwl_get_next_buffer(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info, size_t n_buffers, size_t timeout) {
+    struct timeval since, now;
+    size_t avail = 0;
+    size_t freed;
+    size_t slot = info->head;
+
+    while ((((slot + 1)%info->ring_size) != info->tail)&&(avail < n_buffers)) {
+	slot++;
+	avail++;
+    }
+    if (avail == n_buffers) return info->head;
+
+    gettimeofday(&since, NULL);
+    freed = dma_nwl_clean_buffers(ctx, info);
+    if (freed == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
+    avail += freed;
+
+    while (avail < n_buffers) {
+	if (timeout != PCILIB_TIMEOUT_INFINITE) {
+	    gettimeofday(&now, NULL);
+	    if  (((now.tv_sec - since.tv_sec)*1000000 + (now.tv_usec - since.tv_usec)) > timeout) return PCILIB_DMA_BUFFER_INVALID;
+	}
+	usleep (10);
+	freed = dma_nwl_clean_buffers(ctx, info);
+	if (freed == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
+	if (freed > 0) {
+	    gettimeofday(&since, NULL);
+	    avail += freed;
+	}
+    }
+    return info->head;
+}
+
+/*
+ * Queues the buffer at info->head: writes `size` plus the SOP/EOP flags into the
+ * descriptor's control word and `size` into its status word, advances head with
+ * wrap-around and stores the address of the new head descriptor in
+ * REG_SW_NEXT_BD. SOP is added whenever info->writting is clear, which is the
+ * state left behind by a push with `eop` set. `timeout` is unused. Returns 0.
+ */
+static int dma_nwl_push_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t size, int eop, size_t timeout) {
+    int flags = 0;	// starts empty; it used to be OR-ed into while still uninitialized
+
+    uint32_t val;
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+
+    ring += info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+
+    if (!info->writting) {
+	flags |= DMA_BD_SOP_MASK;
+	info->writting = 1;
+    }
+
+    if (eop) {
+	flags |= DMA_BD_EOP_MASK;
+	info->writting = 0;
+    }
+
+    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, size|flags);
+    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, size);
+
+    info->head++;
+    if (info->head == info->ring_size) info->head = 0;
+
+    val = ring_pa + info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
+
+    return 0;
+}
+
+
+/*
+ * Polls the descriptor at info->tail until its status word has DMA_BD_COMP_MASK
+ * set. On completion the byte count (status & DMA_BD_BUFL_MASK) is stored in
+ * *size, *eop is set to 1 or 0 according to DMA_BD_EOP_MASK, and info->tail is
+ * returned; tail itself is not advanced here.
+ *
+ * Returns PCILIB_DMA_BUFFER_INVALID - the value dma_nwl_stream_read() tests for
+ * and dma_nwl_get_next_buffer() also returns - when the descriptor has
+ * DMA_BD_ERROR_MASK set or when `timeout` microseconds pass without completion.
+ * PCILIB_TIMEOUT_INFINITE disables the timeout. The status is read at least
+ * once, whatever the timeout.
+ */
+static size_t dma_nwl_wait_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t *size, int *eop, size_t timeout) {
+    struct timeval start, cur;
+    uint32_t status_size, status;
+
+	// descriptor that currently sits at the tail index of the ring
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+
+    gettimeofday(&start, NULL);
+
+    do {
+	status_size = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET);
+	status = status_size & DMA_BD_STATUS_MASK;
+
+	if (status & DMA_BD_ERROR_MASK) {
+	    pcilib_error("NWL DMA Engine reported error in ring descriptor");
+	    return PCILIB_DMA_BUFFER_INVALID;
+	}
+
+	if (status & DMA_BD_COMP_MASK) {
+	    if (status & DMA_BD_EOP_MASK) *eop = 1;
+	    else *eop = 0;
+
+	    *size = status_size & DMA_BD_BUFL_MASK;
+
+	    return info->tail;
+	}
+
+	    // not ready yet: sleep 10 us before the status word is read again
+	usleep(10);
+	gettimeofday(&cur, NULL);
+    } while ((timeout == PCILIB_TIMEOUT_INFINITE)||(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < timeout));
+
+	// timed out without seeing DMA_BD_COMP_MASK
+    return PCILIB_DMA_BUFFER_INVALID;
+}
+
+/*
+ * Re-arms the descriptor at info->tail: restores the block size in its control
+ * word, zeroes its status word, writes its address to REG_SW_NEXT_BD and moves
+ * tail forward with wrap-around. Always returns 0.
+ */
+static int dma_nwl_return_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    uint32_t val;
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+    size_t bufsz = pcilib_kmem_get_block_size(ctx->pcilib, info->pages, info->tail);
+    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+#ifdef NWL_GENERATE_DMA_IRQ
+    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, bufsz | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
+#else /* NWL_GENERATE_DMA_IRQ */
+    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, bufsz);
+#endif /* NWL_GENERATE_DMA_IRQ */
+    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, 0);
+
+    val = ring_pa + info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
+
+    info->tail++;
+    if (info->tail == info->ring_size) info->tail = 0;
+    return 0;	// declared int: control used to fall off the end without a value
+}
diff --git a/dma/nwl_dma.h b/dma/nwl_dma.h
index 8468f52..b8d6cce 100644
--- a/dma/nwl_dma.h
+++ b/dma/nwl_dma.h
@@ -11,8 +11,8 @@ typedef struct nwl_dma_s nwl_dma_t;
 pcilib_dma_context_t *dma_nwl_init(pcilib_t *ctx);
 void  dma_nwl_free(pcilib_dma_context_t *vctx);
 
-size_t dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data);
-size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+int dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data, size_t *written);
+int dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
 double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
 
 
-- 
cgit v1.2.3