Abstract the orion_nand_fast_block_write() routine into a separate routine -- arm_nandwrite() -- so that other ARM cores can reuse it. Have davinci_nand do so. This is faster than byte-at-a-time ops by a factor of three (!), even given the slowish interactions to support hardware ECC (1-bit flavor in that test) each 512 bytes; those could be read more efficiently by on-chip code. NOTE that until there's a generic "ARM algorithm" structure, this can't work on newer ARMv6 (like ARM1136) or ARMv7A (like Cortex-A8) cores, though the downloaded code itself would work just fine there. git-svn-id: svn://svn.berlios.de/openocd/trunk@2663 b42882b7-edfa-0310-969c-e2dbd0fdcd60 tags/v0.3.0-rc0
@@ -6,6 +6,7 @@ AM_CPPFLAGS = \ | |||
METASOURCES = AUTO | |||
noinst_LTLIBRARIES = libflash.la | |||
libflash_la_SOURCES = \ | |||
arm_nandio.c \ | |||
flash.c \ | |||
lpc2000.c \ | |||
cfi.c \ | |||
@@ -38,6 +39,7 @@ libflash_la_SOURCES = \ | |||
avrf.c | |||
noinst_HEADERS = \ | |||
arm_nandio.h \ | |||
flash.h \ | |||
lpc2000.h \ | |||
cfi.h \ | |||
@@ -0,0 +1,131 @@ | |||
/* | |||
* Copyright (C) 2009 by Marvell Semiconductors, Inc. | |||
* Written by Nicolas Pitre <nico at marvell.com> | |||
* | |||
* Copyright (C) 2009 by David Brownell | |||
* | |||
* This program is free software; you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation; either version 2 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with this program; if not, write to the | |||
* Free Software Foundation, Inc., | |||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |||
*/ | |||
#ifdef HAVE_CONFIG_H | |||
#include "config.h" | |||
#endif | |||
#include "arm_nandio.h" | |||
#include "armv4_5.h" | |||
/* | |||
* ARM-specific bulk write from buffer to address of 8-bit wide NAND. | |||
* For now this only supports ARMv4 and ARMv5 cores. | |||
* | |||
* Enhancements to target_run_algorithm() could enable: | |||
* - faster writes: on ARMv5+ don't setup/teardown hardware breakpoint | |||
* - ARMv6 and ARMv7 cores in ARM mode | |||
* | |||
* Different code fragments could handle: | |||
* - Thumb2 cores like Cortex-M (needs different byteswapping) | |||
* - 16-bit wide data (needs different setup too) | |||
*/ | |||
int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size) | |||
{ | |||
target_t *target = nand->target; | |||
armv4_5_algorithm_t algo; | |||
reg_param_t reg_params[3]; | |||
uint32_t target_buf; | |||
int retval; | |||
/* Inputs: | |||
* r0 NAND data address (byte wide) | |||
* r1 buffer address | |||
* r2 buffer length | |||
*/ | |||
static const uint32_t code[] = { | |||
0xe4d13001, /* s: ldrb r3, [r1], #1 */ | |||
0xe5c03000, /* strb r3, [r0] */ | |||
0xe2522001, /* subs r2, r2, #1 */ | |||
0x1afffffb, /* bne s */ | |||
/* exit: ARMv4 needs hardware breakpoint */ | |||
0xe1200070, /* e: bkpt #0 */ | |||
}; | |||
if (!nand->copy_area) { | |||
uint8_t code_buf[sizeof(code)]; | |||
unsigned i; | |||
/* make sure we have a working area */ | |||
if (target_alloc_working_area(target, | |||
sizeof(code) + nand->chunk_size, | |||
&nand->copy_area) != ERROR_OK) { | |||
LOG_DEBUG("%s: no %d byte buffer", | |||
__FUNCTION__, | |||
(int) sizeof(code) + nand->chunk_size); | |||
return ERROR_NAND_NO_BUFFER; | |||
} | |||
/* buffer code in target endianness */ | |||
for (i = 0; i < sizeof(code) / 4; i++) | |||
target_buffer_set_u32(target, code_buf + i * 4, code[i]); | |||
/* copy code to work area */ | |||
retval = target_write_memory(target, | |||
nand->copy_area->address, | |||
4, sizeof(code) / 4, code_buf); | |||
if (retval != ERROR_OK) | |||
return retval; | |||
} | |||
/* copy data to work area */ | |||
target_buf = nand->copy_area->address + sizeof(code); | |||
retval = target_bulk_write_memory(target, target_buf, size / 4, data); | |||
if (retval == ERROR_OK && (size & 3) != 0) | |||
retval = target_write_memory(target, | |||
target_buf + (size & ~3), | |||
1, size & 3, data + (size & ~3)); | |||
if (retval != ERROR_OK) | |||
return retval; | |||
/* set up algorithm and parameters */ | |||
algo.common_magic = ARMV4_5_COMMON_MAGIC; | |||
algo.core_mode = ARMV4_5_MODE_SVC; | |||
algo.core_state = ARMV4_5_STATE_ARM; | |||
init_reg_param(®_params[0], "r0", 32, PARAM_IN); | |||
init_reg_param(®_params[1], "r1", 32, PARAM_IN); | |||
init_reg_param(®_params[2], "r2", 32, PARAM_IN); | |||
buf_set_u32(reg_params[0].value, 0, 32, nand->data); | |||
buf_set_u32(reg_params[1].value, 0, 32, target_buf); | |||
buf_set_u32(reg_params[2].value, 0, 32, size); | |||
/* use alg to write data from work area to NAND chip */ | |||
retval = target_run_algorithm(target, 0, NULL, 3, reg_params, | |||
nand->copy_area->address, | |||
nand->copy_area->address + sizeof(code) - 4, | |||
1000, &algo); | |||
if (retval != ERROR_OK) | |||
LOG_ERROR("error executing hosted NAND write"); | |||
destroy_reg_param(®_params[0]); | |||
destroy_reg_param(®_params[1]); | |||
destroy_reg_param(®_params[2]); | |||
return retval; | |||
} | |||
/* REVISIT do the same for bulk *read* too ... */ | |||
@@ -0,0 +1,25 @@ | |||
#ifndef __ARM_NANDIO_H | |||
#define __ARM_NANDIO_H | |||
#include "nand.h" | |||
#include "binarybuffer.h" | |||
/*
 * Per-controller state used by arm_nandwrite() to push data to a NAND
 * chip through code running on an ARM core.
 *
 * Data width is currently implicit: 8 bits (not 16).
 */
struct arm_nand_data {
	/* ARM core acting as proxy for the NAND accesses */
	struct target_s *target;

	/* working area holding the write-to-NAND loop plus the data to
	 * write; NULL until the first write allocates it
	 */
	struct working_area_s *copy_area;

	/* size of the data space: a page or an ECC unit */
	unsigned int chunk_size;

	/* target address the data bytes are written to */
	uint32_t data;
};
int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size); | |||
#endif /* __ARM_NANDIO_H */ |
@@ -28,7 +28,7 @@ | |||
#include "config.h" | |||
#endif | |||
#include "nand.h" | |||
#include "arm_nandio.h" | |||
enum ecc { | |||
@@ -51,6 +51,9 @@ struct davinci_nand { | |||
uint32_t cmd; /* with CLE */ | |||
uint32_t addr; /* with ALE */ | |||
/* write acceleration */ | |||
struct arm_nand_data io; | |||
/* page i/o for the relevant flavor of hardware ECC */ | |||
int (*read_page)(struct nand_device_s *nand, uint32_t page, | |||
uint8_t *data, uint32_t data_size, uint8_t *oob, uint32_t oob_size); | |||
@@ -181,7 +184,7 @@ static int davinci_read_data(struct nand_device_s *nand, void *data) | |||
return ERROR_OK; | |||
} | |||
/* REVISIT a bit of native code should let block I/O be MUCH faster */ | |||
/* REVISIT a bit of native code should let block reads be MUCH faster */ | |||
static int davinci_read_block_data(struct nand_device_s *nand, | |||
uint8_t *data, int data_size) | |||
@@ -223,10 +226,17 @@ static int davinci_write_block_data(struct nand_device_s *nand, | |||
target_t *target = info->target; | |||
uint32_t nfdata = info->data; | |||
uint32_t tmp; | |||
int status; | |||
if (!halted(target, "write_block")) | |||
return ERROR_NAND_OPERATION_FAILED; | |||
/* try the fast way first */ | |||
status = arm_nandwrite(&info->io, data, data_size); | |||
if (status != ERROR_NAND_NO_BUFFER) | |||
return status; | |||
/* else do it slowly */ | |||
while (data_size >= 4) { | |||
tmp = le_to_h_u32(data); | |||
target_write_u32(target, nfdata, tmp); | |||
@@ -285,6 +295,12 @@ static int davinci_write_page(struct nand_device_s *nand, uint32_t page, | |||
memset(oob, 0x0ff, oob_size); | |||
} | |||
/* REVISIT avoid wasting SRAM: unless nand->use_raw is set, | |||
* use 512 byte chunks. Read side support will often want | |||
* to include oob_size ... | |||
*/ | |||
info->io.chunk_size = nand->page_size; | |||
status = info->write_page(nand, page, data, data_size, oob, oob_size); | |||
free(ooballoc); | |||
return status; | |||
@@ -700,6 +716,9 @@ static int davinci_nand_device_command(struct command_context_s *cmd_ctx, | |||
nand->controller_priv = info; | |||
info->io.target = target; | |||
info->io.data = info->data; | |||
/* NOTE: for now we don't do any error correction on read. | |||
* Nothing else in OpenOCD currently corrects read errors, | |||
* and in any case it's *writing* that we care most about. | |||
@@ -223,5 +223,6 @@ extern int nand_init(struct command_context_s *cmd_ctx); | |||
#define ERROR_NAND_OPERATION_NOT_SUPPORTED (-1103) | |||
#define ERROR_NAND_DEVICE_NOT_PROBED (-1104) | |||
#define ERROR_NAND_ERROR_CORRECTION_FAILED (-1105) | |||
#define ERROR_NAND_NO_BUFFER (-1106) | |||
#endif /* NAND_H */ |
@@ -26,15 +26,15 @@ | |||
#include "config.h" | |||
#endif | |||
#include "nand.h" | |||
#include "arm_nandio.h" | |||
#include "armv4_5.h" | |||
#include "binarybuffer.h" | |||
typedef struct orion_nand_controller_s | |||
{ | |||
struct target_s *target; | |||
working_area_t *copy_area; | |||
struct arm_nand_data io; | |||
uint32_t cmd; | |||
uint32_t addr; | |||
@@ -99,78 +99,14 @@ static int orion_nand_slow_block_write(struct nand_device_s *device, uint8_t *da | |||
static int orion_nand_fast_block_write(struct nand_device_s *device, uint8_t *data, int size) | |||
{ | |||
orion_nand_controller_t *hw = device->controller_priv; | |||
target_t *target = hw->target; | |||
armv4_5_algorithm_t algo; | |||
reg_param_t reg_params[3]; | |||
uint32_t target_buf; | |||
int retval; | |||
static const uint32_t code[] = { | |||
0xe4d13001, /* ldrb r3, [r1], #1 */ | |||
0xe5c03000, /* strb r3, [r0] */ | |||
0xe2522001, /* subs r2, r2, #1 */ | |||
0x1afffffb, /* bne 0 */ | |||
0xeafffffe, /* b . */ | |||
}; | |||
int code_size = sizeof(code); | |||
if (!hw->copy_area) { | |||
uint8_t code_buf[code_size]; | |||
int i; | |||
/* make sure we have a working area */ | |||
if (target_alloc_working_area(target, | |||
code_size + device->page_size, | |||
&hw->copy_area) != ERROR_OK) | |||
{ | |||
return orion_nand_slow_block_write(device, data, size); | |||
} | |||
/* copy target instructions to target endianness */ | |||
for (i = 0; i < code_size/4; i++) | |||
target_buffer_set_u32(target, code_buf + i*4, code[i]); | |||
/* write code to working area */ | |||
retval = target_write_memory(target, | |||
hw->copy_area->address, | |||
4, code_size/4, code_buf); | |||
if (retval != ERROR_OK) | |||
return retval; | |||
} | |||
hw->io.chunk_size = device->page_size; | |||
retval = arm_nandwrite(&hw->io, data, size); | |||
if (retval == ERROR_NAND_NO_BUFFER) | |||
retval = orion_nand_slow_block_write(device, data, size); | |||
/* copy data to target's memory */ | |||
target_buf = hw->copy_area->address + code_size; | |||
retval = target_bulk_write_memory(target, target_buf, size/4, data); | |||
if (retval == ERROR_OK && size & 3) { | |||
retval = target_write_memory(target, | |||
target_buf + (size & ~3), | |||
1, size & 3, data + (size & ~3)); | |||
} | |||
if (retval != ERROR_OK) | |||
return retval; | |||
algo.common_magic = ARMV4_5_COMMON_MAGIC; | |||
algo.core_mode = ARMV4_5_MODE_SVC; | |||
algo.core_state = ARMV4_5_STATE_ARM; | |||
init_reg_param(®_params[0], "r0", 32, PARAM_IN); | |||
init_reg_param(®_params[1], "r1", 32, PARAM_IN); | |||
init_reg_param(®_params[2], "r2", 32, PARAM_IN); | |||
buf_set_u32(reg_params[0].value, 0, 32, hw->data); | |||
buf_set_u32(reg_params[1].value, 0, 32, target_buf); | |||
buf_set_u32(reg_params[2].value, 0, 32, size); | |||
retval = target_run_algorithm(target, 0, NULL, 3, reg_params, | |||
hw->copy_area->address, | |||
hw->copy_area->address + code_size - 4, | |||
1000, &algo); | |||
if (retval != ERROR_OK) | |||
LOG_ERROR("error executing hosted NAND write"); | |||
destroy_reg_param(®_params[0]); | |||
destroy_reg_param(®_params[1]); | |||
destroy_reg_param(®_params[2]); | |||
return retval; | |||
} | |||
@@ -224,6 +160,9 @@ int orion_nand_device_command(struct command_context_s *cmd_ctx, char *cmd, | |||
hw->cmd = base + (1 << cle); | |||
hw->addr = base + (1 << ale); | |||
hw->io.target = hw->target; | |||
hw->io.data = hw->data; | |||
return ERROR_OK; | |||
} | |||