Skip to content

Commit

Permalink
Fix logging filesystem failing to initialize with high number of logs (
Browse files Browse the repository at this point in the history
…#1294)

### Changelist 
<!-- Give a list of the changes covered in this PR. This will help both
you and the reviewer keep this PR within scope. -->

On master, if there are a significant number of CAN logs present on the
SD card (somewhere in the range 50-100) the watchdog trips at startup.
This means the VC never gets past init.

Root cause: Currently, mounting the filesystem and starting logging
takes about 250ms for every 100 logs on the SD card, and scales
linearly. There is no significant correlation with the size of each log
file. Since the watchdog trips if not checked in every 200ms, it just
simply takes too long. Note that we start a new log after every boot.

Solution: I think anything less than a second of extra delay at bootup
is not a big deal, so I don't think this motivates any big changes to
our logging. There is no way to stop the STM32 watchdog peripheral once
its started, so instead I added a `tasks_preInitWatchdog` function which
runs after all peripherals are initialized, but before the watchdog is.
So, we initialize logging in here and then can proceed like normal.

Also, I added some CAN signals so there will be visibility on when we've
logged a significant amount of data.

### Testing Done
<!-- Outline the testing that was done to demonstrate the changes are
solid. This could be unit tests, integration tests, testing on the car,
etc. Include relevant code snippets, screenshots, etc as needed. -->

Tested on VC #2. Able to init logging in ~500ms total with 200 logs
without the watchdog tripping, and the VC behaves like normal after
that. Tested with large files as well (>>100MB) without any slowdown.
Performance is approximately O(N) at all values tested.

Also, interestingly it seems that different SD cards are faster than
others. The KOOTION 64GB Micro SDXC card is approximately 2x as fast as
the Lexar 300x 32GB SDHC card.
  • Loading branch information
gtaharaedmonds authored Jun 4, 2024
1 parent aedcaf1 commit c2c247c
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 26 deletions.
4 changes: 4 additions & 0 deletions can_bus/quadruna/VC/VC_alerts.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@
"BoostControllerFault": {
"id": 560,
"description": "Boost controller has faulted."
},
"HighNumberOfCanDataLogs": {
"id": 561,
"description": "High number of CAN data logs (>200) on the SD card, which slows down mounting the filesystem."
}
},
"faults": {
Expand Down
5 changes: 5 additions & 0 deletions can_bus/quadruna/VC/VC_tx.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
},
"BuzzerOn": {
"bits": 1
},
"NumberOfCanDataLogs": {
"resolution": 1,
"min": 0,
"max": 1000
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion firmware/quadruna/VC/src/cubemx/Src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ static void MX_I2C2_Init(void)
static void MX_IWDG1_Init(void)
{
/* USER CODE BEGIN IWDG1_Init 0 */

tasks_preInitWatchdog();
/* USER CODE END IWDG1_Init 0 */

/* USER CODE BEGIN IWDG1_Init 1 */
Expand Down
48 changes: 31 additions & 17 deletions firmware/quadruna/VC/src/tasks.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,33 @@ void tasks_preInit(void)
hw_bootup_enableInterruptsForApp();
}

void tasks_preInitWatchdog(void)
{
hw_sd_init(&sd);

if (loggingEnabled())
{
if (io_fileSystem_init() == FILE_OK)
{
io_canLogging_init(&canLogging_config);

// Empirically, mounting slows down (takes ~500ms) at 200 CAN logs on disk.
// This is not correlated to the size of each file.
app_canTx_VC_NumberOfCanDataLogs_set(io_canLogging_getCurrentLog());
app_canAlerts_VC_Warning_HighNumberOfCanDataLogs_set(
io_canLogging_getCurrentLog() > HIGH_NUMBER_OF_LOGS_THRESHOLD);
}
else
{
can_logging_enable = false;
}
}
else
{
can_logging_enable = false;
}
}

void tasks_init(void)
{
// Configure and initialize SEGGER SystemView.
Expand All @@ -361,7 +388,6 @@ void tasks_init(void)

hw_hardFaultHandler_init();
hw_can_init(&can);
hw_sd_init(&sd);
hw_watchdog_init(hw_watchdogConfig_refresh, hw_watchdogConfig_timeoutCallback);

HAL_ADC_Start_DMA(&hadc1, (uint32_t *)hw_adc_getRawValuesBuffer(), hadc1.Init.NbrOfConversion);
Expand All @@ -388,22 +414,6 @@ void tasks_init(void)
Error_Handler();
}

if (loggingEnabled())
{
if (io_fileSystem_init() == FILE_OK)
{
io_canLogging_init(&canLogging_config);
}
else
{
can_logging_enable = false;
}
}
else
{
can_logging_enable = false;
}

if (!io_imu_init())
{
app_canAlerts_VC_Warning_ImuInitFailed_set(true);
Expand Down Expand Up @@ -529,6 +539,8 @@ _Noreturn void tasks_runCanTx(void)
CanMsg tx_msg;
io_can_popTxMsgFromQueue(&tx_msg);
io_telemMessage_pushMsgtoQueue(&tx_msg);
io_canLogging_pushTxMsgToQueue(&tx_msg);

io_can_transmitMsgFromQueue(&tx_msg);
}
}
Expand All @@ -550,6 +562,8 @@ _Noreturn void tasks_runCanRx(void)
CanMsg rx_msg;
io_can_popRxMsgFromQueue(&rx_msg);
io_telemMessage_pushMsgtoQueue(&rx_msg);
io_canLogging_pushTxMsgToQueue(&rx_msg);

JsonCanMsg jsoncan_rx_msg;
io_jsoncan_copyFromCanMsg(&rx_msg, &jsoncan_rx_msg);
io_canRx_updateRxTableWithMessage(&jsoncan_rx_msg);
Expand Down
1 change: 1 addition & 0 deletions firmware/quadruna/VC/src/tasks.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

void tasks_preInitWatchdog(void);
void tasks_preInit(void);
void tasks_init(void);
_Noreturn void tasks_run1Hz(void);
Expand Down
7 changes: 6 additions & 1 deletion firmware/shared/src/io/io_canLogging.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
static const CanConfig *config;
#define QUEUE_SIZE 2048
#define QUEUE_BYTES sizeof(CanMsgLog) * QUEUE_SIZE
#define PATH_LENGTH 10
#define PATH_LENGTH 16
static osMessageQueueId_t message_queue_id;
static StaticQueue_t queue_control_block;
static uint8_t queue_buf[QUEUE_BYTES];
Expand Down Expand Up @@ -155,3 +155,8 @@ int io_canLogging_sync(void)
}
return 0;
}

uint32_t io_canLogging_getCurrentLog(void)
{
return current_bootcount;
}
11 changes: 11 additions & 0 deletions firmware/shared/src/io/io_canLogging.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
#include "io_can.h"
#include "hw_sd.h"

// Empirically determined number which corresponds to the time to mount the filesystem, plus the time to create a new
// blank CAN log, taking approximately 500ms. This was found using logfs (not littlefs, there is probably a different
// threshold). The time to mount/create a log both scale linearly with logfs, so for example if there are 400 logs then
// then the time will be ~1s. Proceed with caution creating a significant amount of logs above this threshold.
#define HIGH_NUMBER_OF_LOGS_THRESHOLD (200U)

typedef struct
{
uint32_t id : 11;
Expand Down Expand Up @@ -33,3 +39,8 @@ int io_canLogging_recordMsgFromQueue(void);
void io_canLogging_loggingQueuePush(CanMsg *rx_msg);

int io_canLogging_sync(void);

/**
* Return the number of the current CAN data log.
*/
uint32_t io_canLogging_getCurrentLog(void);
17 changes: 10 additions & 7 deletions firmware/shared/src/io/io_fileSystem_logfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@ static LogFsErr logfsCfgRead(const LogFsCfg *cfg, uint32_t block, void *buf);
static LogFsErr logfsCfgWrite(const LogFsCfg *cfg, uint32_t block, void *buf);

static uint8_t cache[HW_DEVICE_SECTOR_SIZE];
static const LogFsCfg fs_cfg = { .block_count = 1000000,
.block_size = HW_DEVICE_SECTOR_SIZE,
.cache = cache,
.rd_only = false,
.read = logfsCfgRead,
.write = logfsCfgWrite };
static LogFs fs;
static const LogFsCfg fs_cfg = {
.block_count = 1024 * 1024,
.block_size = HW_DEVICE_SECTOR_SIZE,
.cache = cache,
.rd_only = false,
.read = logfsCfgRead,
.write = logfsCfgWrite,
.write_cycles = 0 // Disable wear levelling.
};
static LogFs fs;

static LogFsFileCfg files_cfg[MAX_FILE_NUMBER];
static LogFsFile files[MAX_FILE_NUMBER];
Expand Down

0 comments on commit c2c247c

Please sign in to comment.