Skip to content

Commit

Permalink
More robust faults on BMS (#1281)
Browse files Browse the repository at this point in the history
### Changelist 
<!-- Give a list of the changes covered in this PR. This will help both
you and the reviewer keep this PR within scope. -->

Made faults on BMS more robust by adding debounce times. This should
make the faults resistant to random noise or anything else leading
to intermittent bad measurements. Currently the values are 500ms for
voltages, 1000ms for temperatures, and 100ms for TS current. Also, added
descriptions for all alerts.

Also, made the open wire check a warning, so it's now up to the driver
whether or not we want to shut down.

Also also, 45C overtemp condition during charging is now based on
`io_charger_isChargerConnected` instead of just checking current. This
means we'll keep using the 60C value when driving with regen.

### Testing Done
<!-- Outline the testing that was done to demonstrate the changes are
solid. This could be unit tests, integration tests, testing on the car,
etc. Include relevant code snippets, screenshots, etc as needed. -->

Updated unit tests.
  • Loading branch information
gtaharaedmonds authored Jun 2, 2024
1 parent b2d8cbe commit b32bfbc
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 324 deletions.
94 changes: 47 additions & 47 deletions can_bus/quadruna/BMS/BMS_alerts.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,109 +6,109 @@
"warnings": {
"StackWaterMarkHighTask1Hz": {
"id": 101,
"description": "Example"
"description": "1Hz task stack watermark exceeded."
},
"StackWaterMarkHighTask100Hz": {
"id": 102,
"description": "Example"
"description": "100Hz task stack watermark exceeded."
},
"StackWaterMarkHighTask1kHz": {
"id": 103,
"description": "Example"
"description": "1kHz task stack watermark exceeded."
},
"StackWaterMarkHighTaskCanRx": {
"id": 104,
"description": "Example"
"description": "CAN RX task stack watermark exceeded."
},
"StackWaterMarkHighTaskCanTx": {
"id": 105,
"description": "Example"
"description": "CAN TX task stack watermark exceeded."
},
"WatchdogTimeout": {
"id": 106,
"description": "Example"
"description": "Watchdog timeout detected."
},
"TxOverflow": {
"id": 107,
"description": "Example"
"description": "CAN TX queue has overflowed."
},
"RxOverflow": {
"id": 108,
"description": "Example"
"description": "CAN RX queue has overflowed."
},
"MissingVCHeartbeat": {
"OpenWireCheckFault": {
"id": 109,
"description": "Example"
"description": "Open wire cell detected (cell voltage tap is open circuit)."
},
"OpenWireCheck_Segment0_GND": {
"id": 110,
"description": "Open wire cell detected in segment 0 (cell voltage tap is open circuit)."
},
"OpenWireCheck_Segment1_GND": {
"id": 111,
"description": "Open wire cell detected in segment 1 (cell voltage tap is open circuit)."
},
"OpenWireCheck_Segment2_GND": {
"id": 112,
"description": "Open wire cell detected in segment 2 (cell voltage tap is open circuit)."
},
"OpenWireCheck_Segment3_GND": {
"id": 113,
"description": "Open wire cell detected in segment 3 (cell voltage tap is open circuit)."
},
"OpenWireCheck_Segment4_GND": {
"id": 114,
"description": "Open wire cell detected in segment 4 (cell voltage tap is open circuit)."
},
"MissingVCHeartbeat": {
"id": 115,
"description": "Missing heartbeat from the VC."
}
},
"faults": {
"StateMachine": {
"id": 154,
"description": "Example"
"description": "BMS has transitioned to fault state."
},
"CellUndervoltage": {
"id": 155,
"description": "Example"
"description": "Cell is critically undervoltage (<3.0V)"
},
"CellOvervoltage": {
"id": 156,
"description": "Example"
"description": "Cell is critically overvoltage (>4.2V)"
},
"ModuleCommunicationError": {
"id": 157,
"description": "Example"
"description": "Failed to communicate with LTC6813 modules (>3x bad PEC15 error checks)."
},
"CellUndertemp": {
"id": 158,
"description": "Example"
"description": "Cell is critically undertemp (<-20degC)"
},
"CellOvertemp": {
"id": 159,
"description": "Example"
"description": "Cell is critically overtemp (>45degC if charging, >60degC if not)."
},
"Charger": {
"ChargerReportedError": {
"id": 160,
"description": "Example"
"description": "The charger reported an error."
},
"ChargerDisconnectedDuringCharge": {
"id": 161,
"description": "Example"
"description": "Charger was disconnected while in charge state."
},
"ChargerExternalShutdown": {
"ChargerShutdownLoopOpen": {
"id": 162,
"description": "Example"
"description": "AIR- opened during charging."
},
"TractiveSystemOvercurrent": {
"id": 163,
"description": "Example"
"description": "Tractive system overcurrent detected (<-265A or >70.8A)."
},
"PrechargeFailure": {
"id": 164,
"description": "Example"
},
"OpenWireCheckFault": {
"id": 165,
"description": "Example"
},
"OpenWireCheck_Segment0_GND": {
"id": 166,
"description": "Example"
},
"OpenWireCheck_Segment1_GND": {
"id": 167,
"description": "Example"
},
"OpenWireCheck_Segment2_GND": {
"id": 168,
"description": "Example"
},
"OpenWireCheck_Segment3_GND": {
"id": 169,
"description": "Example"
},
"OpenWireCheck_Segment4_GND": {
"id": 170,
"description": "Example"
"description": "Precharge failed 3 times."
}
}
}
77 changes: 49 additions & 28 deletions firmware/quadruna/BMS/src/app/app_accumulator.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "app_canRx.h"
#include "app_canAlerts.h"
#include "app_math.h"
#include "app_timer.h"
#include "ltc6813/io_ltc6813Shared.h"
#include "ltc6813/io_ltc6813CellVoltages.h"
#include "ltc6813/io_ltc6813CellTemps.h"
Expand Down Expand Up @@ -84,9 +85,9 @@ typedef struct

typedef struct
{
uint16_t owcStatus[ACCUMULATOR_NUM_SEGMENTS];
bool owcFaultGND[ACCUMULATOR_NUM_SEGMENTS];
bool owcGlobalFault;
uint16_t owc_status[ACCUMULATOR_NUM_SEGMENTS];
bool owc_fault_gnd[ACCUMULATOR_NUM_SEGMENTS];
bool owc_global_fault;
} OWCFaults;

typedef struct
Expand All @@ -111,6 +112,11 @@ static uint8_t open_wire_pu_readings;
static uint8_t open_wire_pd_readings;
static uint8_t owc_idle_cycles;

static TimerChannel under_voltage_fault_timer;
static TimerChannel over_voltage_fault_timer;
static TimerChannel under_temp_fault_timer;
static TimerChannel over_temp_fault_timer;

static void app_accumulator_calculateVoltageStats(void)
{
VoltageStats temp_voltage_stats = { .min_voltage = { .segment = 0U, .cell = 0U, .voltage = FLT_MAX },
Expand Down Expand Up @@ -244,6 +250,12 @@ void app_accumulator_init(void)
open_wire_pd_readings = 0;
owc_idle_cycles = 0;
data.owc_state = START_OPEN_WIRE_CHECK;

// Init fault debounce timers.
app_timer_init(&under_voltage_fault_timer, UNDER_VOLTAGE_DEBOUNCE_DURATION_MS);
app_timer_init(&over_voltage_fault_timer, OVER_VOLTAGE_DEBOUNCE_DURATION_MS);
app_timer_init(&under_temp_fault_timer, UNDER_TEMP_DEBOUNCE_DURATION_MS);
app_timer_init(&over_temp_fault_timer, OVER_TEMP_DEBOUNCE_DURATION_MS);
}

void app_accumulator_writeDefaultConfig(void)
Expand Down Expand Up @@ -288,26 +300,26 @@ void app_accumulator_runCellMeasurements(void)

static void app_accumulator_owcCalculateFaults(void)
{
OWCFaults owcFaults = { .owcStatus = { 0U }, .owcFaultGND = { 0U }, .owcGlobalFault = 0U };
OWCFaults owcFaults = { .owc_status = { 0U }, .owc_fault_gnd = { 0U }, .owc_global_fault = 0U };

owcFaults.owcGlobalFault = io_ltc6813CellVoltages_getGlobalOpenWireFault();
owcFaults.owc_global_fault = io_ltc6813CellVoltages_getGlobalOpenWireFault();

if (owcFaults.owcGlobalFault)
if (owcFaults.owc_global_fault)
{
for (uint8_t segment = 0; segment < ACCUMULATOR_NUM_SEGMENTS; segment++)
{
if (io_ltc6813CellVoltages_getOpenWireFault(segment, 0))
{
owcFaults.owcFaultGND[segment] = true;
owcFaults.owcStatus[segment] = (uint16_t)1;
owcFaults.owc_fault_gnd[segment] = true;
owcFaults.owc_status[segment] = (uint16_t)1;
}
else
{
for (uint8_t cell = 1; cell < ACCUMULATOR_NUM_SERIES_CELLS_PER_SEGMENT; cell++)
{
if (io_ltc6813CellVoltages_getOpenWireFault(segment, cell))
{
owcFaults.owcStatus[segment] |= ((uint16_t)(1 << cell));
owcFaults.owc_status[segment] |= ((uint16_t)(1 << cell));
}
}
}
Expand Down Expand Up @@ -438,11 +450,11 @@ void app_accumulator_broadcast(void)
app_canTx_BMS_MaxTempIdx_set(max_loc);

// Broadcast OWC information
app_canTx_BMS_Segment0_OWC_Cells_Status_set(data.owc_faults.owcStatus[0]);
app_canTx_BMS_Segment1_OWC_Cells_Status_set(data.owc_faults.owcStatus[1]);
app_canTx_BMS_Segment2_OWC_Cells_Status_set(data.owc_faults.owcStatus[2]);
app_canTx_BMS_Segment3_OWC_Cells_Status_set(data.owc_faults.owcStatus[3]);
app_canTx_BMS_Segment4_OWC_Cells_Status_set(data.owc_faults.owcStatus[4]);
app_canTx_BMS_Segment0_OWC_Cells_Status_set(data.owc_faults.owc_status[0]);
app_canTx_BMS_Segment1_OWC_Cells_Status_set(data.owc_faults.owc_status[1]);
app_canTx_BMS_Segment2_OWC_Cells_Status_set(data.owc_faults.owc_status[2]);
app_canTx_BMS_Segment3_OWC_Cells_Status_set(data.owc_faults.owc_status[3]);
app_canTx_BMS_Segment4_OWC_Cells_Status_set(data.owc_faults.owc_status[4]);

// Calculate and broadcast pack power.
const float available_power =
Expand All @@ -461,36 +473,45 @@ bool app_accumulator_checkFaults(void)
float min_allowable_cell_temp = MIN_CELL_DISCHARGE_TEMP_DEGC;

// if we are charging, max cell temp is 45C not 60C
if (app_tractiveSystem_getCurrent() > 3.0f)
if (app_canRx_BRUSA_IsConnected_get())
{
max_allowable_cell_temp = MAX_CELL_CHARGE_TEMP_DEGC;
min_allowable_cell_temp = MIN_CELL_CHARGE_TEMP_DEGC;
}

bool overtemp_fault =
const bool overtemp_condition =
io_ltc6813CellTemps_getMaxTempDegC(&throwaway_segment, &throwaway_loc) > max_allowable_cell_temp;
bool undertemp_fault =
const bool undertemp_condition =
io_ltc6813CellTemps_getMinTempDegC(&throwaway_segment, &throwaway_loc) < min_allowable_cell_temp;
bool overvoltage_fault = data.voltage_stats.max_voltage.voltage > MAX_CELL_VOLTAGE;
bool undervoltage_fault = data.voltage_stats.min_voltage.voltage < MIN_CELL_VOLTAGE;
bool communication_fault = data.num_comm_tries >= MAX_NUM_COMM_TRIES;
const bool overvoltage_condition = data.voltage_stats.max_voltage.voltage > MAX_CELL_VOLTAGE;
const bool undervoltage_condition = data.voltage_stats.min_voltage.voltage < MIN_CELL_VOLTAGE;

const bool overtemp_fault =
app_timer_runIfCondition(&over_temp_fault_timer, overtemp_condition) == TIMER_STATE_EXPIRED;
const bool undertemp_fault =
app_timer_runIfCondition(&under_temp_fault_timer, undertemp_condition) == TIMER_STATE_EXPIRED;
const bool overvoltage_fault =
app_timer_runIfCondition(&over_voltage_fault_timer, overvoltage_condition) == TIMER_STATE_EXPIRED;
const bool undervoltage_fault =
app_timer_runIfCondition(&under_voltage_fault_timer, undervoltage_condition) == TIMER_STATE_EXPIRED;

app_canAlerts_BMS_Fault_CellUndervoltage_set(undervoltage_fault);
app_canAlerts_BMS_Fault_CellOvervoltage_set(overvoltage_fault);
app_canAlerts_BMS_Fault_CellUndertemp_set(undertemp_fault);
app_canAlerts_BMS_Fault_CellOvertemp_set(overtemp_fault);

bool communication_fault = data.num_comm_tries >= MAX_NUM_COMM_TRIES;
app_canTx_BMS_ModuleCommunication_NumCommTries_set(data.num_comm_tries);
app_canTx_BMS_ModuleCommunication_MonitorState_set((CAN_AccumulatorMonitorState)data.state);
app_canAlerts_BMS_Fault_ModuleCommunicationError_set(communication_fault);

bool owc_fault = data.owc_faults.owcGlobalFault;

app_canAlerts_BMS_Fault_OpenWireCheckFault_set(data.owc_faults.owcGlobalFault);
app_canAlerts_BMS_Fault_OpenWireCheck_Segment0_GND_set(data.owc_faults.owcFaultGND[0]);
app_canAlerts_BMS_Fault_OpenWireCheck_Segment1_GND_set(data.owc_faults.owcFaultGND[1]);
app_canAlerts_BMS_Fault_OpenWireCheck_Segment2_GND_set(data.owc_faults.owcFaultGND[2]);
app_canAlerts_BMS_Fault_OpenWireCheck_Segment3_GND_set(data.owc_faults.owcFaultGND[3]);
app_canAlerts_BMS_Fault_OpenWireCheck_Segment4_GND_set(data.owc_faults.owcFaultGND[4]);
bool owc_fault = data.owc_faults.owc_global_fault;
app_canAlerts_BMS_Warning_OpenWireCheckFault_set(data.owc_faults.owc_global_fault);
app_canAlerts_BMS_Warning_OpenWireCheck_Segment0_GND_set(data.owc_faults.owc_fault_gnd[0]);
app_canAlerts_BMS_Warning_OpenWireCheck_Segment1_GND_set(data.owc_faults.owc_fault_gnd[1]);
app_canAlerts_BMS_Warning_OpenWireCheck_Segment2_GND_set(data.owc_faults.owc_fault_gnd[2]);
app_canAlerts_BMS_Warning_OpenWireCheck_Segment3_GND_set(data.owc_faults.owc_fault_gnd[3]);
app_canAlerts_BMS_Warning_OpenWireCheck_Segment4_GND_set(data.owc_faults.owc_fault_gnd[4]);

const bool acc_fault = overtemp_fault || undertemp_fault || overvoltage_fault || undervoltage_fault ||
communication_fault || owc_fault;
Expand Down
6 changes: 6 additions & 0 deletions firmware/quadruna/BMS/src/app/app_accumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
#define MIN_CELL_VOLTAGE (3.0f)
#define C_RATE_TO_AMPS (17.7f)

// Fault debounce durations.
#define UNDER_VOLTAGE_DEBOUNCE_DURATION_MS (500U)
#define OVER_VOLTAGE_DEBOUNCE_DURATION_MS (500U)
#define UNDER_TEMP_DEBOUNCE_DURATION_MS (1000U)
#define OVER_TEMP_DEBOUNCE_DURATION_MS (1000U)

// Cell Balancing Discharge Parameters
#define CELL_VOLTAGE_BALANCE_WINDOW_V (600e-6f) // 600uV

Expand Down
11 changes: 6 additions & 5 deletions firmware/quadruna/BMS/src/app/app_globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ void app_globals_init(const GlobalsConfig *config)

app_timer_init(&globals->precharge_lower_bound_timer, PRECHARGE_COMPLETION_LOWER_BOUND);
app_timer_init(&globals->precharge_upper_bound_timer, PRECHARGE_COMPLETION_UPPER_BOUND);
globals->charger_connected_counter = 0;
globals->ignore_charger_fault_counter = 0;
globals->charger_exit_counter = 0;
globals->precharge_limit_exceeded = false;
globals->broadcast_charger_connected = true;
globals->charger_connected_counter = 0;
globals->ignore_charger_fault_counter = 0;
globals->charger_exit_counter = 0;
globals->precharge_limit_exceeded = false;
globals->broadcast_charger_connected = true;
globals->disable_charger_connected_hb_check = false;
}
1 change: 1 addition & 0 deletions firmware/quadruna/BMS/src/app/app_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ typedef struct
uint32_t ignore_charger_fault_counter;
uint32_t charger_exit_counter;
uint32_t charger_connected_counter;
bool disable_charger_connected_hb_check;
} Globals;

extern Globals *const globals;
Expand Down
20 changes: 16 additions & 4 deletions firmware/quadruna/BMS/src/app/app_tractiveSystem.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
#include "app_canAlerts.h"
#include "app_canTx.h"
#include "io_tractiveSystem.h"
#include "app_timer.h"

TimerChannel overcurrent_fault_timer;

/**
 * Initialize the tractive system app module.
 *
 * Configures overcurrent_fault_timer with a duration of
 * TS_OVERCURRENT_DEBOUNCE_DURATION_MS. That timer is the one
 * app_tractveSystem_checkFaults() runs while the TS current is out of range,
 * so the overcurrent fault is only reported once the timer has expired.
 *
 * NOTE(review): presumably this must be called once before the first
 * app_tractveSystem_checkFaults() call - confirm against the caller.
 */
void app_tractiveSystem_init(void)
{
    // An explicit (void) gives this definition a real prototype; an empty ()
    // list would leave the parameters unspecified in C11.
    app_timer_init(&overcurrent_fault_timer, TS_OVERCURRENT_DEBOUNCE_DURATION_MS);
}

#define W_TO_KW 1.0e-3f

Expand Down Expand Up @@ -40,9 +48,13 @@ bool app_tractveSystem_checkFaults()
{
// Charge current is positive, discharge current is negative
// TS current should be in the range: (-265.5, 70.8)
const float current_A = app_tractiveSystem_getCurrent();
bool ts_current_out_of_bounds = !IS_IN_RANGE(MAX_TS_DISCHARGE_CURRENT_AMPS, MAX_TS_CHARGE_CURRENT_AMPS, current_A);
app_canAlerts_BMS_Fault_TractiveSystemOvercurrent_set(ts_current_out_of_bounds);
const float current_A = app_tractiveSystem_getCurrent();
const bool ts_current_out_of_bounds =
!IS_IN_RANGE(MAX_TS_DISCHARGE_CURRENT_AMPS, MAX_TS_CHARGE_CURRENT_AMPS, current_A);
const bool ts_overcurrent_fault =
app_timer_runIfCondition(&overcurrent_fault_timer, ts_current_out_of_bounds) == TIMER_STATE_EXPIRED;

app_canAlerts_BMS_Fault_TractiveSystemOvercurrent_set(ts_overcurrent_fault);

return ts_current_out_of_bounds;
return ts_overcurrent_fault;
}
Loading

0 comments on commit b32bfbc

Please sign in to comment.