Boot-up diagnostic
Verify the system operational state after boot-up
To check whether the system is operational after a boot-up sequence, one should verify at least two stages (for an ODS application):
Check that confInitStages contains the expected stages:
For non-ODS application scenarios:
/device/diagnostic/confInitStages: ['device', 'ports']
For ODS application scenarios:
/device/diagnostic/confInitStages: ['device', 'ports', 'applications']
Diagnostic query for active errors
"diagnostic": {
"confInitStages": [
"device",
"ports"
],
Additionally, it is advisable to periodically check the system's temperature values via the JSON configuration:
$ ifm3d dump | jq .device.diagnostic.temperatures
[
{
"entity": "BCPU-therm",
"overtemperature": false,
"temperatureLimit": 100.5,
"valid": true,
"value": 43
},
{
"entity": "GPU-therm",
"overtemperature": false,
"temperatureLimit": 100.5,
"valid": true,
"value": 40
},
{
"entity": "MCPU-therm",
"overtemperature": false,
"temperatureLimit": 100.5,
"valid": true,
"value": 43
},
{
"entity": "port2",
"overtemperature": false,
"temperatureLimit": 85,
"valid": true,
"value": 47
},
{
"entity": "port3",
"overtemperature": false,
"temperatureLimit": 85,
"valid": true,
"value": 49
},
{
"entity": "VPU",
"overtemperature": false,
"temperatureLimit": 100.5,
"valid": true,
"value": 43
}
]
#############################################
# Copyright 2023-present ifm electronic, gmbh
# SPDX-License-Identifier: Apache-2.0
#############################################
# %%
import logging
import time
import socket
from ifm3dpy.device import O3R
from ifm3dpy.device import Error as ifm3dpy_error
class BootUpMonitor:
    """Monitor the VPU boot-up sequence until every expected stage is reported.

    The monitor polls the device configuration, reads
    ``/device/diagnostic/confInitStages`` and considers the VPU booted once
    all expected stages are listed there. It then waits for the diagnostics
    TCP port to accept connections and logs the currently active errors.

    The instance can also be used as a context manager; entering and leaving
    the context only emit log messages.
    """

    def __init__(
        self,
        o3r: "O3R",
        timeout: float = 60,
        wait_time: float = 0.5,
        stages=None,
    ) -> None:
        """
        Args:
            o3r: Device handle. It must provide ``ip``, ``get()`` and
                ``get_diagnostic_filtered()``.
            timeout: Maximum time allowed for the boot-up sequence, in seconds.
            wait_time: Pause between two configuration queries, in seconds.
            stages: Stages that must all be reported before the VPU is
                considered booted. Defaults to
                ``["device", "ports", "applications"]``; pass
                ``["device", "ports"]`` when no application is expected.
        """
        self.o3r = o3r
        self._stages = (
            list(stages)
            if stages is not None
            else ["device", "ports", "applications"]
        )
        self._diagnostics_PCICPort = 50009
        self.timeout = timeout
        self.wait_time = wait_time
        self._ip = o3r.ip
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO, format="%(message)s")

    def wait_for_diagnostics(self, retry_delay=0.5, timeout=None):
        """Wait until the diagnostics port accepts a TCP connection.

        Args:
            retry_delay: Pause between two connection attempts, in seconds.
            timeout: Maximum time to wait, in seconds. ``None`` (the default)
                waits indefinitely. At least one attempt is always made.

        Raises:
            TimeoutError: If ``timeout`` is given and the port did not accept
                a connection in time.
        """
        deadline = None if timeout is None else time.perf_counter() + timeout
        while True:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                # Bound each attempt so an unreachable host cannot block forever.
                sock.settimeout(1)
                result = sock.connect_ex((self._ip, self._diagnostics_PCICPort))
            if result == 0:
                self.logger.info("Diagnostics service available.")
                return
            if deadline is not None and time.perf_counter() >= deadline:
                raise TimeoutError(
                    "Process timed out waiting for the diagnostics service"
                )
            time.sleep(retry_delay)

    def retrieve_boot_diagnostic(self):
        """Log every diagnostic event that is currently in the active state."""
        self.logger.info("Retrieving diagnostics: \n")
        active = self.o3r.get_diagnostic_filtered({"state": "active"})
        for error in active["events"]:
            self.logger.warning("Active errors: %s, %s", error["id"], error["name"])

    def monitor_VPU_bootup(self) -> bool:
        """
        Check that the VPU completes its boot sequence before
        attempting to initialize an application.

        Sequence goes:
            /device/diagnostic/confInitStages: 'device' --> 'ports' --> 'applications'
            Diagnostic query for active errors

        The timeout and polling interval are taken from the values passed to
        the constructor.

        Raises:
            RuntimeError: If the list of expected stages is empty.
            TimeoutError: If the expected stages (or the diagnostics service)
                are not available within the timeout duration.

        Returns:
            True if the VPU is fully booted.
        """
        if len(self._stages) == 0:
            raise RuntimeError("please use a non empty list of stages")

        self.logger.debug("Monitoring bootup sequence: ready to connect.")
        start = time.perf_counter()
        while time.perf_counter() - start < self.timeout:
            # Reset on every poll so a failed query never reuses stale data.
            config = None
            try:
                config = self.o3r.get()
                self.logger.debug("Connected.")
            except ifm3dpy_error:
                self.logger.debug("Awaiting data from VPU...")

            if config:
                confInitStages = config["device"]["diagnostic"]["confInitStages"]
                # Every expected stage must be reported; a partial (or empty)
                # list means the boot sequence is still in progress.
                if all(stage in confInitStages for stage in self._stages):
                    remaining = max(
                        0.0, self.timeout - (time.perf_counter() - start)
                    )
                    self.wait_for_diagnostics(retry_delay=0.5, timeout=remaining)
                    self.logger.info("VPU fully booted.")
                    self.retrieve_boot_diagnostic()
                    return True
                if "ports" in confInitStages:
                    self.logger.debug("Ports recognized")
                elif "device" in confInitStages:
                    self.logger.debug("Device recognized")
            time.sleep(self.wait_time)
        raise TimeoutError("Process timed out waiting for VPU to boot")

    def __enter__(self):
        self.logger.info("Waiting for VPU to boot")
        return self

    def __exit__(self, type, value, traceback):
        self.logger.info(
            "Bootup monitoring finished. Check the logs to verify bootup status."
        )
# %%
def main():
    """Connect to the device and block until its boot-up sequence has finished.

    The device IP is read from the ``ovp8xxexamples`` configuration when that
    package is importable; otherwise a default address is used.
    """
    # Configure logging before the first log call; otherwise the INFO message
    # below is dropped because no handler has been set up yet.
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    logger = logging.getLogger(__name__)

    try:
        # If the example python package was built, import the configuration
        from ovp8xxexamples import config

        IP = config.IP
    except ImportError:
        # Otherwise, use default values
        print(
            "Unable to import the configuration.\nPlease run 'pip install -e .' from the python root directory"
        )
        print("Defaulting to the default configuration.")
        IP = "192.168.0.69"

    logger.info("Device IP: %s", IP)

    o3r = O3R(IP)
    bootup_monitor = BootUpMonitor(o3r)
    bootup_monitor.monitor_VPU_bootup()


if __name__ == "__main__":
    main()
/*
* Copyright 2022-present ifm electronic, gmbh
* SPDX-License-Identifier: Apache-2.0
*/
#include <chrono>
#include <cstring>
#include <exception>
#include <ifm3d/device/err.h>
#include <ifm3d/device/o3r.h>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>
#include <tuple>
using namespace std::chrono_literals;
using namespace ifm3d::literals;
class BootupMonitor {
public:
static std::tuple<bool, std::string> MonitorVPUBootup(ifm3d::O3R::Ptr o3r, int timeout = 25, int wait_time = 1) {
BootupMonitor monitor(o3r, timeout, wait_time);
try {
bool success = monitor.Monitor();
return std::make_tuple(success, "");
} catch (const std::runtime_error& e) {
return std::make_tuple(false, e.what());
}
}
private:
ifm3d::O3R::Ptr o3r_;
const int timeout_; // in seconds
const int wait_time_; // in seconds
BootupMonitor(ifm3d::O3R::Ptr o3r, int timeout = 25, int wait_time = 1)
: o3r_(o3r), timeout_(timeout), wait_time_(wait_time) {}
bool Monitor(){
std::clog << "Monitoring bootup sequence: ready to connect." << std::endl;
auto start = std::chrono::steady_clock::now();
ifm3d::json config;
do {
try {
config = o3r_->Get();
std::clog << "Connected." << std::endl;
} catch (ifm3d::Error &e) {
std::clog << "Awaiting data from VPU..." << std::endl;
}
if (!config.empty()) {
std::clog << "Checking the init stages." << std::endl;
auto conf_init_stages =
config["/device/diagnostic/confInitStages"_json_pointer];
std::clog << conf_init_stages << std::endl;
for (auto it : conf_init_stages) {
if (it == "applications") {
std::clog << "Applications recognized" << std::endl
<< "VPU fully booted." << std::endl;
RetrieveBootDiagnostic();
return true;
}
if (it == "ports") {
std::clog << "Ports recognized." << std::endl;
} else if (it == "device") {
std::clog << "Device recognized." << std::endl;
}
}
}
std::this_thread::sleep_for(std::chrono::seconds(wait_time_));
} while (std::chrono::steady_clock::now() - start <
std::chrono::seconds(timeout_));
throw std::runtime_error("VPU bootup sequence timed out, or connection failed.");
}
void RetrieveBootDiagnostic() {
auto active_diag = o3r_->GetDiagnosticFiltered(
ifm3d::json::parse(R"({"state": "active"})"))["/events"_json_pointer];
for (auto error = active_diag.begin(); error != active_diag.end();
++error) {
std::clog << "\n//////////////////////////////////" << std::endl;
std::clog << *error << std::endl;
}
}
};