Skip to content

Commit b0c92ea

Browse files
bmyates authored and Compute-Runtime-Automation committed
fix: Fail device init if kernel debugging is misconfigured
Also print error to stderr

Related-to: GSD-10780
Signed-off-by: Brandon Yates <brandon.yates@intel.com>
1 parent 80168b1 commit b0c92ea

File tree

7 files changed

+44
-10
lines changed

7 files changed

+44
-10
lines changed

shared/source/device/device.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,9 @@ bool Device::createDeviceImpl() {
138138
}
139139

140140
// initialize common resources once
141-
initializeCommonResources();
141+
if (!initializeCommonResources()) {
142+
return false;
143+
}
142144
}
143145

144146
// create engines
@@ -172,15 +174,14 @@ bool Device::initDeviceWithEngines() {
172174
return createEngines();
173175
}
174176

175-
void Device::initializeCommonResources() {
177+
bool Device::initializeCommonResources() {
176178
if (getExecutionEnvironment()->isDebuggingEnabled()) {
177179
const auto rootDeviceIndex = getRootDeviceIndex();
178180
auto rootDeviceEnvironment = getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get();
179181
rootDeviceEnvironment->initDebuggerL0(this);
180182
if (rootDeviceEnvironment->debugger == nullptr) {
181-
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr,
182-
"Debug mode is not enabled in the system.\n");
183-
getExecutionEnvironment()->setDebuggingMode(DebuggingMode::disabled);
183+
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Debug mode is not enabled in the system.\n");
184+
return false;
184185
}
185186
}
186187

@@ -209,6 +210,7 @@ void Device::initializeCommonResources() {
209210
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
210211
}
211212
initUsmReuseMaxSize();
213+
return true;
212214
}
213215

214216
void Device::initUsmReuseMaxSize() {

shared/source/device/device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
277277

278278
MOCKABLE_VIRTUAL bool createDeviceImpl();
279279
bool initDeviceWithEngines();
280-
void initializeCommonResources();
280+
bool initializeCommonResources();
281281
bool initDeviceFully();
282282
void initUsmReuseMaxSize();
283283
virtual bool createEngines();

shared/source/dll/linux/debugger_l0_dll_linux.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2024 Intel Corporation
2+
* Copyright (C) 2020-2025 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -19,7 +19,12 @@ std::unique_ptr<NEO::Debugger> DebuggerL0::create(NEO::Device *device) {
1919
return nullptr;
2020
}
2121
auto osInterface = device->getRootDeviceEnvironment().osInterface.get();
22-
if (!osInterface || !osInterface->isDebugAttachAvailable()) {
22+
if (!osInterface) {
23+
return nullptr;
24+
}
25+
if (!osInterface->isDebugAttachAvailable()) {
26+
auto cardName = osInterface->getDriverModel()->as<Drm>()->getSysFsPciPathBaseName();
27+
IoFunctions::fprintf(stderr, "Kernel debug mode is not enabled for %s. Device is not available for use\n", cardName.c_str());
2328
return nullptr;
2429
}
2530

shared/source/os_interface/linux/drm_neo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,15 @@ int Drm::getEnabledPooledEu(int &enabled) {
179179
return getParamIoctl(DrmParam::paramHasPooledEu, &enabled);
180180
}
181181

182+
// Returns the final component of the path reported by getSysFsPciPath(),
// i.e. everything after the last '/'. When the path contains no '/', the
// whole string is returned unchanged.
std::string Drm::getSysFsPciPathBaseName() {
183+
auto fullPath = getSysFsPciPath();
184+
// Locate the last path separator; everything after it is the base name.
size_t pos = fullPath.rfind("/");
185+
// No separator found: the path is already a bare name.
if (std::string::npos == pos) {
186+
return fullPath;
187+
}
188+
// Skip the separator itself and take the remainder of the string.
return fullPath.substr(pos + 1, std::string::npos);
189+
}
190+
182191
std::string Drm::getSysFsPciPath() {
183192
std::string path = std::string(Os::sysFsPciPathPrefix) + hwDeviceId->getPciPath() + "/drm";
184193
std::string expectedFilePrefix = path + "/card";

shared/source/os_interface/linux/drm_neo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ class Drm : public DriverModel {
264264
void cleanup() override;
265265
bool readSysFsAsString(const std::string &relativeFilePath, std::string &readString);
266266
MOCKABLE_VIRTUAL std::string getSysFsPciPath();
267+
MOCKABLE_VIRTUAL std::string getSysFsPciPathBaseName();
267268
std::unique_ptr<HwDeviceIdDrm> &getHwDeviceId() { return hwDeviceId; }
268269

269270
template <typename DataType>

shared/test/unit_test/device/neo_device_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,7 +1983,7 @@ TEST_F(DeviceTests, GivenDebuggingEnabledWhenDeviceIsInitializedThenL0DebuggerIs
19831983
EXPECT_NE(nullptr, device->getL0Debugger());
19841984
}
19851985

1986-
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenErrorIsPrintedButNotReturned) {
1986+
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenDeviceIsNullAndErrorIsPrinted) {
19871987
extern bool forceCreateNullptrDebugger;
19881988

19891989
VariableBackup backupForceCreateNullptrDebugger{&forceCreateNullptrDebugger, true};
@@ -1998,7 +1998,7 @@ TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsIniti
19981998
auto output = testing::internal::GetCapturedStderr();
19991999

20002000
EXPECT_EQ(std::string("Debug mode is not enabled in the system.\n"), output);
2001-
EXPECT_EQ(nullptr, device->getL0Debugger());
2001+
EXPECT_EQ(nullptr, device);
20022002
}
20032003

20042004
TEST_F(DeviceTests, givenDebuggerRequestedByUserWhenDeviceWithSubDevicesCreatedThenInitializeDebuggerOncePerRootDevice) {

shared/test/unit_test/os_interface/linux/drm_tests.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2267,3 +2267,20 @@ TEST(DrmTest, GivenProductSpecificIoctlHelperAvailableAndDebugFlagToIgnoreIsSetW
22672267

22682268
EXPECT_EQ(0u, customFuncCalled);
22692269
}
2270+
2271+
// Checks that getSysFsPciPathBaseName() yields the last component of the
// string returned by getSysFsPciPath(), using a local mock that overrides
// getSysFsPciPath() to return a test-controlled value.
TEST(DrmTest, GivenSysFsPciPathWhenCallinggetSysFsPciPathBaseNameThenResultIsCorrect) {
2272+
auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
2273+
2274+
// Mock whose sysfs PCI path is whatever mockSysFsPciPath currently holds.
class DrmMockPciPath : public DrmMock {
2275+
public:
2276+
DrmMockPciPath(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {}
2277+
std::string mockSysFsPciPath = "/sys/devices/pci0000:00/0000:00:02.0/drm/card0";
2278+
std::string getSysFsPciPath() override { return mockSysFsPciPath; }
2279+
};
2280+
DrmMockPciPath drm{*executionEnvironment->rootDeviceEnvironments[0]};
2281+
// Full path: only the trailing "cardN" component is expected back.
EXPECT_STREQ("card0", drm.getSysFsPciPathBaseName().c_str());
2282+
drm.mockSysFsPciPath = "/sys/devices/pci0000:00/0000:00:02.0/drm/card7";
2283+
EXPECT_STREQ("card7", drm.getSysFsPciPathBaseName().c_str());
2284+
// Path without any '/' is expected to be returned as-is.
drm.mockSysFsPciPath = "card8";
2285+
EXPECT_STREQ("card8", drm.getSysFsPciPathBaseName().c_str());
2286+
}

0 commit comments

Comments
 (0)