Skip to content

Commit 9577473

Browse files
Apollon77claude
andauthored
fix(node): make endpoint.state[id] and ClientNode.toString teardown-safe (#3680)
* fix(node): cache resolved peer address to make ClientNode toString teardown-safe ClientNode.toString() — invoked from Behaviors and Endpoint error message construction — could throw [destroyed-dependency] during ServerNode shutdown: the storage fallback in peerAddress went through ServerNodeStore.clientStores, which asserts active construction and is already in Closing state by the time the WAL final snapshot runs. The throw then masked whatever original error triggered toString(). peerAddress now mirrors the commissioning behavior state into a cache field whenever the backing is loaded (set OR cleared, so a decommission does not leave a stale value), falls back to the cache when the backing is gone, and accepts DependencyLifecycleError around the storage probe so teardown reads return undefined rather than throwing. eraseWithMutex clears the cache so re-commissioning can publish a new address. Adds a regression test that closes the controller and verifies peerAddress and toString continue to resolve. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(node): make endpoint.state[behaviorId] return undefined during teardown Behaviors.close() destroys backings but leaves the augmented endpoint.state[id] property descriptors in place because reuse paths (factory reset) re-activate through the same getter without re-running the constructor. Post-close access therefore re-entered #backingFor on a missing backing and threw BehaviorInitializationError; constructing that error involved ${this.#endpoint} which cascaded through toString → identity → peerAddress → storage and surfaced as a misleading "Snapshot failed" warning during ServerNode shutdown. The getter now checks the endpoint construction status: in Destroying or Destroyed it returns undefined, leaving Active/Initializing paths (including late activation) unchanged. Adds a regression test verifying state access on a closed peer returns undefined rather than throwing. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(node): clear cached peer address on soft reset too Copilot review on #3680 noted that ClientNode.reset() (soft reset, without storage erase) closes behaviors but leaves #cachedPeerAddress in place. If the node is then re-commissioned to a different fabric, peerAddress would return the stale cached value until the new commissioning state loads. Add a resetWithMutex override that clears the cache. The explicit clear in eraseWithMutex stays because that path uses super.resetWithMutex() (static dispatch), which skips this override. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 05db23e commit 9577473

3 files changed

Lines changed: 81 additions & 12 deletions

File tree

packages/node/src/endpoint/properties/Behaviors.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,17 @@ export class Behaviors {
821821
#augmentEndpoint(type: Behavior.Type) {
822822
const { id, Events } = type;
823823

824-
const get = () => this.#backingFor(type).stateView;
824+
// Descriptors persist across the lifetime of the Behaviors instance (constructor + inject install them and
825+
// close does not remove them so reuse paths like factory-reset can re-activate the same getter). Returning
826+
// undefined for endpoints that are tearing down keeps post-close access from re-entering #backingFor on a
827+
// missing backing and surfacing a misleading BehaviorInitializationError.
828+
const get = () => {
829+
const status = this.#endpoint.construction.status;
830+
if (status === Lifecycle.Status.Destroying || status === Lifecycle.Status.Destroyed) {
831+
return undefined;
832+
}
833+
return this.#backingFor(type).stateView;
834+
};
825835
Object.defineProperty(this.#endpoint.state, id, { get, enumerable: true, configurable: true });
826836
if (type.schema.id !== undefined) {
827837
Object.defineProperty(this.#endpoint.state, type.schema.id, { get, configurable: true });

packages/node/src/node/ClientNode.ts

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { ClientNodeStore } from "#storage/client/ClientNodeStore.js";
2020
import { RemoteWriter } from "#storage/client/RemoteWriter.js";
2121
import { ServerNodeStore } from "#storage/server/ServerNodeStore.js";
2222
import {
23+
DependencyLifecycleError,
2324
Diagnostic,
2425
Identity,
2526
ImplementationError,
@@ -47,6 +48,7 @@ export class ClientNode extends Node<ClientNode.RootEndpoint> {
4748
#matter: MatterModel;
4849
#interaction?: ClientNodeInteraction;
4950
#blockInteractions = false;
51+
#cachedPeerAddress?: PeerAddress;
5052

5153
constructor(options: ClientNode.Options) {
5254
const opts = {
@@ -206,16 +208,17 @@ export class ClientNode extends Node<ClientNode.RootEndpoint> {
206208
}
207209

208210
protected async eraseWithMutex() {
209-
// First, ensure we're offline
210211
await this.cancelWithMutex();
211-
212-
// Then reset
213212
await super.resetWithMutex();
214-
215-
// and erase
213+
this.#cachedPeerAddress = undefined;
216214
await this.env.get(EndpointInitializer).eraseDescendant(this);
217215
}
218216

217+
protected override async resetWithMutex() {
218+
this.#cachedPeerAddress = undefined;
219+
await super.resetWithMutex();
220+
}
221+
219222
protected createRuntime(): NetworkRuntime {
220223
return new ClientNetworkRuntime(this);
221224
}
@@ -274,15 +277,32 @@ export class ClientNode extends Node<ClientNode.RootEndpoint> {
274277
}
275278

276279
get peerAddress(): PeerAddress | undefined {
277-
// If commissioned, use the peer address for logging purposes
278-
let address = PeerAddress(this.behaviors.maybeStateOf("commissioning")?.peerAddress as PeerAddress | undefined);
280+
// Whenever the commissioning backing is loaded its state is authoritative - mirror it (set or cleared)
281+
// so the cache cannot survive a decommission as a stale value
282+
const state = this.behaviors.maybeStateOf("commissioning");
283+
if (state !== undefined) {
284+
const live = PeerAddress(state.peerAddress as PeerAddress | undefined);
285+
this.#cachedPeerAddress = live;
286+
return live;
287+
}
288+
289+
if (this.#cachedPeerAddress !== undefined) {
290+
return this.#cachedPeerAddress;
291+
}
279292

280-
// During early initialization commissioning state may not be loaded, so check directly in storage too
281-
if (!address) {
282-
address = PeerAddress(this.store.storeForEndpoint(this).peerAddress as PeerAddress | undefined);
293+
// Backing not yet loaded; consult storage directly, tolerating teardown
294+
try {
295+
const stored = PeerAddress(this.store.storeForEndpoint(this).peerAddress as PeerAddress | undefined);
296+
if (stored !== undefined) {
297+
this.#cachedPeerAddress = stored;
298+
return stored;
299+
}
300+
} catch (error) {
301+
DependencyLifecycleError.accept(error);
302+
logger.info("Cannot resolve peer address from storage:", error);
283303
}
284304

285-
return address;
305+
return undefined;
286306
}
287307

288308
override get identity() {

packages/node/test/node/ClientNodeTest.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,45 @@ describe("ClientNode", () => {
13621362
expect(address2.nodeId).not.equals(fabric.nodeId);
13631363
});
13641364
});
1365+
1366+
describe("peerAddress resilience", () => {
1367+
// Closing a ServerNode transitions ServerNodeStore.construction to "Closing" before the destructor releases
1368+
// child stores. Pre-fix, peerAddress fell back to the storage layer via this.store, which would assert
1369+
// through to clientStores and throw [destroyed-dependency]. toString() (used in error message construction
1370+
// throughout Behaviors and Endpoint) then re-threw, masking the original cause.
1371+
it("peerAddress survives ServerNode shutdown via cache", async () => {
1372+
await using site = new MockSite();
1373+
const { controller } = await site.addCommissionedPair();
1374+
const peer1 = controller.peers.get("peer1")!;
1375+
expect(peer1).not.undefined;
1376+
1377+
const cachedAddress = peer1.peerAddress;
1378+
expect(cachedAddress).not.undefined;
1379+
1380+
await MockTime.resolve(controller.close(), { macrotasks: true });
1381+
1382+
expect(() => peer1.peerAddress).not.throws();
1383+
expect(peer1.peerAddress).deep.equals(cachedAddress);
1384+
expect(() => peer1.toString()).not.throws();
1385+
});
1386+
1387+
// Behaviors.close() leaves the augmented endpoint.state[id] descriptors in place so that paths reusing the
1388+
// Behaviors instance (e.g. factory reset) can re-activate. Post-close access must therefore not re-enter
1389+
// #backingFor and throw BehaviorInitializationError - the getter checks the construction status and returns
1390+
// undefined once the endpoint is tearing down.
1391+
it("endpoint.state[behaviorId] returns undefined after close", async () => {
1392+
await using site = new MockSite();
1393+
const { controller } = await site.addCommissionedPair();
1394+
const peer1 = controller.peers.get("peer1")!;
1395+
1396+
expect(peer1.state.commissioning).not.undefined;
1397+
1398+
await MockTime.resolve(controller.close(), { macrotasks: true });
1399+
1400+
expect(() => peer1.state.commissioning).not.throws();
1401+
expect(peer1.state.commissioning).to.be.undefined;
1402+
});
1403+
});
13651404
});
13661405

13671406
const GLOBAL_ATTRS = [0xfff8, 0xfff9, 0xfffb, 0xfffc, 0xfffd];

0 commit comments

Comments
 (0)