diff --git a/assertion/function/assertiontree/rich_check_effect.go b/assertion/function/assertiontree/rich_check_effect.go index c026a8b6..6b74ed5d 100644 --- a/assertion/function/assertiontree/rich_check_effect.go +++ b/assertion/function/assertiontree/rich_check_effect.go @@ -22,6 +22,7 @@ import ( "go.uber.org/nilaway/annotation" "go.uber.org/nilaway/guard" + "go.uber.org/nilaway/hook" "go.uber.org/nilaway/util/asthelper" "go.uber.org/nilaway/util/typeshelper" "golang.org/x/tools/go/cfg" @@ -101,6 +102,53 @@ func (f *FuncErrRet) equals(effect RichCheckEffect) bool { f.guard == otherFuncErrRet.guard } +// A FuncErrRetNonnilArg is a RichCheckEffect for a trusted function call `f(..., &v, ...)` whose +// final result is of type `error` and which, on success (`err == nil`), guarantees that the pointee +// `v` of a by-address argument is non-nil. The canonical example is `json.Unmarshal(data, &v)`: once +// the returned error is checked to be nil, `v` is populated and hence non-nil. +// +// This differs from FuncErrRet, which guards a value *produced* by the call (a return value): there, +// the producer is itself guarded so that guarding the consumer suffices. Here the pointee is a +// pre-existing expression with its own (e.g., unassigned/nilable) producer, so on the error-is-nil +// branch we must actively *produce* a non-nil value for it, exactly as a `v != nil` check would. +type FuncErrRetNonnilArg struct { + root *RootAssertionNode // an associated root node + err TrackableExpr // the `error`-typed return of the function + arg TrackableExpr // the trackable pointee argument guaranteed non-nil on success + argExpr ast.Expr // the raw pointee expression, for producing a non-nil value +} + +func (f *FuncErrRetNonnilArg) isTriggeredBy(expr ast.Expr) bool { + return exprIsPositiveNilCheck(f.root, expr, f.err) +} + +func (f *FuncErrRetNonnilArg) isInvalidatedBy(node ast.Node) bool { + // Reassigning either the checked error or the pointee breaks the correspondence established at + // the call site, so either invalidates the effect. + return nodeAssignsAny(f.root, node, f.err, f.arg) +} + +func (f *FuncErrRetNonnilArg) effectIfTrue(node *RootAssertionNode) { + node.AddProduction(&annotation.ProduceTrigger{ + Annotation: &annotation.NegativeNilCheck{ProduceTriggerNever: &annotation.ProduceTriggerNever{}}, + Expr: f.argExpr, + }) +} + +func (f *FuncErrRetNonnilArg) effectIfFalse(*RootAssertionNode) { + // no-op +} + +func (f *FuncErrRetNonnilArg) isNoop() bool { return false } + +func (f *FuncErrRetNonnilArg) equals(effect RichCheckEffect) bool { + other, ok := effect.(*FuncErrRetNonnilArg) + if !ok { + return false + } + return f.root.Equal(f.err, other.err) && f.root.Equal(f.arg, other.arg) +} + // okRead provides a general implementation for the special return form: `v1, v2, ..., ok := expr`. // Concrete examples of patterns supported are: // - map ok read: `v, ok := m[k]` @@ -434,6 +482,19 @@ func NodeTriggersFuncErrRet(rootNode *RootAssertionNode, nonceGenerator *guard.N }), true } + // Besides the return values handled above, certain trusted functions also guarantee that one of + // their (pointer) arguments is non-nil once the error return is checked to be nil. + if argExpr := hook.ErrorReturnNonnilArg(rootNode.Pass(), callExpr); argExpr != nil { + if argExprParsed := parseExpr(rootNode, argExpr); argExprParsed != nil { + effects, someEffect = append(effects, &FuncErrRetNonnilArg{ + root: rootNode, + err: errExprParsed, + arg: argExprParsed, + argExpr: argExpr, + }), true + } + } + return effects, someEffect } @@ -457,6 +518,26 @@ func nodeAssignsOneWithoutOther(rootNode *RootAssertionNode, node ast.Node, one, return assignsOne && !assignsOther } +// nodeAssignsAny returns true if `node` is an assignment to any of the variables in `exprs`. +func nodeAssignsAny(rootNode *RootAssertionNode, node ast.Node, exprs ...TrackableExpr) bool { + assignStmt, ok := node.(*ast.AssignStmt) + if !ok { + return false + } + for _, assignedVal := range assignStmt.Lhs { + parsedLHSExpr := parseExpr(rootNode, assignedVal) + if parsedLHSExpr == nil { + continue + } + for _, expr := range exprs { + if rootNode.Equal(parsedLHSExpr, expr) { + return true + } + } + } + return false +} + // exprIsPositiveNilCheck checks if an expression `expr` is of the form `checksVar == nil` for some // variable `checksVar`. Note that because of preprocessing done in `restructureBlock` from // `preprocess_blocks.go`, this suffices to handle cases such as `nil != checksVar` as well. diff --git a/hook/error_return.go b/hook/error_return.go new file mode 100644 index 00000000..62c2ab03 --- /dev/null +++ b/hook/error_return.go @@ -0,0 +1,89 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hook + +import ( + "go/ast" + "go/token" + "regexp" + + "go.uber.org/nilaway/util/analysishelper" +) + +// ErrorReturnNonnilArg inspects a call to a trusted function whose `error` return, when checked to +// be nil, guarantees that one of its (pointer) arguments is non-nil. It returns the "pointee" +// expression (the `x` in an `&x` argument) that should be guarded as non-nil once the error return +// is verified to be nil. For example, for `json.Unmarshal(data, &v)` it returns `v`, modeling the +// fact that `v` is non-nil after a successful unmarshal (i.e., `err == nil`). +// +// This is intentionally a different hook category from ReplaceConditional (used for `errors.As`). +// `errors.As(err, &target)` returns a `bool` that is consumed *directly* in the conditional, so it +// can be rewritten in place as `errors.As(...) && target != nil`. The trusted functions handled +// here instead return an `error` that is checked *separately* from the call (e.g., +// `err := json.Unmarshal(...); if err != nil { ... }`). That check-then-effect pattern is modeled by +// the FuncErrRet rich-check-effect (see assertion/function/assertiontree/rich_check_effect.go), +// which this hook feeds: the returned expression is guarded as non-nil in the branch where the +// error is checked to be nil, just like a function's own return values are. +// +// If the call does not match any known function, nil is returned. +func ErrorReturnNonnilArg(pass *analysishelper.EnhancedPass, call *ast.CallExpr) ast.Expr { + for sig, act := range _errorReturnNonnilArgs { + if sig.matchCall(pass, call) { + return act.action(call, act.argIndex) + } + } + return nil +} + +// errorReturnNonnilArgsAction computes the pointee expression guaranteed to be non-nil when the +// matched call's error return is nil, given the index of the relevant argument. +type errorReturnNonnilArgsAction func(call *ast.CallExpr, argIndex int) ast.Expr + +// pointeeOfArg extracts the pointee expression `x` from an `&x` argument at the given index. This +// models functions that populate an out-parameter passed by address (e.g., `json.Unmarshal(data, +// &v)`), where a nil error return implies the pointee `x` is non-nil. If the argument is not of the +// `&x` form, nil is returned (we make no claim about it). +func pointeeOfArg(call *ast.CallExpr, argIndex int) ast.Expr { + if argIndex < 0 || argIndex >= len(call.Args) { + return nil + } + unaryExpr, ok := call.Args[argIndex].(*ast.UnaryExpr) + if !ok || unaryExpr.Op != token.AND { + return nil + } + return unaryExpr.X +} + +// _errorReturnNonnilArgs defines the map of trusted functions and their corresponding actions on a +// particular argument. +var _errorReturnNonnilArgs = map[trustedSig]struct { + action errorReturnNonnilArgsAction + argIndex int +}{ + // `encoding/json.Unmarshal(data, &v)` and `encoding/xml.Unmarshal(data, &v)` populate `v`, so a + // nil error return implies `v != nil`. + // + // Note that this is technically unsound in a rare edge case: unmarshaling the JSON literal + // `null` into a pointer leaves it nil while still returning a nil error [1]. In practice this is + // rare enough that the convention is to treat the destination as populated after a successful + // unmarshal. + // + // [1] https://pkg.go.dev/encoding/json#Unmarshal + { + kind: _func, + enclosingRegex: regexp.MustCompile(`^encoding/(json|xml)$`), + nameRegex: regexp.MustCompile(`^Unmarshal$`), + }: {action: pointeeOfArg, argIndex: 1}, +} diff --git a/testdata/src/go.uber.org/trustedfunc/inference/json.go b/testdata/src/go.uber.org/trustedfunc/inference/json.go new file mode 100644 index 00000000..54cca1cd --- /dev/null +++ b/testdata/src/go.uber.org/trustedfunc/inference/json.go @@ -0,0 +1,37 @@ +package inference + +import "encoding/json" + +// testJSONUnmarshal exercises the `ErrorReturnNonnilArg` hook: `json.Unmarshal(data, &v)` populates +// `v`, so the pointee is treated as non-nil once the error return is checked to be nil. +func testJSONUnmarshal(data []byte) { + // `err != nil` early return: pointee is non-nil on the fallthrough (error-is-nil) path. + var v1 *int + if err := json.Unmarshal(data, &v1); err != nil { + return + } + print(*v1) // safe + + // `err == nil` positive check: pointee is non-nil inside the block. + var v2 *int + err := json.Unmarshal(data, &v2) + if err == nil { + print(*v2) // safe + } + + // Error return not checked at all: no guarantee. + var v3 *int + json.Unmarshal(data, &v3) + print(*v3) //want "unassigned variable `v3` dereferenced" + + // Error return discarded into the blank identifier: no guarantee. + var v4 *int + _ = json.Unmarshal(data, &v4) + print(*v4) //want "unassigned variable `v4` dereferenced" + + // Dereference on the error path (`err != nil`): pointee is not guarded here. + var v5 *int + if err := json.Unmarshal(data, &v5); err != nil { + print(*v5) //want "unassigned variable `v5` dereferenced" + } +} diff --git a/testdata/src/go.uber.org/trustedfunc/inference/xml.go b/testdata/src/go.uber.org/trustedfunc/inference/xml.go new file mode 100644 index 00000000..6bcdb975 --- /dev/null +++ b/testdata/src/go.uber.org/trustedfunc/inference/xml.go @@ -0,0 +1,37 @@ +package inference + +import "encoding/xml" + +// testXMLUnmarshal exercises the `ErrorReturnNonnilArg` hook: `xml.Unmarshal(data, &v)` populates +// `v`, so the pointee is treated as non-nil once the error return is checked to be nil. +func testXMLUnmarshal(data []byte) { + // `err != nil` early return: pointee is non-nil on the fallthrough (error-is-nil) path. + var v1 *int + if err := xml.Unmarshal(data, &v1); err != nil { + return + } + print(*v1) // safe + + // `err == nil` positive check: pointee is non-nil inside the block. + var v2 *int + err := xml.Unmarshal(data, &v2) + if err == nil { + print(*v2) // safe + } + + // Error return not checked at all: no guarantee. + var v3 *int + xml.Unmarshal(data, &v3) + print(*v3) //want "unassigned variable `v3` dereferenced" + + // Error return discarded into the blank identifier: no guarantee. + var v4 *int + _ = xml.Unmarshal(data, &v4) + print(*v4) //want "unassigned variable `v4` dereferenced" + + // Dereference on the error path (`err != nil`): pointee is not guarded here. + var v5 *int + if err := xml.Unmarshal(data, &v5); err != nil { + print(*v5) //want "unassigned variable `v5` dereferenced" + } +}