diff --git a/.tool-versions b/.tool-versions index 27966d1..7cd7ba8 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,2 +1,2 @@ -golang 1.24.3 +golang 1.25.1 gitleaks 8.21.2 diff --git a/docs/callgraph-js-limitations.md b/docs/callgraph-js-limitations.md new file mode 100644 index 0000000..c853b7d --- /dev/null +++ b/docs/callgraph-js-limitations.md @@ -0,0 +1,495 @@ +# JavaScript Callgraph Implementation - Limitations and Challenges + +JavaScript's dynamic nature and flexible semantics make it significantly more challenging to build accurate static callgraphs compared to statically typed languages like Go or Java. The current implementation handles basic cases but has several fundamental limitations. + +## Dynamic Function Calls + +JavaScript allows functions to be called dynamically through various mechanisms that cannot be resolved statically. + +### Example + +```javascript +// Function stored in variable and called later +const functionMap = { + add: (a, b) => a + b, + subtract: (a, b) => a - b, +}; + +const operation = getUserInput(); // Runtime value +functionMap[operation](10, 5); // Cannot determine which function is called +``` + +```javascript +// Function name from string +const funcName = "someFunction"; +window[funcName](); // Dynamic lookup - impossible to resolve statically + +// Using eval +eval("myFunction()"); // Code generated at runtime +``` + +Requires runtime information or complex symbolic execution to track all possible +values of dynamic identifiers. + +## Callback Functions and Higher-Order Functions + +JavaScript heavily uses callbacks and functions that accept other functions as arguments. 
+ +### Example + +```javascript +// Callback passed to array method +const numbers = [1, 2, 3]; +numbers.forEach(function (n) { + console.log(n); // This callback is not tracked as a call from forEach +}); + +// Function returned from another function +function makeMultiplier(factor) { + return function (x) { + return x * factor; + }; +} + +const double = makeMultiplier(2); +double(5); // Call to anonymous function - hard to track +``` + +```javascript +// Async callbacks +setTimeout(() => { + dangerousFunction(); // Call happens in callback context +}, 1000); + +fetch("/api/data") + .then((response) => response.json()) + .then((data) => processData(data)); // Chain of callbacks +``` + +The current implementation doesn't track callbacks passed as arguments or analyze their execution context. + +Requires interprocedural dataflow analysis to track function values through parameter passing and returns. + +## Prototype Chain and Dynamic Property Access + +JavaScript's prototype-based inheritance and dynamic property access make method +resolution extremely complex. + +### Example + +```javascript +// Prototype method calls +Array.prototype.customMethod = function () { + this.forEach((x) => console.log(x)); +}; + +[1, 2, 3].customMethod(); // Method added to prototype at runtime +``` + +```javascript +// Dynamic property access +const obj = { + method1() { + console.log("method1"); + }, + method2() { + console.log("method2"); + }, +}; + +const methodName = "method" + Math.floor(Math.random() * 2 + 1); +obj[methodName](); // Cannot determine which method is called +``` + +```javascript +// Object property from another object +const target = { execute: () => console.log("executed") }; +const proxy = new Proxy(target, { + get(target, prop) { + return target[prop]; + }, +}); +proxy.execute(); // Proxy interception not tracked +``` + +Only handles static property access (e.g., `obj.method()`). +Dynamic access like `obj[prop]()` is not resolved. 
+ +Requires modeling the entire prototype chain and tracking all possible property modifications at runtime. + +## Chained Method Calls + +Method chaining is common in JavaScript but requires tracking return types through multiple calls. + +### Example + +```javascript +// Chained calls on builder pattern +const result = builder.setName("test").setAge(30).build(); + +// Chained calls on different types +const processed = "hello" + .toUpperCase() + .split("") + .map((c) => c.charCodeAt(0)) + .filter((n) => n > 100); +``` + +```javascript +// Mixed instance calls +const instance = new TestClass(); +instance.helperMethod().toString().split(","); // Chain across different objects +``` + +Partially handled - only the first method in the chain is tracked correctly. + +```javascript +const result = instance.helperMethod().toString(); +``` + +Only `instance.helperMethod()` is tracked, but the call to `toString()` on the return value is not connected. + +Requires return type inference for each method call to determine what methods are available on the returned object. + +## Async/Await and Promise Chains + +Asynchronous code patterns create implicit control flow that's hard to model. + +### Example + +```javascript +// Async/await +async function fetchData() { + const response = await fetch("/api"); // Implicit promise handling + const data = await response.json(); // Another await + return processData(data); // Actual call may happen later +} + +// Promise chains with error handlers +getData() + .then((data) => transform(data)) + .catch((err) => handleError(err)) + .finally(() => cleanup()); +``` + +```javascript +// Concurrent async calls +async function parallelWork() { + const [result1, result2] = await Promise.all([ + asyncOperation1(), + asyncOperation2(), + ]); + combineResults(result1, result2); +} +``` + +Currently, async functions are treated as regular functions, and promise chain callbacks are not tracked. 
+ +Requires understanding promise semantics, async control flow, and tracking callbacks through promise resolution. + +## Destructuring and Spread Operators + +Modern JavaScript destructuring can obscure what's being called. + +### Example + +```javascript +// Destructured function calls +const { readFile, writeFile } = require("fs/promises"); +readFile("test.txt"); // Import is tracked, but complex destructuring may fail +``` + +```javascript +// Spread in function calls +function combine(...functions) { + return (x) => functions.reduce((acc, fn) => fn(acc), x); +} + +const pipeline = combine(step1, step2, step3); +pipeline(data); // Calls step1, step2, step3 indirectly +``` + +```javascript +// Object destructuring in parameters +function process({ transform, validate }) { + validate(); // Which function is this? + transform(); // Which function is this? +} + +process({ + transform: myTransform, + validate: myValidate, +}); +``` + +Only handles simple destructured imports. Parameter destructuring and spread operators are not tracked. + +Requires tracking destructured bindings through the assignment graph and matching them at call sites. + +## Polymorphism and Type Ambiguity + +Without type information, the same method name could refer to completely different implementations. + +### Example + +```javascript +// Polymorphic calls +let x = new ClassA(); +x = new ClassB(); +x.method1(); // Could be ClassA.method1 or ClassB.method1 + +const y = x; +y.method1(); // Same ambiguity propagated +``` + +```javascript +// Duck typing +function callQuack(duck) { + duck.quack(); // Any object with quack() method +} + +callQuack(new RealDuck()); +callQuack(new RubberDuck()); +callQuack({ quack: () => console.log("quack") }); +``` + +The callgraph will include edges to BOTH `ClassA.method1` and `ClassB.method1` because static analysis cannot determine which type `x` has at the call site. This leads to over-approximation (false positives). 
+ +Requires precise points-to analysis and type inference, which is undecidable for JavaScript's dynamic type system. + +## Module Systems and Dynamic Imports + +JavaScript has multiple module systems (CommonJS, ES6, AMD) and supports dynamic imports. + +### Example + +```javascript +// Dynamic ES6 imports +const moduleName = getModuleName(); +const module = await import(`./${moduleName}.js`); +module.someFunction(); +``` + +```javascript +// Conditional requires +const logger = process.env.DEBUG + ? require("./verbose-logger") + : require("./simple-logger"); + +logger.log("message"); +``` + +```javascript +// Mixed module systems in same file +const fs = require("fs"); // CommonJS +import axios from "axios"; // ES6 +import { log, warn } from "console"; // Named ES6 imports +``` + +Only resolves static `require()` and `import` statements. Dynamic imports with runtime-computed paths cannot be resolved. + +Dynamic imports require runtime path resolution and module loading semantics. + +## this Binding and Context + +JavaScript's `this` keyword behavior changes based on how a function is called. + +### Example + +```javascript +class MyClass { + constructor() { + this.value = 42; + } + + method() { + console.log(this.value); + } +} + +const obj = new MyClass(); +obj.method(); // this = obj +const fn = obj.method; +fn(); // this = undefined (strict mode) or global +setTimeout(obj.method, 100); // this = global/window +``` + +```javascript +// Explicit binding +const bound = obj.method.bind(obj); +bound(); // this = obj + +obj.method.call(otherObj); // this = otherObj +obj.method.apply(otherObj, args); // this = otherObj +``` + +Only handles straightforward method calls on class instances. Doesn't track `this` through bindings, arrow functions, or explicit context changes. + +Requires tracking calling context and modeling all `this` binding rules (implicit, explicit, new, arrow). 
+ +## Generator Functions and Iterators + +Generator functions have special control flow that's hard to model statically. + +### Example + +```javascript +function* generateSequence() { + yield step1(); + yield step2(); + yield step3(); +} + +for (const value of generateSequence()) { + process(value); // Calls happen in different control flow +} +``` + +```javascript +// Async generators +async function* fetchPages() { + let page = 1; + while (true) { + const data = await fetchPage(page++); + if (!data) break; + yield data; + } +} +``` + +Not implemented. + +## Closures and Lexical Scope + +Closures capture variables from outer scopes, making it hard to track what functions have access to what data. + +### Example + +```javascript +function makeCounter() { + let count = 0; + let increment = () => { + count++; + }; + + return { + inc: increment, + get: () => count, + }; +} + +const counter = makeCounter(); +counter.inc(); // Call to closure function +``` + +```javascript +// IIFE (Immediately Invoked Function Expression) +(function () { + const privateVar = "secret"; + + function privateFunction() { + console.log(privateVar); + } + + privateFunction(); // Call inside IIFE +})(); +``` + +Arrow functions are tracked if assigned to variables, but closure scope is not modeled. + +## Object and Function Factories + +JavaScript commonly uses factory patterns that create objects or functions dynamically. 
+ +### Example + +```javascript +// Function factory +function createValidator(rules) { + return function (data) { + return rules.every((rule) => rule(data)); + }; +} + +const validator = createValidator([rule1, rule2, rule3]); +validator(myData); // Calls anonymous function with captured rules +``` + +```javascript +// Object factory with methods +function createAPI(baseURL) { + return { + get: (path) => fetch(`${baseURL}${path}`), + post: (path, data) => + fetch(`${baseURL}${path}`, { + method: "POST", + body: JSON.stringify(data), + }), + }; +} + +const api = createAPI("https://api.example.com"); +api.get("/users"); // Method on factory-created object +``` + +Requires tracking object/function construction through factory calls and modeling their properties. + +## Reflect and Meta programming + +JavaScript's reflection APIs allow completely dynamic code execution. + +### Example + +```javascript +// Reflect API +Reflect.apply(myFunction, thisArg, [arg1, arg2]); +Reflect.construct(MyClass, [arg1, arg2]); + +// Getting methods dynamically +const method = Reflect.get(obj, "methodName"); +method(); +``` + +```javascript +// Property descriptors +Object.defineProperty(obj, "computed", { + get() { + return this.calculate(); // Dynamic getter + }, +}); + +obj.computed; // Triggers getter, calls calculate() +``` + +Not implemented. Requires runtime semantics. + +## Class Field Initializers and Static Blocks + +Modern JavaScript class features include field initializers that execute during construction. 
+ +### Example + +```javascript +class Component { + // Field with function call initializer + id = generateId(); + + // Field with arrow function + handler = () => this.process(); + + // Static initialization block + static { + Component.registry = new Map(); + Component.register(); + } + + static register() { + console.log("Registered"); + } +} + +new Component(); // Triggers field initializers and static block +``` + +Class field initializers are not processed; static blocks are not recognized. +Requires handling class initialization semantics and static block execution order. diff --git a/plugin/callgraph/assignment.go b/plugin/callgraph/assignment.go index 87a9d41..c3d0869 100644 --- a/plugin/callgraph/assignment.go +++ b/plugin/callgraph/assignment.go @@ -28,18 +28,34 @@ func (an *assignmentNode) IsLiteralValue() bool { } type assignmentGraph struct { - Assignments map[string]*assignmentNode // Map of identifier to possible namespaces or other identifiers + Assignments map[string]*assignmentNode // Map of identifier to possible namespaces or other identifiers + nodeCount int // Track number of assignment nodes + limitExceeded bool // Flag when limit is exceeded + resolveCache map[string][]*assignmentNode // Cache for resolve() results } func newAssignmentGraph() *assignmentGraph { - return &assignmentGraph{Assignments: make(map[string]*assignmentNode)} + return &assignmentGraph{ + Assignments: make(map[string]*assignmentNode), + nodeCount: 0, + limitExceeded: false, + resolveCache: make(map[string][]*assignmentNode), + } } func (ag *assignmentGraph) addNode(identifier string, treeNode *sitter.Node) *assignmentNode { existingAssignmentNode, exists := ag.Assignments[identifier] if !exists { + // Check limit before adding new node + if ag.nodeCount >= maxAssignmentGraphNodes && !ag.limitExceeded { + // Log warning once when limit is first exceeded + // We still add the node to prevent nil errors but mark as exceeded + ag.limitExceeded = true + } + 
ag.Assignments[identifier] = newAssignmentGraphNode(identifier, treeNode) + ag.nodeCount++ } else if treeNode != nil && existingAssignmentNode.TreeNode == nil { // If the existing node has no tree node, we can set it now ag.Assignments[identifier].TreeNode = treeNode @@ -52,12 +68,16 @@ func (ag *assignmentGraph) addNode(identifier string, treeNode *sitter.Node) *as func (ag *assignmentGraph) addAssignment(identifier string, identifierTreeNode *sitter.Node, target string, targetTreeNode *sitter.Node) { if _, exists := ag.Assignments[identifier]; !exists { ag.Assignments[identifier] = newAssignmentGraphNode(identifier, identifierTreeNode) + ag.nodeCount++ } if _, exists := ag.Assignments[target]; !exists { ag.Assignments[target] = newAssignmentGraphNode(target, targetTreeNode) + ag.nodeCount++ } if !slices.Contains(ag.Assignments[identifier].AssignedTo, target) { ag.Assignments[identifier].AssignedTo = append(ag.Assignments[identifier].AssignedTo, target) + // Invalidate cache for this identifier since assignment changed + delete(ag.resolveCache, identifier) } if !slices.Contains(ag.Assignments[target].AssignedBy, identifier) { ag.Assignments[target].AssignedBy = append(ag.Assignments[target].AssignedBy, identifier) @@ -66,32 +86,64 @@ func (ag *assignmentGraph) addAssignment(identifier string, identifierTreeNode * // resolves an identifier to its assignment targets (leaf nodes of the DFS tree) // For example, if a = b, b = c, b = d, then resolving a will return {c, d} +// Detects cycles and returns the node itself if a cycle is encountered +// Results are cached to avoid recomputing expensive resolution chains func (ag *assignmentGraph) resolve(identifier string) []*assignmentNode { + // Check cache first + if cached, exists := ag.resolveCache[identifier]; exists { + return cached + } + targets := utils.PtrTo([]*assignmentNode{}) visited := make(map[string]bool) - ag.resolveUtil(identifier, visited, targets) + inProgress := make(map[string]bool) // Track nodes 
currently being processed + ag.resolveUtil(identifier, visited, inProgress, targets) + + // Cache the result + ag.resolveCache[identifier] = *targets + return *targets } // Utility function to resolve the identifier to its targets recursively -func (ag *assignmentGraph) resolveUtil(currentIdentifier string, visited map[string]bool, targets *[]*assignmentNode) { +// Uses cycle detection: inProgress tracks nodes in current recursion path +func (ag *assignmentGraph) resolveUtil(currentIdentifier string, visited map[string]bool, inProgress map[string]bool, targets *[]*assignmentNode) { + // Cycle detected: current node is already being processed in recursion stack + if inProgress[currentIdentifier] { + // Break the cycle by treating this as a leaf node + identifierNode, exists := ag.Assignments[currentIdentifier] + if exists { + *targets = append(*targets, identifierNode) + } + return + } + + // Already fully processed in a different branch - skip to avoid duplicates if visited[currentIdentifier] { return } + + // Mark as in progress before recursing + inProgress[currentIdentifier] = true visited[currentIdentifier] = true identifierNode, exists := ag.Assignments[currentIdentifier] if !exists { + delete(inProgress, currentIdentifier) return } // If the current identifier has no assignments, it's a leaf node if len(identifierNode.AssignedTo) == 0 { *targets = append(*targets, identifierNode) + delete(inProgress, currentIdentifier) return } for _, targetIdentifier := range identifierNode.AssignedTo { - ag.resolveUtil(targetIdentifier, visited, targets) + ag.resolveUtil(targetIdentifier, visited, inProgress, targets) } + + // Remove from in-progress after processing all children + delete(inProgress, currentIdentifier) } diff --git a/plugin/callgraph/assignment_cycle_test.go b/plugin/callgraph/assignment_cycle_test.go new file mode 100644 index 0000000..e4c426b --- /dev/null +++ b/plugin/callgraph/assignment_cycle_test.go @@ -0,0 +1,163 @@ +package callgraph + +import ( + 
"testing" +) + +// TestAssignmentGraphCycleDetection tests that cycle detection works correctly +// and that inProgress map is properly cleaned up +func TestAssignmentGraphCycleDetection(t *testing.T) { + tests := []struct { + name string + assignments map[string][]string // identifier -> assignedTo list + resolveKey string + wantLeaves []string // expected leaf namespaces + }{ + { + name: "simple_cycle_A_to_B_to_A", + assignments: map[string][]string{ + "A": {"B"}, + "B": {"A"}, + }, + resolveKey: "A", + wantLeaves: []string{"A"}, // Cycle detected at A (first node in recursion path) + }, + { + name: "three_node_cycle", + assignments: map[string][]string{ + "A": {"B"}, + "B": {"C"}, + "C": {"A"}, + }, + resolveKey: "A", + wantLeaves: []string{"A"}, // First cycle detection point + }, + { + name: "cycle_with_branch", + assignments: map[string][]string{ + "A": {"B"}, + "B": {"C", "D"}, + "C": {"B"}, // Cycle: B -> C -> B + "D": {}, // Leaf + }, + resolveKey: "A", + wantLeaves: []string{"B", "D"}, // B is cycle point, D is true leaf + }, + { + name: "self_cycle", + assignments: map[string][]string{ + "A": {"A"}, // Self-referential + }, + resolveKey: "A", + wantLeaves: []string{"A"}, + }, + { + name: "no_cycle_simple_chain", + assignments: map[string][]string{ + "A": {"B"}, + "B": {"C"}, + "C": {}, + }, + resolveKey: "A", + wantLeaves: []string{"C"}, + }, + { + name: "multiple_resolves_same_graph", + assignments: map[string][]string{ + "A": {"B"}, + "B": {"A"}, + }, + resolveKey: "B", // Resolve from different starting point + wantLeaves: []string{"B"}, // Cycle detected at B when resolving from B + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ag := newAssignmentGraph() + + // Build the assignment graph + for identifier, targets := range tt.assignments { + for _, target := range targets { + ag.addAssignment(identifier, nil, target, nil) + } + } + + // Resolve from the specified key + resolved := ag.resolve(tt.resolveKey) + + // Check 
that we got the expected number of leaves + if len(resolved) != len(tt.wantLeaves) { + t.Errorf("resolve(%s) returned %d leaves, want %d", tt.resolveKey, len(resolved), len(tt.wantLeaves)) + t.Logf("Got leaves: %v", getNamespaces(resolved)) + t.Logf("Want leaves: %v", tt.wantLeaves) + } + + // Check that all expected leaves are present + gotNamespaces := make(map[string]bool) + for _, node := range resolved { + gotNamespaces[node.Namespace] = true + } + + for _, wantNs := range tt.wantLeaves { + if !gotNamespaces[wantNs] { + t.Errorf("resolve(%s) missing expected leaf %s", tt.resolveKey, wantNs) + } + } + + // Test that multiple resolves work correctly (cache + no dangling inProgress) + resolved2 := ag.resolve(tt.resolveKey) + if len(resolved2) != len(resolved) { + t.Errorf("Second resolve(%s) returned different result count: %d vs %d", + tt.resolveKey, len(resolved2), len(resolved)) + } + }) + } +} + +// TestAssignmentGraphInProgressCleanup specifically tests that inProgress map +// doesn't have dangling entries that could cause issues +func TestAssignmentGraphInProgressCleanup(t *testing.T) { + ag := newAssignmentGraph() + + // Create a complex cycle scenario + // A -> B -> C -> B (cycle) + // A -> D -> E (no cycle) + ag.addAssignment("A", nil, "B", nil) + ag.addAssignment("A", nil, "D", nil) + ag.addAssignment("B", nil, "C", nil) + ag.addAssignment("C", nil, "B", nil) // Cycle + ag.addAssignment("D", nil, "E", nil) + + // First resolve - should work + resolved1 := ag.resolve("A") + if len(resolved1) == 0 { + t.Fatal("First resolve returned no results") + } + + // Second resolve - if inProgress had dangling entries, this might behave differently + resolved2 := ag.resolve("A") + if len(resolved2) != len(resolved1) { + t.Errorf("Second resolve returned different count: %d vs %d", len(resolved2), len(resolved1)) + } + + // Resolve from different starting point - should also work + resolved3 := ag.resolve("B") + if len(resolved3) == 0 { + t.Fatal("Resolve from B 
returned no results") + } + + // Cache should work - resolve again + resolved4 := ag.resolve("B") + if len(resolved4) != len(resolved3) { + t.Errorf("Cached resolve returned different count: %d vs %d", len(resolved4), len(resolved3)) + } +} + +func getNamespaces(nodes []*assignmentNode) []string { + result := make([]string, len(nodes)) + for i, node := range nodes { + result[i] = node.Namespace + } + return result +} diff --git a/plugin/callgraph/callgraph.go b/plugin/callgraph/callgraph.go index 3a973c2..32eee94 100644 --- a/plugin/callgraph/callgraph.go +++ b/plugin/callgraph/callgraph.go @@ -12,6 +12,14 @@ import ( const namespaceSeparator = "//" +const ( + // Maximum number of nodes in callgraph before warning + // Prevents memory exhaustion in extremely large/complex files + maxCallGraphNodes = 50000 + maxAssignmentGraphNodes = 100000 + maxDFSResultItems = 100000 +) + // Refers to one argument passed to a function call // For example, in the function call `foo(a, b, c)`, there are // three CallArgument instances, one for each of `a`, `b`, and `c @@ -90,6 +98,8 @@ type CallGraph struct { Tree core.ParseTree assignmentGraph assignmentGraph classConstructors map[string]bool + nodeCount int // Track total nodes added + limitExceeded bool // Flag to indicate if processing was truncated } func newCallGraph(fileName string, rootNode *sitter.Node, imports []*ast.ImportNode, tree core.ParseTree) (*CallGraph, error) { @@ -149,11 +159,28 @@ func (cg *CallGraph) ensureNodeInAssignmentGraph(identifier string, treeNode *si } func (cg *CallGraph) addNode(identifier string, treeNode *sitter.Node) *CallGraphNode { + // Check if we've exceeded node limits + if cg.nodeCount >= maxCallGraphNodes && !cg.limitExceeded { + log.Warnf("CallGraph node limit (%d) exceeded for file %s, some nodes may be skipped", maxCallGraphNodes, cg.FileName) + cg.limitExceeded = true + } + + // Still add to assignment graph for resolution, but skip callgraph if limit exceeded 
cg.ensureNodeInAssignmentGraph(identifier, treeNode) existingCgNode, exists := cg.Nodes[identifier] if !exists { - cg.Nodes[identifier] = newCallGraphNode(identifier, treeNode) + if cg.nodeCount < maxCallGraphNodes { + cg.Nodes[identifier] = newCallGraphNode(identifier, treeNode) + cg.nodeCount++ + } else { + // Return a stub node to prevent nil pointer errors + return &CallGraphNode{ + Namespace: identifier, + CallsTo: []CallReference{}, + TreeNode: treeNode, + } + } } else if treeNode != nil && existingCgNode.TreeNode == nil { // If the existing node has no tree node, we can set it now cg.Nodes[identifier].TreeNode = treeNode @@ -260,19 +287,31 @@ func (dri DfsResultItem) ToString() string { func (cg *CallGraph) DFS() []DfsResultItem { visited := make(map[string]bool) var dfsResult []DfsResultItem + resultCount := 0 // Initially Interpret callgraph in its natural execution order starting from // the file name which has reference for entrypoints (if any) - cg.dfsUtil(cg.FileName, cg.RootNode, nil, []CallArgument{}, visited, &dfsResult, 0) + cg.dfsUtil(cg.FileName, cg.RootNode, nil, []CallArgument{}, visited, &dfsResult, 0, &resultCount) + + // Check if we hit the limit during initial traversal + if resultCount >= maxDFSResultItems { + log.Warnf("DFS result limit (%d) reached for file %s, some paths may be incomplete", maxDFSResultItems, cg.FileName) + return dfsResult + } // Assumption - All functions and class constructors are reachable // This is required because most files only expose their classes/functions // which are imported and used by other files, so an entrypoint may not be // present in every file. 
for namespace, node := range cg.Nodes { + if resultCount >= maxDFSResultItems { + log.Warnf("DFS result limit (%d) reached for file %s, stopping traversal", maxDFSResultItems, cg.FileName) + break + } + if node.TreeNode != nil && dfsSourceNodeTypes[node.TreeNode.Type()] { if !visited[namespace] { - cg.dfsUtil(namespace, cg.RootNode, nil, []CallArgument{}, visited, &dfsResult, 0) + cg.dfsUtil(namespace, cg.RootNode, nil, []CallArgument{}, visited, &dfsResult, 0, &resultCount) } } } @@ -280,12 +319,21 @@ func (cg *CallGraph) DFS() []DfsResultItem { return dfsResult } -func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, callerIdentifier *sitter.Node, arguments []CallArgument, visited map[string]bool, result *[]DfsResultItem, depth int) { +func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, callerIdentifier *sitter.Node, arguments []CallArgument, visited map[string]bool, result *[]DfsResultItem, depth int, resultCount *int) { + // Check result count limit + if *resultCount >= maxDFSResultItems { + return + } + callgraphNode, callgraphNodeExists := cg.Nodes[namespace] if visited[namespace] { resolvedAssignmentTerminals := cg.assignmentGraph.resolve(namespace) for _, terminalAssignmentNode := range resolvedAssignmentTerminals { + if *resultCount >= maxDFSResultItems { + return + } + terminalCallgraphNode, terminalCallgraphNodeExists := cg.Nodes[terminalAssignmentNode.Namespace] if terminalCallgraphNodeExists { *result = append(*result, DfsResultItem{ @@ -297,6 +345,7 @@ func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, callerIden Depth: depth, Terminal: true, }) + *resultCount++ } } return @@ -313,12 +362,16 @@ func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, callerIden Depth: depth, Terminal: !callgraphNodeExists || len(callgraphNode.CallsTo) == 0, }) + *resultCount++ assignmentGraphNode, assignmentNodeExists := cg.assignmentGraph.Assignments[namespace] if assignmentNodeExists { // Recursively 
visit all the nodes assigned to the current node for _, assigned := range assignmentGraphNode.AssignedTo { - cg.dfsUtil(assigned, caller, callerIdentifier, arguments, visited, result, depth) + if *resultCount >= maxDFSResultItems { + return + } + cg.dfsUtil(assigned, caller, callerIdentifier, arguments, visited, result, depth, resultCount) } } @@ -326,7 +379,10 @@ func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, callerIden // Any variable assignment would be ignored here, since it won't be in callgraph if callgraphNodeExists { for _, callRef := range callgraphNode.CallsTo { - cg.dfsUtil(callRef.CalleeNamespace, callgraphNode, callRef.CallerIdentifier, callRef.Arguments, visited, result, depth+1) + if *resultCount >= maxDFSResultItems { + return + } + cg.dfsUtil(callRef.CalleeNamespace, callgraphNode, callRef.CallerIdentifier, callRef.Arguments, visited, result, depth+1, resultCount) } } } @@ -339,3 +395,8 @@ func (cg *CallGraph) getInstanceKeyword() (string, bool) { } return resolveInstanceKeyword(language) } + +// LimitExceeded returns true if processing was truncated due to complexity limits +func (cg *CallGraph) LimitExceeded() bool { + return cg.limitExceeded +} diff --git a/plugin/callgraph/fixtures/testComplexJavascript.js b/plugin/callgraph/fixtures/testComplexJavascript.js new file mode 100644 index 0000000..162a424 --- /dev/null +++ b/plugin/callgraph/fixtures/testComplexJavascript.js @@ -0,0 +1,48 @@ +// Complex JavaScript file to test infinite loop/high complexity fixes + +// Deep member expression nesting (should trigger depth limit) +const deepNested = a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z; + +// Polymorphic assignments (should trigger polymorphic limit) +let poly = new ClassA(); +poly = new ClassB(); +poly = new ClassC(); +poly = new ClassD(); +poly = new ClassE(); +poly.method1(); +poly.method2(); + +// Circular-like assignments +let x = y; +let y = z; +let z = w; +let w = x; // Creates potential cycle + +// Many 
classes to create large assignment graph +class Class1 { method() {} } +class Class2 { method() {} } +class Class3 { method() {} } +class Class4 { method() {} } +class Class5 { method() {} } +class Class6 { method() {} } +class Class7 { method() {} } +class Class8 { method() {} } +class Class9 { method() {} } +class Class10 { method() {} } + +// Chained method calls +obj.method1().method2().method3().method4().method5().method6().method7().method8(); + +// Deeply nested object access +const result = obj.prop1.prop2.prop3.prop4.prop5.prop6.prop7.prop8.prop9.prop10; + +// Multiple reassignments +let variable = obj1; +variable = obj2; +variable = obj3; +variable = obj4; +variable = obj5; +variable.doSomething(); + +// Complex member expressions with calls +a.b.c().d.e().f.g().h.i().j.k().l.m().n.o().p.q(); diff --git a/plugin/callgraph/fixtures/testJavascript.js b/plugin/callgraph/fixtures/testJavascript.js new file mode 100644 index 0000000..be310f8 --- /dev/null +++ b/plugin/callgraph/fixtures/testJavascript.js @@ -0,0 +1,102 @@ +// Import statements +const fs = require('fs'); +const { readFile, writeFile } = require('fs/promises'); +import axios from 'axios'; +import { log, warn } from 'console'; +const sqlite3 = require('sqlite3'); + +// Simple function declaration +function simpleFunction(param1, param2) { + log("Simple function called"); + return param1 + param2; +} + +// Arrow function +const arrowFunc = (x) => { + warn("Arrow function called"); + return x * 2; +}; + +// Class with constructor and methods +class TestClass { + constructor(name, value) { + this.name = name; + this.value = value; + log("TestClass constructor"); + } + + helperMethod() { + log("Called helper method"); + return this.value; + } + + deepMethod() { + this.helperMethod(); + log("Called deep method"); + return "Success"; + } + + async asyncMethod() { + const result = await readFile("test.txt"); + log("Async method"); + return result; + } +} + +// Create instance and call methods +const 
instance = new TestClass("test", 42); +instance.helperMethod(); +instance.deepMethod(); + +// Module-level function calls - these should now be tracked! +simpleFunction(1, 2); +arrowFunc(5); + +// Additional module-level test +log("Module level call"); + +// Method calls on imported modules +fs.readFileSync("file.txt"); +axios.get("https://example.com"); + +// Chained method calls +const result = instance.helperMethod().toString(); + +// Assignment from method call +const name = instance.name; +const value = instance.helperMethod(); + +// Multiple class instances +class ClassA { + method1() { + log("ClassA method1"); + } + method2() { + warn("ClassA method2"); + } +} + +class ClassB { + method1() { + log("ClassB method1"); + } + method2() { + warn("ClassB method2"); + } + methodUnique() { + log("ClassB unique"); + } +} + +// Polymorphic assignment +let x = new ClassA(); +x = new ClassB(); +x.method1(); + +const y = x; +y.method1(); +y.method2(); +y.methodUnique(); + +// Member expression constructor test +const db = new sqlite3.Database(':memory:'); diff --git a/plugin/callgraph/plugin.go b/plugin/callgraph/plugin.go index 7246e8d..00e9743 100644 --- a/plugin/callgraph/plugin.go +++ b/plugin/callgraph/plugin.go @@ -39,6 +39,7 @@ var supportedLanguages = []core.LanguageCode{ core.LanguageCodePython, core.LanguageCodeJava, core.LanguageCodeGo, + core.LanguageCodeJavascript, } func (p *callgraphPlugin) SupportedLanguages() []core.LanguageCode { @@ -114,10 +115,20 @@ func processChildren(node *sitter.Node, treeData []byte, currentNamespace string return newProcessorResult() } + // Check processing depth to prevent stack overflow in deeply nested structures + if metadata.processingDepth > maxProcessingDepth { + log.Warnf("Maximum processing depth (%d) exceeded at namespace %s, skipping children", maxProcessingDepth, currentNamespace) + return newProcessorResult() + } + childrenResults := newProcessorResult() + // Increment depth for child processing + childMetadata := 
metadata + childMetadata.processingDepth++ + for i := 0; i < int(node.ChildCount()); i++ { - result := processNode(node.Child(i), treeData, currentNamespace, callGraph, metadata) + result := processNode(node.Child(i), treeData, currentNamespace, callGraph, childMetadata) childrenResults.addResults(result) } diff --git a/plugin/callgraph/plugin_test.go b/plugin/callgraph/plugin_test.go index f6641b1..4b44a44 100644 --- a/plugin/callgraph/plugin_test.go +++ b/plugin/callgraph/plugin_test.go @@ -640,6 +640,91 @@ var testcases = []callgraphTestcase{ {Namespace: "fixtures/testGoNestedImports.go//parseJSON", CallerNamespace: "fixtures/testGoNestedImports.go//main", CallerIdentifierContent: "parseJSON"}, }, }, + { + Language: core.LanguageCodeJavascript, + FilePath: "fixtures/testJavascript.js", + ExpectedAssignmentGraph: map[string][]string{ + "fs": {}, + "axios": {}, + "sqlite3": {}, + "readFile": {"fs//promises//readFile"}, + "writeFile": {"fs//promises//writeFile"}, + "log": {"console//log"}, + "warn": {"console//warn"}, + "fixtures/testJavascript.js//simpleFunction": {}, + "fixtures/testJavascript.js//TestClass": {}, + "fixtures/testJavascript.js//instance": {"fixtures/testJavascript.js//TestClass"}, + "fixtures/testJavascript.js//db": {"sqlite3//Database"}, + }, + ExpectedCallGraph: map[string][]expectedCallgraphRefs{ + "fixtures/testJavascript.js": { + {"fixtures/testJavascript.js//require", [][]string{}}, + {"fixtures/testJavascript.js//require", [][]string{}}, + {"fixtures/testJavascript.js//require", [][]string{}}, + {"fixtures/testJavascript.js//TestClass", [][]string{}}, + {"fixtures/testJavascript.js//TestClass//helperMethod", [][]string{}}, + {"fixtures/testJavascript.js//TestClass//deepMethod", [][]string{}}, + {"fixtures/testJavascript.js//simpleFunction", [][]string{}}, + {"fixtures/testJavascript.js//arrowFunc", [][]string{}}, + {"log", [][]string{}}, + {"fs//readFileSync", [][]string{}}, + {"axios//get", [][]string{}}, + 
{"instance.helperMethod()//toString", [][]string{}}, + {"fixtures/testJavascript.js//TestClass//helperMethod", [][]string{}}, + {"fixtures/testJavascript.js//ClassA", [][]string{}}, + {"fixtures/testJavascript.js//ClassB", [][]string{}}, + {"fixtures/testJavascript.js//ClassA//method1", [][]string{}}, + {"fixtures/testJavascript.js//ClassA//method1", [][]string{}}, + {"fixtures/testJavascript.js//ClassA//method2", [][]string{}}, + {"fixtures/testJavascript.js//ClassA//methodUnique", [][]string{}}, + {"sqlite3//Database", [][]string{}}, + }, + "fixtures/testJavascript.js//simpleFunction": { + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//arrowFunc": { + {"warn", [][]string{}}, + }, + "fixtures/testJavascript.js//TestClass//constructor": { + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//TestClass//helperMethod": { + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//TestClass//deepMethod": { + {"fixtures/testJavascript.js//TestClass//this//helperMethod", [][]string{}}, + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//ClassA//method1": { + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//ClassA//method2": { + {"warn", [][]string{}}, + }, + "fixtures/testJavascript.js//ClassB//method1": { + {"log", [][]string{}}, + }, + "fixtures/testJavascript.js//ClassB//method2": { + {"warn", [][]string{}}, + }, + "fixtures/testJavascript.js//ClassB//methodUnique": { + {"log", [][]string{}}, + }, + }, + ExpectedDfsResults: []dfsResultExpectation{ + {Namespace: "fixtures/testJavascript.js//simpleFunction", CallerNamespace: "fixtures/testJavascript.js", CallerIdentifierContent: "simpleFunction"}, + {Namespace: "fixtures/testJavascript.js//arrowFunc", CallerNamespace: "fixtures/testJavascript.js", CallerIdentifierContent: "arrowFunc"}, + {Namespace: "console//log", CallerNamespace: "fixtures/testJavascript.js", CallerIdentifierContent: "log"}, + {Namespace: "console//log", CallerNamespace: 
"fixtures/testJavascript.js//simpleFunction", CallerIdentifierContent: "log"}, + {Namespace: "console//warn", CallerNamespace: "fixtures/testJavascript.js//arrowFunc", CallerIdentifierContent: "warn"}, + {Namespace: "console//log", CallerNamespace: "fixtures/testJavascript.js//TestClass//constructor", CallerIdentifierContent: "log"}, + {Namespace: "console//log", CallerNamespace: "fixtures/testJavascript.js//TestClass//helperMethod", CallerIdentifierContent: "log"}, + {Namespace: "console//log", CallerNamespace: "fixtures/testJavascript.js//TestClass//deepMethod", CallerIdentifierContent: "log"}, + {Namespace: "console//log", CallerNamespace: "fixtures/testJavascript.js//ClassA//method1", CallerIdentifierContent: "log"}, + {Namespace: "console//warn", CallerNamespace: "fixtures/testJavascript.js//ClassA//method2", CallerIdentifierContent: "warn"}, + {Namespace: "fs//readFileSync", CallerNamespace: "fixtures/testJavascript.js", CallerIdentifierContent: "fs.readFileSync"}, + }, + }, } func TestCallgraphPlugin(t *testing.T) { diff --git a/plugin/callgraph/processors.go b/plugin/callgraph/processors.go index cd39596..a12a63e 100644 --- a/plugin/callgraph/processors.go +++ b/plugin/callgraph/processors.go @@ -10,9 +10,22 @@ import ( sitter "github.com/smacker/go-tree-sitter" ) +const ( + // Maximum recursion depth for nested member expressions and attributes + // Prevents infinite recursion in pathological JavaScript files + maxMemberExpressionDepth = 10 + maxAttributeDepth = 10 + maxProcessingDepth = 50 + + // Maximum number of resolved objects to process for polymorphic variables + // Prevents combinatorial explosion in complex type scenarios + maxPolymorphicResolutions = 20 +) + type processorMetadata struct { insideClass bool insideFunction bool + processingDepth int // Track recursion depth for processChildren } type processorResult struct { @@ -55,7 +68,7 @@ func init() { nodeProcessors = map[string]nodeProcessor{ "module": emptyProcessor, "program": 
emptyProcessor, - "expression_statement": skipResultsProcessor, + "expression_statement": expressionStatementProcessorWrapper, "binary_operator": binaryOperatorProcessor, "identifier": identifierProcessor, "class_definition": classDefinitionProcessor, @@ -86,10 +99,17 @@ func init() { "method_declaration": goMethodDeclarationProcessorWrapper, "assignment_expression": assignmentProcessor, - // Go-specific - "call_expression": goCallExpressionProcessorWrapper, - "function_declaration": goFunctionDeclarationProcessorWrapper, + // Go and JavaScript shared + "call_expression": callExpressionProcessorWrapper, + "function_declaration": functionDeclarationProcessorWrapper, "source_file": emptyProcessor, + + // JavaScript-specific + "member_expression": memberExpressionProcessor, + "arrow_function": arrowFunctionProcessor, + "method_definition": methodDefinitionProcessor, + "new_expression": jsNewExpressionProcessor, + "lexical_declaration": lexicalDeclarationProcessor, } for literalNodeType := range literalNodeTypes { @@ -129,6 +149,43 @@ func skipResultsProcessor(emptyNode *sitter.Node, treeData []byte, currentNamesp return newProcessorResult() } +// expressionStatementProcessorWrapper handles expression_statement nodes +// For module-level statements in JavaScript, we want to track calls +// For statements inside functions/classes, we skip result propagation +func expressionStatementProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + // Check if we're at module level (not inside a function or class method body) + // For JavaScript at module level, we want to track the calls + if !metadata.insideFunction && !metadata.insideClass { + // Module-level: process children and track calls in the module namespace + for i := 0; i < int(node.ChildCount()); i++ { + childNode := node.Child(i) + if childNode == nil { + continue + } + + 
childResult := processNode(childNode, treeData, currentNamespace, callGraph, metadata) + + // Register any immediate calls from module level + for _, callRef := range childResult.ImmediateCallRefs { + callGraph.addEdge( + currentNamespace, nil, callRef.CallerIdentifier, + callRef.CalleeNamespace, callRef.CalleeTreeNode, + callRef.Arguments, + ) + } + } + } else { + // Inside function/class: skip results propagation (original behavior) + processChildren(node, treeData, currentNamespace, callGraph, metadata) + } + + return newProcessorResult() +} + // emptyProcessor processes its children and propagates results to parent node, func emptyProcessor(emptyNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { if emptyNode == nil { @@ -652,10 +709,21 @@ func searchSymbolInScopeChain(symbol string, currentNamespace string, callGraph // This can be used to identify correct objNamespace for objectSymbol, finally resulting // objNamespace//attributeQualifierNamespace func dissectAttributeQualifier(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) (string, string, error) { + return dissectAttributeQualifierWithDepth(node, treeData, currentNamespace, callGraph, metadata, 0) +} + +// dissectAttributeQualifierWithDepth is the internal implementation with depth tracking +func dissectAttributeQualifierWithDepth(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata, depth int) (string, string, error) { if node == nil { return "", "", fmt.Errorf("fnAttributeResolver - node is nil") } + // Prevent excessive recursion in deeply nested attributes + if depth > maxAttributeDepth { + log.Warnf("Maximum attribute depth (%d) exceeded for %s, truncating", maxAttributeDepth, node.Content(treeData)) + return node.Content(treeData), "", nil + } + if node.Type() == "identifier" { return 
node.Content(treeData), "", nil } @@ -677,7 +745,7 @@ func dissectAttributeQualifier(node *sitter.Node, treeData []byte, currentNamesp return "", "", fmt.Errorf("sub-attribute node not found for attribute - %s", node.Content(treeData)) } - objectSymbol, objectSubAttributeNamespace, err := dissectAttributeQualifier(objectNode, treeData, currentNamespace, callGraph, metadata) + objectSymbol, objectSubAttributeNamespace, err := dissectAttributeQualifierWithDepth(objectNode, treeData, currentNamespace, callGraph, metadata, depth+1) if err != nil { return "", "", err } @@ -1032,29 +1100,39 @@ func resolveQualifierObjectFieldaccess(invokedObjNode *sitter.Node, treeData []b // Go-specific ------ -// Wrapper functions to check language before processing Go-specific nodes +// Wrapper functions to check language before processing language-specific nodes -func goCallExpressionProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { +func callExpressionProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { treeLanguage, err := callGraph.Tree.Language() - if err != nil || treeLanguage.Meta().Code != core.LanguageCodeGo { - // Not Go, skip + if err != nil { + return newProcessorResult() + } + + switch treeLanguage.Meta().Code { + case core.LanguageCodeGo: + return goCallExpressionProcessor(node, treeData, currentNamespace, callGraph, metadata) + case core.LanguageCodeJavascript: + return jsCallExpressionProcessor(node, treeData, currentNamespace, callGraph, metadata) + default: return newProcessorResult() } - return goCallExpressionProcessor(node, treeData, currentNamespace, callGraph, metadata) } -func goFunctionDeclarationProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { +func 
functionDeclarationProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { treeLanguage, err := callGraph.Tree.Language() if err != nil { return newProcessorResult() } - if treeLanguage.Meta().Code == core.LanguageCodeGo { + switch treeLanguage.Meta().Code { + case core.LanguageCodeGo: return goFunctionDeclarationProcessor(node, treeData, currentNamespace, callGraph, metadata) + case core.LanguageCodeJavascript: + return jsFunctionDeclarationProcessor(node, treeData, currentNamespace, callGraph, metadata) + default: + // Fallback to default function definition processor for other languages + return functionDefinitionProcessor(node, treeData, currentNamespace, callGraph, metadata) } - - // Fallback to default function definition processor for other languages - return functionDefinitionProcessor(node, treeData, currentNamespace, callGraph, metadata) } func goMethodDeclarationProcessorWrapper(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { @@ -1368,3 +1446,427 @@ func extractGoReceiverType(receiverNode *sitter.Node, treeData []byte) string { return "" } + +// JavaScript-specific ------ + +// lexicalDeclarationProcessor handles JavaScript lexical_declaration nodes (const, let, var) +// Routes to variableDeclaratorProcessor which handles the actual assignments +func lexicalDeclarationProcessor(declarationNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if declarationNode == nil { + return newProcessorResult() + } + + // Process all variable_declarator children + for i := 0; uint32(i) < declarationNode.NamedChildCount(); i++ { + declaratorNode := declarationNode.NamedChild(i) + if declaratorNode != nil && declaratorNode.Type() == "variable_declarator" { + processNode(declaratorNode, treeData, currentNamespace, callGraph, 
metadata) + } + } + + return newProcessorResult() +} + +// jsFunctionDeclarationProcessor handles JavaScript function declarations +// Similar to Go, we register top-level functions as callable from the module namespace +func jsFunctionDeclarationProcessor(funcDefNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if funcDefNode == nil { + return newProcessorResult() + } + + functionNameNode := funcDefNode.ChildByFieldName("name") + if functionNameNode == nil { + log.Errorf("JS function declaration without name - %s", funcDefNode.Content(treeData)) + return newProcessorResult() + } + + funcName := functionNameNode.Content(treeData) + functionNamespace := currentNamespace + namespaceSeparator + funcName + + // Add function to call graph + if _, exists := callGraph.Nodes[functionNamespace]; !exists { + callGraph.addNode(functionNamespace, funcDefNode) + log.Debugf("Register JS function declaration for %s - %s", funcName, functionNamespace) + } + + results := newProcessorResult() + + // Process function body + functionBody := funcDefNode.ChildByFieldName("body") + if functionBody != nil { + metadata.insideFunction = true + result := processChildren(functionBody, treeData, functionNamespace, callGraph, metadata) + metadata.insideFunction = false + results.addResults(result) + } + + return results +} + +// jsCallExpressionProcessor handles JavaScript call_expression nodes +// Examples: +// - console.log("hello") -> console//log +// - myFunc(10) -> myFunc (unqualified function call) +// - obj.method() -> obj//method (method call) +func jsCallExpressionProcessor(callNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if callNode == nil { + return newProcessorResult() + } + + result := newProcessorResult() + + // Get function node + functionNode := callNode.ChildByFieldName("function") + if functionNode == nil { + 
return result + } + + // Get arguments node + argumentsNode := callNode.ChildByFieldName("arguments") + callArguments := []CallArgument{} + if argumentsNode != nil { + callArguments = resolveCallArguments(argumentsNode, treeData, currentNamespace, callGraph, metadata) + } + + // Resolve function name based on node type + var qualifiedName string + var resolved bool + + switch functionNode.Type() { + case "member_expression": + // Method call: obj.method() or pkg.func() + qualifiedName, resolved = resolveJSMemberExpression(functionNode, treeData, currentNamespace, callGraph) + case "identifier": + // Simple function call: func() + funcName := functionNode.Content(treeData) + qualifiedName, resolved = resolveJSIdentifier(funcName, currentNamespace, callGraph) + default: + // Other types (e.g., function expressions) - try as identifier + qualifiedName = functionNode.Content(treeData) + resolved = true + } + + if !resolved { + return result + } + + // Add edge to call graph + callGraph.addEdge( + currentNamespace, nil, functionNode, + qualifiedName, nil, + callArguments, + ) + + log.Debugf("JS call: %s -> %s", currentNamespace, qualifiedName) + + return result +} + +// resolveJSMemberExpression resolves JavaScript member expressions like obj.method or pkg.func +// Returns the qualified name and whether it was resolved +func resolveJSMemberExpression(memberNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph) (string, bool) { + return resolveJSMemberExpressionWithDepth(memberNode, treeData, currentNamespace, callGraph, 0) +} + +// resolveJSMemberExpressionWithDepth is the internal implementation with depth tracking +func resolveJSMemberExpressionWithDepth(memberNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, depth int) (string, bool) { + if memberNode == nil || memberNode.Type() != "member_expression" { + return "", false + } + + // Prevent excessive recursion in deeply nested member expressions + if depth 
> maxMemberExpressionDepth { + log.Warnf("Maximum member expression depth (%d) exceeded for %s, using direct resolution", maxMemberExpressionDepth, memberNode.Content(treeData)) + // Fallback: use the entire expression as-is + return memberNode.Content(treeData), true + } + + // Get object (left side) and property (right side) + objectNode := memberNode.ChildByFieldName("object") + propertyNode := memberNode.ChildByFieldName("property") + + if objectNode == nil || propertyNode == nil { + return "", false + } + + // Handle nested member expressions recursively with depth tracking + if objectNode.Type() == "member_expression" { + nestedQualified, resolved := resolveJSMemberExpressionWithDepth(objectNode, treeData, currentNamespace, callGraph, depth+1) + if resolved { + propertyName := propertyNode.Content(treeData) + qualifiedName := nestedQualified + namespaceSeparator + propertyName + log.Debugf("Resolved nested JS member (depth %d): %s", depth, qualifiedName) + return qualifiedName, true + } + } + + objectName := objectNode.Content(treeData) + propertyName := propertyNode.Content(treeData) + + // Check if object is an imported module or variable + objectAssignment, objectResolved := searchSymbolInScopeChain(objectName, currentNamespace, callGraph) + + if objectResolved { + // Could be an imported module or a variable + resolvedObjects := callGraph.assignmentGraph.resolve(objectAssignment.Namespace) + + // Build qualified name from resolved object + if len(resolvedObjects) > 0 { + qualifiedName := resolvedObjects[0].Namespace + namespaceSeparator + propertyName + log.Debugf("Resolved JS member (assigned): %s.%s -> %s", objectName, propertyName, qualifiedName) + return qualifiedName, true + } + } + + // Fallback: construct qualified name directly + qualifiedName := objectName + namespaceSeparator + propertyName + log.Debugf("Resolved JS member (direct): %s.%s -> %s", objectName, propertyName, qualifiedName) + + return qualifiedName, true +} + +// resolveJSIdentifier 
resolves unqualified JavaScript identifiers +// Returns the qualified name and whether it was resolved +func resolveJSIdentifier(identifier string, currentNamespace string, callGraph *CallGraph) (string, bool) { + // Try to find in scope chain + assignmentNode, found := searchSymbolInScopeChain(identifier, currentNamespace, callGraph) + + if found { + return assignmentNode.Namespace, true + } + + // If not found in scope chain, construct namespace-qualified name + qualifiedName := currentNamespace + namespaceSeparator + identifier + + return qualifiedName, true +} + +// memberExpressionProcessor handles JavaScript member_expression nodes +// This is used for property access, not method calls (which are handled by call_expression) +func memberExpressionProcessor(memberNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if memberNode == nil { + return newProcessorResult() + } + + objectNode := memberNode.ChildByFieldName("object") + propertyNode := memberNode.ChildByFieldName("property") + + if objectNode == nil || propertyNode == nil { + return newProcessorResult() + } + + objectSymbol := objectNode.Content(treeData) + propertyName := propertyNode.Content(treeData) + + targetObject, objectResolved := searchSymbolInScopeChain(objectSymbol, currentNamespace, callGraph) + if !objectResolved { + log.Errorf("Object not found in namespace for member expression - %s.%s", objectSymbol, propertyName) + return newProcessorResult() + } + + resolvedObjects := callGraph.assignmentGraph.resolve(targetObject.Namespace) + + // Limit polymorphic expansion to prevent combinatorial explosion + if len(resolvedObjects) > maxPolymorphicResolutions { + log.Warnf("Member expression %s.%s resolved to %d objects (limit: %d), truncating to prevent explosion", + objectSymbol, propertyName, len(resolvedObjects), maxPolymorphicResolutions) + resolvedObjects = resolvedObjects[:maxPolymorphicResolutions] + } + + result 
:= newProcessorResult() + for _, resolvedObject := range resolvedObjects { + finalMemberNamespace := resolvedObject.Namespace + namespaceSeparator + propertyName + finalMemberNode := callGraph.assignmentGraph.addNode( + finalMemberNamespace, + memberNode, + ) + result.ImmediateAssignments = append(result.ImmediateAssignments, finalMemberNode) + } + + return result +} + +// arrowFunctionProcessor handles JavaScript arrow function expressions +// Arrow functions are treated similarly to function declarations +func arrowFunctionProcessor(arrowNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if arrowNode == nil { + return newProcessorResult() + } + + results := newProcessorResult() + + // Try to find the function name from parent variable_declarator + // e.g., const arrowFunc = (x) => { ... } + functionName := "" + parentNode := arrowNode.Parent() + + for parentNode != nil && functionName == "" { + if parentNode.Type() == "variable_declarator" { + nameNode := parentNode.ChildByFieldName("name") + if nameNode != nil { + functionName = nameNode.Content(treeData) + break + } + } + // Also check for assignment_expression: arrowFunc = (x) => { ... 
} + if parentNode.Type() == "assignment_expression" { + leftNode := parentNode.ChildByFieldName("left") + if leftNode != nil && leftNode.Type() == "identifier" { + functionName = leftNode.Content(treeData) + break + } + } + parentNode = parentNode.Parent() + } + + // Determine the namespace for this arrow function + arrowFunctionNamespace := currentNamespace + if functionName != "" { + arrowFunctionNamespace = currentNamespace + namespaceSeparator + functionName + + // Register arrow function as a callable node + if _, exists := callGraph.Nodes[arrowFunctionNamespace]; !exists { + callGraph.addNode(arrowFunctionNamespace, arrowNode) + log.Debugf("Register arrow function definition for %s - %s", functionName, arrowFunctionNamespace) + } + + // Mark this as an assignment so it can be resolved when called + callGraph.assignmentGraph.addNode(arrowFunctionNamespace, arrowNode) + } + + // Process arrow function body + bodyNode := arrowNode.ChildByFieldName("body") + if bodyNode != nil { + metadata.insideFunction = true + result := processChildren(bodyNode, treeData, arrowFunctionNamespace, callGraph, metadata) + metadata.insideFunction = false + results.addResults(result) + } + + return results +} + +// methodDefinitionProcessor handles JavaScript method_definition nodes in classes +// This is for class methods, similar to Java methods +func methodDefinitionProcessor(methodDefNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if methodDefNode == nil { + return newProcessorResult() + } + + methodNameNode := methodDefNode.ChildByFieldName("name") + if methodNameNode == nil { + log.Errorf("Method definition without name - %s", methodDefNode.Content(treeData)) + return newProcessorResult() + } + + // Method definition has its own scope, hence its own namespace + methodName := methodNameNode.Content(treeData) + methodNamespace := currentNamespace + namespaceSeparator + methodName + + // Add method 
to the call graph + if _, exists := callGraph.Nodes[methodNamespace]; !exists { + callGraph.addNode(methodNamespace, methodDefNode) + log.Debugf("Register JavaScript method definition for %s - %s", methodName, methodNamespace) + + // Add virtual method call from class instance => method + if metadata.insideClass { + instanceKeyword, exists := callGraph.getInstanceKeyword() + if exists { + instanceNamespace := currentNamespace + namespaceSeparator + instanceKeyword + namespaceSeparator + methodName + callGraph.addEdge( + instanceNamespace, nil, nil, + methodNamespace, methodDefNode, + []CallArgument{}, + ) + log.Debugf("Register instance method definition for %s - %s\n", methodName, instanceNamespace) + } + + // Register constructor + if methodName == "constructor" { + callGraph.addEdge( + currentNamespace, nil, nil, + methodNamespace, methodDefNode, + []CallArgument{}, + ) + log.Debugf("Register class constructor for %s", currentNamespace) + } + } + } + + results := newProcessorResult() + + // Process method body + methodBody := methodDefNode.ChildByFieldName("body") + if methodBody != nil { + metadata.insideFunction = true + result := processChildren(methodBody, treeData, methodNamespace, callGraph, metadata) + metadata.insideFunction = false + results.addResults(result) + } + + return results +} + +// jsNewExpressionProcessor handles JavaScript new_expression nodes (constructor calls) +// Examples: +// - new TestClass("test", 42) - simple identifier constructor +// - new sqlite3.Database(':memory:') - member expression constructor +func jsNewExpressionProcessor(newNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if newNode == nil { + return newProcessorResult() + } + + result := newProcessorResult() + + // Get constructor name + constructorNode := newNode.ChildByFieldName("constructor") + if constructorNode == nil { + return result + } + + // Get arguments + argumentsNode := 
newNode.ChildByFieldName("arguments") + callArguments := []CallArgument{} + if argumentsNode != nil { + callArguments = resolveCallArguments(argumentsNode, treeData, currentNamespace, callGraph, metadata) + } + + // Resolve constructor name based on node type (similar to jsCallExpressionProcessor) + var constructorNamespace string + var resolved bool + + switch constructorNode.Type() { + case "member_expression": + // Constructor like: new sqlite3.Database() or new pkg.ClassName() + constructorNamespace, resolved = resolveJSMemberExpression(constructorNode, treeData, currentNamespace, callGraph) + case "identifier": + // Simple constructor: new TestClass() + constructorName := constructorNode.Content(treeData) + constructorNamespace, resolved = resolveJSIdentifier(constructorName, currentNamespace, callGraph) + default: + // Fallback: use content as-is + constructorNamespace = constructorNode.Content(treeData) + resolved = true + } + + if !resolved { + return result + } + + log.Debugf("JS constructor resolved to %s", constructorNamespace) + + // Add constructor call edge + callGraph.addEdge( + currentNamespace, nil, newNode, + constructorNamespace, nil, + callArguments, + ) + + // Try to find the class/constructor in the assignment graph for return value tracking + classAssignment, classResolved := searchSymbolInScopeChain(constructorNamespace, currentNamespace, callGraph) + if classResolved { + result.ImmediateAssignments = append(result.ImmediateAssignments, classAssignment) + } + + return result +} diff --git a/plugin/callgraph/signatures.go b/plugin/callgraph/signatures.go index 2292a18..452aec5 100644 --- a/plugin/callgraph/signatures.go +++ b/plugin/callgraph/signatures.go @@ -115,8 +115,19 @@ func (sm *SignatureMatcher) MatchSignatures(cg *CallGraph) ([]SignatureMatchResu matcherResults := []SignatureMatchResult{} + // Early termination if callgraph is too large or was truncated + if cg.limitExceeded { + log.Warnf("CallGraph for %s was truncated due to size 
limits, signature matching may be incomplete", cg.FileName) + } + functionCallTrie := trie.NewTrie[[]DfsResultItem]() functionCallResultItems := cg.DFS() + + // Warn if DFS results are very large + if len(functionCallResultItems) > 50000 { + log.Warnf("Large DFS result set (%d items) for file %s, signature matching may be slow", len(functionCallResultItems), cg.FileName) + } + for _, resultItem := range functionCallResultItems { existingResultItem, exists := functionCallTrie.GetWord(resultItem.Namespace) if !exists {
diff --git a/plugin/callgraph/signatures_test.go b/plugin/callgraph/signatures_test.go
index 73c1bb4..189942e 100644
--- a/plugin/callgraph/signatures_test.go
+++ b/plugin/callgraph/signatures_test.go
@@ -1,9 +1,13 @@
 package callgraph
 
 import (
+	"context"
 	"testing"
 
 	callgraphv1 "buf.build/gen/go/safedep/api/protocolbuffers/go/safedep/messages/code/callgraph/v1"
+	"github.com/safedep/code/core"
+	"github.com/safedep/code/pkg/test"
+	"github.com/safedep/code/plugin"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -59,3 +63,237 @@ func TestValidateSignatures(t *testing.T) {
 		}
 	}
 }
+
+// signatureMatchExpectation describes the result expected for one signature
+// after matching has run against the captured callgraph.
+type signatureMatchExpectation struct {
+	SignatureID      string
+	ShouldMatch      bool
+	ExpectedLanguage core.LanguageCode
+	MinEvidenceCount int
+	CalleeContains   string // Optional: substring to verify in callee namespace
+}
+
+// signatureMatcherTestCase defines a test case for signature matching: the
+// fixture files to analyse, the signatures to load and the expected results.
+type signatureMatcherTestCase struct {
+	Name            string
+	Language        core.LanguageCode
+	FilePaths       []string
+	Signatures      []*callgraphv1.Signature
+	ExpectedMatches []signatureMatchExpectation
+}
+
+// TestSignatureMatcher builds a callgraph from each test case's fixture files
+// through the callgraph plugin, matches the case's signatures against it and
+// checks every expectation in its own subtest.
+func TestSignatureMatcher(t *testing.T) {
+	testCases := []signatureMatcherTestCase{
+		{
+			Name:      "JavaScript signatures",
+			Language:  core.LanguageCodeJavascript,
+			FilePaths: []string{"fixtures/testJavascript.js"},
+			Signatures: []*callgraphv1.Signature{
+				{
+					Id: "js.console.log.usage",
+					Languages: map[string]*callgraphv1.Signature_LanguageMatcher{
+						"javascript": {
+							Match: "any",
+							Conditions: []*callgraphv1.Signature_LanguageMatcher_SignatureCondition{
+								{
+									Type:  "call",
+									Value: "console/log",
+								},
+							},
+						},
+					},
+				},
+				{
+					Id: "js.filesystem.access",
+					Languages: map[string]*callgraphv1.Signature_LanguageMatcher{
+						"javascript": {
+							Match: "any",
+							Conditions: []*callgraphv1.Signature_LanguageMatcher_SignatureCondition{
+								{
+									Type:  "call",
+									Value: "fs/readFileSync",
+								},
+							},
+						},
+					},
+				},
+				{
+					Id: "js.http.request",
+					Languages: map[string]*callgraphv1.Signature_LanguageMatcher{
+						"javascript": {
+							Match: "any",
+							Conditions: []*callgraphv1.Signature_LanguageMatcher_SignatureCondition{
+								{
+									Type:  "call",
+									Value: "axios/get",
+								},
+							},
+						},
+					},
+				},
+				{
+					Id: "js.database.constructor",
+					Languages: map[string]*callgraphv1.Signature_LanguageMatcher{
+						"javascript": {
+							Match: "any",
+							Conditions: []*callgraphv1.Signature_LanguageMatcher_SignatureCondition{
+								{
+									Type:  "call",
+									Value: "sqlite3/Database",
+								},
+							},
+						},
+					},
+				},
+			},
+			ExpectedMatches: []signatureMatchExpectation{
+				{
+					SignatureID:      "js.console.log.usage",
+					ShouldMatch:      true,
+					ExpectedLanguage: core.LanguageCodeJavascript,
+					MinEvidenceCount: 1,
+					CalleeContains:   "log",
+				},
+				{
+					SignatureID:      "js.filesystem.access",
+					ShouldMatch:      true,
+					ExpectedLanguage: core.LanguageCodeJavascript,
+					MinEvidenceCount: 1,
+					CalleeContains:   "readFileSync",
+				},
+				{
+					SignatureID:      "js.http.request",
+					ShouldMatch:      true,
+					ExpectedLanguage: core.LanguageCodeJavascript,
+					MinEvidenceCount: 1,
+					CalleeContains:   "get",
+				},
+				{
+					SignatureID:      "js.database.constructor",
+					ShouldMatch:      true,
+					ExpectedLanguage: core.LanguageCodeJavascript,
+					MinEvidenceCount: 1,
+					CalleeContains:   "Database",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			// Each setup step feeds the next one, so stop the subtest at the
+			// first failure instead of carrying on with nil or partial values.
+			// The assert helpers report the failure and return false.
+
+			// Create signature matcher
+			matcher, err := NewSignatureMatcher(tc.Signatures)
+			if !assert.NoError(t, err, "Failed to create signature matcher") ||
+				!assert.NotNil(t, matcher, "Expected matcher to be non-nil") {
+				return
+			}
+
+			// Setup test context
+			treeWalker, fileSystem, err := test.SetupBasicPluginContext(tc.FilePaths, []core.LanguageCode{tc.Language})
+			if !assert.NoError(t, err, "Failed to setup plugin context") {
+				return
+			}
+
+			// Collect callgraphs. Each callback invocation overwrites the previous
+			// value, so only the last reported callgraph is matched below.
+			var capturedCallgraph *CallGraph
+			callgraphCallback := func(ctx context.Context, cg *CallGraph) error {
+				capturedCallgraph = cg
+				return nil
+			}
+
+			// Execute plugin
+			pluginExecutor, err := plugin.NewTreeWalkPluginExecutor(treeWalker, []core.Plugin{
+				NewCallGraphPlugin(callgraphCallback),
+			})
+			if !assert.NoError(t, err, "Failed to create plugin executor") {
+				return
+			}
+
+			err = pluginExecutor.Execute(context.Background(), fileSystem)
+			if !assert.NoError(t, err, "Failed to execute plugin") {
+				return
+			}
+
+			// Verify we captured a callgraph
+			if !assert.NotNil(t, capturedCallgraph, "Expected to capture a callgraph") {
+				return
+			}
+
+			// Run signature matching
+			matchResults, err := matcher.MatchSignatures(capturedCallgraph)
+			if !assert.NoError(t, err, "Failed to match signatures") {
+				return
+			}
+
+			// Create a map for easier assertion
+			matchedSignatureIds := make(map[string]SignatureMatchResult)
+			for _, result := range matchResults {
+				matchedSignatureIds[result.MatchedSignature.Id] = result
+			}
+
+			// Verify expected matches
+			for _, expectation := range tc.ExpectedMatches {
+				t.Run(expectation.SignatureID, func(t *testing.T) {
+					matchResult, found := matchedSignatureIds[expectation.SignatureID]
+
+					if !expectation.ShouldMatch {
+						assert.False(t, found, "Expected signature %s NOT to match", expectation.SignatureID)
+						return
+					}
+
+					if !assert.True(t, found, "Expected signature %s to match", expectation.SignatureID) {
+						return
+					}
+
+					assert.Equal(t, expectation.ExpectedLanguage, matchResult.MatchedLanguageCode,
+						"Expected language code to match")
+
+					if !assert.NotEmpty(t, matchResult.MatchedConditions, "Expected conditions to match") {
+						return
+					}
+
+					// Count evidences over all conditions and remember the first
+					// condition that carries one; a matched condition is not
+					// assumed to have evidence attached.
+					totalEvidences := 0
+					firstWithEvidence := -1
+					for i, condition := range matchResult.MatchedConditions {
+						if firstWithEvidence < 0 && len(condition.Evidences) > 0 {
+							firstWithEvidence = i
+						}
+						totalEvidences += len(condition.Evidences)
+					}
+					assert.GreaterOrEqual(t, totalEvidences, expectation.MinEvidenceCount,
+						"Expected at least %d evidences", expectation.MinEvidenceCount)
+
+					// Verify callee namespace if specified
+					if expectation.CalleeContains == "" || firstWithEvidence < 0 {
+						return
+					}
+
+					evidence := matchResult.MatchedConditions[firstWithEvidence].Evidences[0]
+					treeData, err := capturedCallgraph.Tree.Data()
+					if !assert.NoError(t, err) {
+						return
+					}
+
+					metadata := evidence.Metadata(treeData)
+					assert.NotEmpty(t, metadata.CallerNamespace, "Expected caller namespace")
+					assert.NotEmpty(t, metadata.CalleeNamespace, "Expected callee namespace")
+					assert.Contains(t, metadata.CalleeNamespace, expectation.CalleeContains,
+						"Expected callee namespace to contain '%s'", expectation.CalleeContains)
+				})
+			}
+		})
+	}
+}