Skip to content

Commit 1204099

Browse files
committed
fix _validate_cpg_async to return false in case of empty CPG
1 parent f7b7bb5 commit 1204099

File tree

2 files changed

+161
-20
lines changed

2 files changed

+161
-20
lines changed

src/services/cpg_generator.py

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -203,29 +203,78 @@ async def _find_joern_executable(self, container, base_command: str) -> str:
203203
return f"/opt/joern/joern-cli/{base_command}"
204204

205205
async def _validate_cpg_async(
    self, container, cpg_path: str, *, min_cpg_size: int = 1024
) -> bool:
    """Validate that the CPG file was created successfully and is not empty.

    Runs ``stat`` on ``cpg_path`` inside ``container`` to confirm the file
    exists, then asks ``self._extract_file_size_async`` for its size and
    rejects files smaller than ``min_cpg_size`` bytes.

    Args:
        container: Object exposing ``exec_run(cmd)`` whose result has a bytes
            ``output`` attribute (presumably a docker SDK container — confirm
            against the caller).
        cpg_path: Path of the CPG file inside the container.
        min_cpg_size: Smallest size, in bytes, accepted as a real CPG.
            Defaults to 1024 (1KB), the threshold the original code used.

    Returns:
        True if the file exists and is at least ``min_cpg_size`` bytes.
        False if it is missing, its size cannot be determined, it is too
        small, or any exception is raised while checking (the exception is
        logged, never propagated).
    """
    # Local import: the file's top-level import block is not visible here.
    import shlex

    try:
        loop = asyncio.get_running_loop()

        # exec_run receives a single command string; quoting keeps a path
        # containing spaces or shell metacharacters as one argument
        # (assumes the docker SDK's shlex-style splitting of string commands).
        quoted_path = shlex.quote(cpg_path)

        def _check_file():
            # Existence probe only; the size is read separately below.
            result = container.exec_run(f"stat {quoted_path}")
            return result.output.decode('utf-8', errors='ignore').strip()

        # exec_run is blocking, so keep it off the event loop thread.
        stat_result = await loop.run_in_executor(None, _check_file)

        # stat reports a missing file through its error message.
        if "No such file" in stat_result or "cannot stat" in stat_result:
            logger.error(f"CPG file not found: {stat_result}")
            return False

        file_size = await self._extract_file_size_async(container, cpg_path)

        if file_size is None:
            logger.error("Could not determine CPG file size")
            return False

        # A file below the threshold is treated as empty or corrupted.
        if file_size < min_cpg_size:
            logger.error(
                f"CPG file is too small ({file_size} bytes), likely empty or corrupted. "
                f"Minimum expected size: {min_cpg_size} bytes"
            )
            return False

        logger.info(f"CPG file created successfully: {cpg_path} (size: {file_size} bytes)")
        return True

    except Exception as e:
        # Validation is best-effort: any failure means "not valid".
        logger.error(f"CPG validation failed: {e}")
        return False
228247

248+
async def _extract_file_size_async(self, container, cpg_path: str) -> Optional[int]:
    """Return the size in bytes of a file inside the container.

    Tries ``stat -c%s`` first. If its output is not an integer, falls back to
    ``wc -c <path>`` and takes the first whitespace-separated field.

    Args:
        container: Object exposing ``exec_run(cmd)`` whose result has a bytes
            ``output`` attribute (presumably a docker SDK container — confirm
            against the caller).
        cpg_path: Path of the file inside the container.

    Returns:
        The file size in bytes, or None if neither command produced a
        parseable size or any exception was raised (the error is logged).
    """
    # Local import: the file's top-level import block is not visible here.
    import shlex

    try:
        loop = asyncio.get_running_loop()

        # Keep a path with spaces or metacharacters as a single argument
        # (assumes the docker SDK's shlex-style splitting of string commands).
        quoted_path = shlex.quote(cpg_path)

        def _run(command: str) -> str:
            result = container.exec_run(command)
            return result.output.decode('utf-8', errors='ignore').strip()

        # exec_run is blocking, so run it in the default executor.
        size_str = await loop.run_in_executor(None, _run, f"stat -c%s {quoted_path}")

        try:
            return int(size_str)
        except ValueError:
            # Fallback: try alternative command if stat -c doesn't work
            logger.debug(f"stat -c command returned: {size_str}, trying alternative method")

        # NOTE: the previous fallback was "wc -c < path". exec_run does not
        # run the command through a shell (docker SDK behaviour — confirm),
        # so "<" was passed to wc as a filename and the output never parsed.
        # "wc -c PATH" prints "<bytes> PATH", so the first field is the size.
        wc_output = await loop.run_in_executor(None, _run, f"wc -c {quoted_path}")
        return int(wc_output.split(maxsplit=1)[0])

    except Exception as e:
        # Covers empty or non-numeric wc output as well as exec failures.
        logger.error(f"Failed to extract file size: {e}")
        return None
277+
229278
async def get_container_id(self, session_id: str) -> Optional[str]:
    """Get container ID for session.

    Looks ``session_id`` up in ``self.session_containers``.

    Returns:
        The stored container ID, or None when the session has no entry.
    """
    return self.session_containers.get(session_id)

tests/test_cpg_generator.py

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -236,28 +236,120 @@ async def test_find_joern_executable_not_found(self, cpg_generator):
236236

237237
@pytest.mark.asyncio
async def test_validate_cpg_success(self, cpg_generator):
    """Test successful CPG validation with valid file size"""
    mock_container = MagicMock()

    # The plain stat call reports an existing file; the size query
    # ("stat -c%s") reports a size above the 1KB minimum.
    def mock_exec_run(cmd):
        mock_result = MagicMock()
        if "stat -c%s" in cmd:
            mock_result.output = b"5242880"  # 5MB
        else:
            mock_result.output = b"stat output..."
        return mock_result

    mock_container.exec_run = mock_exec_run

    result = await cpg_generator._validate_cpg_async(mock_container, "/workspace/cpg.bin")

    assert result is True
248257

249258
@pytest.mark.asyncio
async def test_validate_cpg_failure_file_not_found(self, cpg_generator):
    """Test CPG validation failure when file doesn't exist"""
    mock_container = MagicMock()
    mock_exec_result = MagicMock()
    # Every exec call returns stat's "missing file" error message.
    mock_exec_result.output = b"stat: cannot stat '/workspace/cpg.bin': No such file or directory"
    mock_container.exec_run = MagicMock(return_value=mock_exec_result)

    result = await cpg_generator._validate_cpg_async(mock_container, "/workspace/cpg.bin")

    assert result is False
260269

270+
@pytest.mark.asyncio
async def test_validate_cpg_failure_empty_file(self, cpg_generator):
    """Test CPG validation failure when file is too small (empty or nearly empty)"""
    mock_container = MagicMock()

    # The file exists, but the size query reports zero bytes.
    def mock_exec_run(cmd):
        mock_result = MagicMock()
        if "stat -c%s" in cmd:
            mock_result.output = b"0"  # empty file
        else:
            mock_result.output = b"stat output..."
        return mock_result

    mock_container.exec_run = mock_exec_run

    result = await cpg_generator._validate_cpg_async(mock_container, "/workspace/cpg.bin")

    assert result is False
290+
291+
@pytest.mark.asyncio
async def test_validate_cpg_failure_too_small(self, cpg_generator):
    """Test CPG validation failure when file is smaller than minimum threshold"""
    mock_container = MagicMock()

    # The file exists and is non-empty, but is below the 1KB minimum.
    def mock_exec_run(cmd):
        mock_result = MagicMock()
        if "stat -c%s" in cmd:
            mock_result.output = b"512"  # 512 bytes
        else:
            mock_result.output = b"stat output..."
        return mock_result

    mock_container.exec_run = mock_exec_run

    result = await cpg_generator._validate_cpg_async(mock_container, "/workspace/cpg.bin")

    assert result is False
311+
312+
@pytest.mark.asyncio
async def test_extract_file_size_success(self, cpg_generator):
    """Test successful file size extraction"""
    mock_container = MagicMock()
    commands = []

    def mock_exec_run(cmd):
        commands.append(cmd)
        mock_result = MagicMock()
        if "stat -c%s" in cmd:
            mock_result.output = b"5242880"  # 5MB
        return mock_result

    mock_container.exec_run = mock_exec_run

    result = await cpg_generator._extract_file_size_async(mock_container, "/workspace/cpg.bin")

    assert result == 5242880
    # A parseable stat result must not trigger the wc fallback.
    assert len(commands) == 1
328+
329+
@pytest.mark.asyncio
async def test_extract_file_size_fallback(self, cpg_generator):
    """Test file size extraction with fallback to wc command"""
    mock_container = MagicMock()
    commands = []

    def mock_exec_run(cmd):
        commands.append(cmd)
        mock_result = MagicMock()

        if "stat -c%s" in cmd:
            # First call fails with non-numeric output
            mock_result.output = b"invalid"
        elif "wc -c" in cmd:
            # Fallback to wc
            mock_result.output = b"1048576"  # 1MB
        return mock_result

    mock_container.exec_run = mock_exec_run

    result = await cpg_generator._extract_file_size_async(mock_container, "/workspace/cpg.bin")

    assert result == 1048576
    # The size must come from the fallback: stat first, then wc.
    assert len(commands) == 2
    assert "stat -c%s" in commands[0]
    assert "wc -c" in commands[1]
352+
261353
@pytest.mark.asyncio
262354
async def test_get_container_id(self, cpg_generator):
263355
"""Test getting container ID for session"""

0 commit comments

Comments
 (0)