Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 5 additions & 40 deletions scripts/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
build_searchable_text,
is_excluded_by_rules,
)
from utils.path_helpers import get_workspace_folder_paths as _shared_get_workspace_folder_paths # noqa: E402
from utils.path_helpers import ( # noqa: E402
get_workspace_folder_paths as _shared_get_workspace_folder_paths,
normalize_file_path,
to_epoch_ms,
)
from utils.tool_parser import parse_tool_call # noqa: E402
from utils.workspace_path import get_cli_chats_path # noqa: E402
from utils.cli_chat_reader import ( # noqa: E402
Expand Down Expand Up @@ -141,45 +145,6 @@ def get_global_state_dir() -> str:
return os.path.join(str(Path.home()), ".cursor-chat-browser")


def normalize_file_path(p: str) -> str:
    r"""Normalise a file path or ``file://`` URI into a comparable key.

    Steps, in order:

    1. Strip a leading ``file:///`` prefix, then a leading ``file://``
       prefix if one is (still) present.
    2. Percent-decode the remainder (``%20`` -> space, ``%3A`` -> ``:``).
    3. On win32: turn ``/`` into ``\``, drop a backslash that precedes a
       drive letter (``\c:`` -> ``c:``) and lowercase the result.
    4. On other platforms: apply the same slash flip and lowercasing only
       when the path starts with a Windows drive (``C:/`` or ``C:\``), so a
       Windows-authored path yields the same key on every host. Any other
       path is returned percent-decoded but otherwise unchanged.

    Args:
        p: Path or ``file:`` URI. ``None`` or ``""`` is treated as empty.

    Returns:
        The normalised path string (``""`` for empty input).
    """
    # Local import, as in the original block, so the file-level import list
    # does not need to change.
    from urllib.parse import unquote

    n = re.sub(r"^file:///", "", p or "")
    n = re.sub(r"^file://", "", n)
    # unquote() on a str replaces undecodable sequences instead of raising,
    # so no exception guard is needed here.
    n = unquote(n)
    if sys.platform == "win32":
        n = n.replace("/", "\\")
        n = re.sub(r"^\\([a-zA-Z]:)", r"\1", n)
        n = n.lower()
    elif re.match(r"[a-zA-Z]:[/\\]", n):
        # Windows drive path seen on a non-Windows host: normalise it the
        # same way win32 does so keys agree across platforms.
        n = n.replace("/", "\\").lower()
    return n


def to_epoch_ms(value) -> int:
    """Convert a timestamp to integer epoch milliseconds.

    Args:
        value: ``None``, an ``int``/``float``, or a string.

            * Numbers greater than ``1e12`` are taken to be milliseconds
              already and are truncated to ``int``.
            * Other positive numbers are treated as seconds and multiplied
              by 1000.
            * Strings are parsed as ISO-8601 first (a trailing ``Z`` is
              rewritten to ``+00:00``); if that fails they are parsed as a
              numeric literal and handled like a number. An ISO string
              without an offset is interpreted in the local timezone by
              ``datetime.timestamp()``.

    Returns:
        Epoch milliseconds, or ``0`` for ``None``, non-positive numbers,
        NaN, infinity, unparseable strings and unsupported types.
    """
    if value is None:
        return 0
    if isinstance(value, (int, float)):
        # int(inf) raises OverflowError; an infinite timestamp is garbage,
        # so map it to 0 like every other unconvertible input. NaN and -inf
        # already fall through both comparisons below to the final 0.
        if value == float("inf"):
            return 0
        if value > 1e12:
            return int(value)
        if value > 0:
            return int(value * 1000)
        return 0
    if isinstance(value, str):
        try:
            cleaned = value.rstrip("Z") + "+00:00" if value.endswith("Z") else value
            dt = datetime.fromisoformat(cleaned)
            return int(dt.timestamp() * 1000)
        except Exception:
            # Not ISO-8601 (or out of range): fall back to numeric parsing.
            pass
        try:
            return to_epoch_ms(float(value))
        except Exception:
            # Not a numeric literal either: best-effort result is 0.
            pass
    return 0


def slug(s: str) -> str:
s = re.sub(r'[<>:"/\\|?*]', "_", s or "")
s = re.sub(r"\s+", "-", s)
Expand Down
141 changes: 141 additions & 0 deletions tests/test_normalize_file_path.py
Comment thread
bradjin8 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Tests for utils.path_helpers path/timestamp helpers (closes #46).

Covers ``normalize_file_path`` and ``to_epoch_ms``, both previously duplicated
in scripts/export.py. All call-sites in the web app and CLI export script now
use the shared implementations in utils.path_helpers.

Test inventory (this module only): 21 cases — 12 ``normalize_file_path``,
9 ``to_epoch_ms``. On win32, 2 cases skip (POSIX passthrough in
``TestNormalizeFilePathPosixPassthrough`` only). A full-suite run may report
more skips (e.g. ``skipped=4``) from other test modules, not this file.
"""

import sys
import unittest
from datetime import datetime, timezone

from utils.path_helpers import normalize_file_path, to_epoch_ms


class TestNormalizeFilePathUriStripping(unittest.TestCase):
    """The ``file:`` scheme prefix is removed from URI inputs."""

    def _assert_scheme_stripped(self, uri: str, segment: str) -> None:
        """Check *uri* loses its ``file:`` prefix but still contains *segment*."""
        normalized = normalize_file_path(uri)
        self.assertFalse(normalized.startswith("file:"))
        self.assertIn(segment, normalized)

    def test_file_triple_slash_stripped(self) -> None:
        """``file:///`` URIs are reduced to their path part."""
        self._assert_scheme_stripped("file:///home/user/project", "home")

    def test_file_double_slash_stripped(self) -> None:
        """``file://`` URIs (host form) are reduced to their path part."""
        self._assert_scheme_stripped("file://server/share/file.txt", "share")

    def test_empty_string(self) -> None:
        """Empty input stays empty."""
        normalized = normalize_file_path("")
        self.assertEqual(normalized, "")


class TestNormalizeFilePathPercentEncoding(unittest.TestCase):
    """Percent-escaped characters in the input are decoded."""

    def test_space_decoded(self) -> None:
        """``%20`` becomes a literal space."""
        normalized = normalize_file_path("file:///C:/My%20Documents/file.txt")
        self.assertNotIn("%20", normalized)
        self.assertIn("my documents", normalized)

    def test_hash_decoded(self) -> None:
        """``%23`` becomes a literal ``#``."""
        normalized = normalize_file_path("file:///C:/repo/src%23internal/mod.py")
        self.assertNotIn("%23", normalized)
        self.assertIn("#", normalized)

    def test_percent_encoded_colon_in_uri_prefix(self) -> None:
        """URI-style /d%3A/... path: %3A is decoded to ':'.

        Only test that exercises the leading-``/`` + drive-letter shape end-to-end
        (Cursor sometimes stores ``/d%3A/...`` URIs). Other drive-path tests use
        ``D:/...`` or ``D:\\...`` without a leading slash.

        On win32 the win32 branch strips the leading slash, lowercases, and
        normalises to backslashes. On other platforms the leading ``/`` prevents
        the ``^[a-zA-Z]:[/\\]`` cross-platform branch in ``path_helpers``, so the
        path is returned as percent-decoded only (no slash flip / lowercasing).
        """
        normalized = normalize_file_path("/d%3A/_Work/project")
        self.assertNotIn("%3A", normalized)
        on_windows = sys.platform == "win32"
        expected = r"d:\_work\project" if on_windows else "/d:/_Work/project"
        self.assertEqual(normalized, expected)


class TestNormalizeFilePathWindowsDrives(unittest.TestCase):
    """Windows drive-letter paths normalise identically on every platform.

    win32 handles them in its native branch. Elsewhere the
    ``^[a-zA-Z]:[/\\]`` regex branch flips forward slashes to backslashes and
    lowercases the path, so reading Cursor's Windows workspaceStorage from
    Linux/macOS yields the same keys.
    """

    def _assert_normalizes_to(self, raw: str, expected: str) -> None:
        """Check that *raw* normalises to exactly *expected*."""
        self.assertEqual(normalize_file_path(raw), expected)

    def test_backslash_drive_path_lowercased(self) -> None:
        """A backslash drive path is lowercased."""
        self._assert_normalizes_to(r"D:\Work\Boost", r"d:\work\boost")

    def test_forward_slash_drive_path_converted(self) -> None:
        """A forward-slash drive path gets backslashes and lowercase."""
        self._assert_normalizes_to("D:/Work/Boost", r"d:\work\boost")

    def test_file_uri_with_windows_drive(self) -> None:
        """``file:///`` is stripped first, then the drive-letter rules apply."""
        self._assert_normalizes_to(
            "file:///C:/Users/Dev/project", r"c:\users\dev\project"
        )

    def test_mixed_case_drive_lowercased(self) -> None:
        """The drive letter and every path segment are lowercased."""
        normalized = normalize_file_path(r"E:\Mixed\Case\Path")
        self.assertTrue(normalized.startswith("e:"))
        self.assertEqual(normalized, r"e:\mixed\case\path")


class TestNormalizeFilePathPosixPassthrough(unittest.TestCase):
    """Plain POSIX paths come back unchanged on non-Windows hosts."""

    @unittest.skipIf(sys.platform == "win32", "POSIX path semantics differ on win32")
    def test_plain_posix_path_unchanged_on_non_windows(self) -> None:
        """An absolute POSIX path is returned as-is."""
        posix_path = "/home/user/project"
        self.assertEqual(normalize_file_path(posix_path), posix_path)

    @unittest.skipIf(
        sys.platform == "win32", "plain relative path behaviour differs on win32"
    )
    def test_path_without_scheme_unchanged(self) -> None:
        """A relative path with no scheme is returned as-is."""
        relative_path = "relative/path/file.py"
        self.assertEqual(normalize_file_path(relative_path), relative_path)


class TestToEpochMs(unittest.TestCase):
    """``to_epoch_ms`` accepts numbers and strings and returns epoch ms."""

    # The same instant expressed in seconds and in milliseconds.
    _SECONDS = 1_700_000_000
    _MILLIS = 1_700_000_000_000

    def test_none_returns_zero(self) -> None:
        """``None`` maps to 0."""
        result = to_epoch_ms(None)
        self.assertEqual(result, 0)

    def test_ms_int_passthrough(self) -> None:
        """An int already in milliseconds is returned unchanged."""
        result = to_epoch_ms(self._MILLIS)
        self.assertEqual(result, self._MILLIS)

    def test_seconds_int_converted_to_ms(self) -> None:
        """An int in seconds is scaled to milliseconds."""
        result = to_epoch_ms(self._SECONDS)
        self.assertEqual(result, self._MILLIS)

    def test_seconds_float_converted_to_ms(self) -> None:
        """A float in seconds keeps its fractional part as milliseconds."""
        result = to_epoch_ms(1_700_000_000.5)
        self.assertEqual(result, 1_700_000_000_500)

    def test_zero_returns_zero(self) -> None:
        """Zero maps to 0."""
        result = to_epoch_ms(0)
        self.assertEqual(result, 0)

    def test_iso8601_zulu(self) -> None:
        """An ISO-8601 string with a ``Z`` suffix is read as UTC."""
        moment = datetime(2026, 2, 3, 20, 39, 54, 17_000, tzinfo=timezone.utc)
        expected = int(moment.timestamp() * 1000)
        self.assertEqual(to_epoch_ms("2026-02-03T20:39:54.017Z"), expected)

    def test_numeric_string_already_ms(self) -> None:
        """A numeric string in milliseconds is parsed and returned unchanged."""
        result = to_epoch_ms("1700000000000")
        self.assertEqual(result, self._MILLIS)

    def test_numeric_string_seconds(self) -> None:
        """A numeric string in seconds is parsed and scaled to milliseconds."""
        result = to_epoch_ms("1700000000")
        self.assertEqual(result, self._MILLIS)

    def test_unrecognised_string_returns_zero(self) -> None:
        """A string that is neither ISO-8601 nor numeric maps to 0."""
        result = to_epoch_ms("not-a-timestamp")
        self.assertEqual(result, 0)


# Run this module's tests when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
Comment thread
bradjin8 marked this conversation as resolved.
Loading