Skip to content

solidworks_mcp.ui.services.docs_service

solidworks_mcp.ui.services.docs_service

Docs context fetching and RAG ingestion for the Prefab CAD dashboard.

Responsibilities (Single Responsibility principle): - Fetch and filter docs from the MCP docs endpoint (fetch_docs_context). - Ingest local files or URLs into the simple retrieval index (ingest_reference_source).

Does NOT own: LLM calls, preview export, session state mutation beyond metadata merges.

Attributes

DEFAULT_API_ORIGIN module-attribute

DEFAULT_API_ORIGIN = getenv('SOLIDWORKS_UI_API_ORIGIN', 'http://127.0.0.1:8766')

DEFAULT_RAG_DIR module-attribute

DEFAULT_RAG_DIR = Path('.solidworks_mcp') / 'rag'

Classes

HTMLTextExtractor

HTMLTextExtractor()

Bases: HTMLParser

Minimal HTML-to-plain-text extractor.

Strips tags and collects visible text content. Script, style, and nav elements are suppressed entirely.

Source code in src/solidworks_mcp/ui/services/_utils.py
def __init__(self) -> None:
    """Initialise the parser with an empty fragment buffer and a zero skip counter."""
    super().__init__()
    # Text fragments collected so far; ``text()`` joins them with newlines.
    self._parts: list[str] = []
    # NOTE(review): presumably the nesting depth inside suppressed elements
    # (script/style/nav); the handlers that update it are not visible here.
    self._skip_depth = 0
Functions
text
text() -> str

Return the accumulated plain text.

Returns:

Type Description
str

Plain text content extracted from the HTML document.

Source code in src/solidworks_mcp/ui/services/_utils.py
def text(self) -> str:
    """Join the collected text fragments into one newline-separated string.

    Returns:
        The fragments gathered so far, separated by ``"\\n"``; an empty
        string when nothing has been collected.
    """
    line_break = "\n"
    return line_break.join(self._parts)

Functions

_chunk_text

_chunk_text(text: str, chunk_size: int = 1000, overlap: int = 150) -> list[str]

Split long text into overlapping chunks for simple local retrieval.

Parameters:

Name Type Description Default
text str

Input text processed by the operation.

required
chunk_size int

Maximum number of characters to keep in each chunk. Defaults to 1000.

1000
overlap int

Number of overlapping characters between chunks. Defaults to 150.

150

Returns:

Type Description
list[str]

list[str]: The chunks in document order; an empty list when the input is empty or whitespace-only.

Source code in src/solidworks_mcp/agents/retrieval_index.py
def _chunk_text(text: str, chunk_size: int = 1000, overlap: int = 150) -> list[str]:
    """Split long text into overlapping chunks for simple local retrieval.

    Args:
        text (str): Input text processed by the operation.
        chunk_size (int): Maximum number of characters to keep in each chunk. Defaults to
                          1000.
        overlap (int): Number of overlapping characters between chunks. Defaults to 150.

    Returns:
        list[str]: A list containing the resulting items.
    """
    normalized = (text or "").strip()
    if not normalized:
        return []

    if len(normalized) <= chunk_size:
        return [normalized]

    chunks: list[str] = []
    start = 0
    while start < len(normalized):
        end = min(start + chunk_size, len(normalized))
        chunks.append(normalized[start:end])
        if end == len(normalized):
            break
        start = max(0, end - overlap)
    return chunks

fetch_docs_context

fetch_docs_context(session_id: str, *, docs_query: str = '', db_path: Path | None = None, api_origin: str = DEFAULT_API_ORIGIN) -> dict[str, Any]

Fetch docs text from the /docs endpoint and store a filtered context snippet.

Parameters:

Name Type Description Default
session_id str

Dashboard session identifier.

required
docs_query str

Keyword(s) to filter the docs text by.

''
db_path Path | None

Optional override for the SQLite database path.

None
api_origin str

Base URL of the running FastAPI server.

DEFAULT_API_ORIGIN

Returns:

Type Description
dict[str, Any]

Full dashboard state payload.

Source code in src/solidworks_mcp/ui/services/docs_service.py
def fetch_docs_context(
    session_id: str,
    *,
    docs_query: str = "",
    db_path: Path | None = None,
    api_origin: str = DEFAULT_API_ORIGIN,
) -> dict[str, Any]:
    """Fetch docs text from the ``/docs`` endpoint and store a filtered context snippet.

    On success the filtered snippet and query are persisted together with a
    ``ui.docs.fetch`` audit record. On any failure the exception is logged and
    the session metadata is updated with the error text and a remediation
    hint; the function does not re-raise.

    Args:
        session_id: Dashboard session identifier.
        docs_query: Keyword(s) to filter the docs text by. Falls back to
            ``"solidworks workflow"`` when empty or invalid.
        db_path: Optional override for the SQLite database path.
        api_origin: Base URL of the running FastAPI server. A trailing slash
            is tolerated.

    Returns:
        Full dashboard state payload.
    """
    from .session_service import build_dashboard_state, ensure_dashboard_session  # noqa: PLC0415

    ensure_dashboard_session(session_id, db_path=db_path)
    # Strip trailing slashes so "http://host/" does not become "http://host//docs".
    docs_url = f"{(api_origin or '').rstrip('/')}/docs"
    query_text = sanitize_ui_text(docs_query, "solidworks workflow")
    try:
        request = Request(docs_url, headers={"User-Agent": "solidworks-mcp-ui/1.0"})
        with urlopen(request, timeout=8) as response:
            # Honour the charset announced by the server instead of assuming UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"
            raw_body = response.read()
        try:
            html = raw_body.decode(charset, errors="ignore")
        except LookupError:
            # Unknown codec name in the header: fall back to UTF-8.
            html = raw_body.decode("utf-8", errors="ignore")
        extractor = HTMLTextExtractor()
        extractor.feed(html)
        # close() flushes any text the parser is still buffering.
        extractor.close()
        snippet = filter_docs_text(extractor.text(), query_text)
        persist_ui_action(
            session_id,
            tool_name="ui.docs.fetch",
            db_path=db_path,
            metadata_updates={
                "docs_query": query_text,
                "docs_context_text": snippet,
                "latest_message": "Docs context updated from MCP docs endpoint.",
                "latest_error_text": "",
                "remediation_hint": "",
            },
            input_payload={"query": query_text, "url": docs_url},
            output_payload={"chars": len(snippet)},
        )
    except Exception as exc:
        # Top-level UI boundary: report the failure through session metadata
        # rather than propagating it to the dashboard.
        logger.exception("[ui.fetch_docs_context] failed session_id={}", session_id)
        merge_metadata(
            session_id,
            db_path=db_path,
            docs_query=query_text,
            docs_context_text="",
            latest_error_text=str(exc),
            remediation_hint="Verify the /docs endpoint is reachable, then retry docs refresh.",
        )
    return build_dashboard_state(session_id, db_path=db_path, api_origin=api_origin)

filter_docs_text

filter_docs_text(text: str, query: str, *, max_chars: int = 4000) -> str

Filter plain-text docs content to lines most relevant to query.

Returns up to max_chars characters of the most relevant lines, scored by how many query tokens they contain.

Parameters:

Name Type Description Default
text str

Full plain-text docs content.

required
query str

Space-separated keyword query.

required
max_chars int

Maximum characters to return.

4000

Returns:

Type Description
str

Filtered and truncated text snippet.

Source code in src/solidworks_mcp/ui/services/_utils.py
def filter_docs_text(text: str, query: str, *, max_chars: int = 4000) -> str:
    """Keep only the lines of *text* that mention at least one query keyword.

    Each line is scored by the number of distinct lower-cased query tokens it
    contains (substring match, case-insensitive). Matching lines are ordered
    by descending score, ties keeping their original order, joined with
    newlines and cut to ``max_chars`` characters.

    Args:
        text: Full plain-text docs content.
        query: Space-separated keyword query.
        max_chars: Maximum characters to return.

    Returns:
        Filtered and truncated text snippet; ``""`` when *text* is empty or no
        line matches.
    """
    if not text:
        return ""

    keywords = {word.lower() for word in query.split() if word}

    def count_hits(candidate: str) -> int:
        haystack = candidate.lower()
        return sum(1 for keyword in keywords if keyword in haystack)

    ranked: list[tuple[int, str]] = [
        (hits, candidate)
        for candidate in text.splitlines()
        if (hits := count_hits(candidate)) > 0
    ]
    # Ascending sort on the negated score is the stable equivalent of a
    # descending sort on the score.
    ranked = sorted(ranked, key=lambda pair: -pair[0])
    return "\n".join(candidate for _, candidate in ranked)[:max_chars]

ingest_reference_source

ingest_reference_source(session_id: str, *, source_path: str, namespace: str, chunk_size: int = 1200, overlap: int = 200, db_path: Path | None = None) -> dict[str, Any]

Ingest a local file or URL into the simple local retrieval index.

Parameters:

Name Type Description Default
session_id str

Dashboard session identifier.

required
source_path str

Absolute file path or http/https URL.

required
namespace str

Namespace key that isolates this index from others.

required
chunk_size int

Maximum characters per chunk.

1200
overlap int

Overlapping characters between adjacent chunks.

200
db_path Path | None

Optional override for the SQLite database path.

None

Returns:

Type Description
dict[str, Any]

Full dashboard state payload.

Source code in src/solidworks_mcp/ui/services/docs_service.py
def _safe_rag_namespace(
    namespace: str | None, fallback: str = "engineering-reference"
) -> str:
    """Reduce *namespace* to a single safe file-name component.

    Path separators, drive colons and NUL bytes are replaced with ``-`` and
    leading/trailing dots and spaces are removed, so the result can never
    point outside the RAG directory. Empty results use *fallback*.
    """
    cleaned = (namespace or "").strip()
    for unsafe in ("/", "\\", ":", "\x00"):
        cleaned = cleaned.replace(unsafe, "-")
    cleaned = cleaned.strip(". ")
    return cleaned or fallback


def ingest_reference_source(
    session_id: str,
    *,
    source_path: str,
    namespace: str,
    chunk_size: int = 1200,
    overlap: int = 200,
    db_path: Path | None = None,
) -> dict[str, Any]:
    """Ingest a local file or URL into the simple local retrieval index.

    The source text is chunked and written to
    ``DEFAULT_RAG_DIR / "<namespace>.json"``. A vector index update is then
    attempted on a best-effort basis, after which an evidence link, the
    session metadata and a tool-call audit record are written. Failures are
    logged and reported through session metadata and a failed audit record;
    the function does not re-raise.

    Args:
        session_id: Dashboard session identifier.
        source_path: Absolute file path or http/https URL.
        namespace: Namespace key that isolates this index from others. It is
            used as a file name, so path separators are neutralised; blank
            values fall back to ``"engineering-reference"``.
        chunk_size: Maximum characters per chunk.
        overlap: Overlapping characters between adjacent chunks.
        db_path: Optional override for the SQLite database path.

    Returns:
        Full dashboard state payload.
    """
    from .session_service import build_dashboard_state, ensure_dashboard_session  # noqa: PLC0415

    ensure_dashboard_session(session_id, db_path=db_path)
    source_reference = (source_path or "").strip()
    # The namespace comes from the UI and becomes part of a file path below;
    # sanitise it so values such as "../../x" cannot escape DEFAULT_RAG_DIR.
    resolved_namespace = _safe_rag_namespace(namespace)

    try:
        if is_url_reference(source_reference):
            source_identifier = source_reference
            source_text, source_label = read_reference_url(source_reference)
        else:
            resolved_source = Path(source_reference).expanduser()
            if not resolved_source.exists():
                # Missing local path: report it and stop without an audit record.
                merge_metadata(
                    session_id,
                    db_path=db_path,
                    rag_source_path=str(resolved_source),
                    rag_namespace=resolved_namespace,
                    rag_status="Reference source path was not found.",
                    latest_error_text=f"Missing reference source: {resolved_source}",
                    remediation_hint=(
                        "Provide an absolute path or an http/https URL for a PDF, "
                        "markdown, text, or HTML source."
                    ),
                )
                return build_dashboard_state(session_id, db_path=db_path)
            source_identifier = str(resolved_source.resolve())
            source_label = resolved_source.name
            source_text = read_reference_source(resolved_source)

        chunks = _chunk_text(source_text, chunk_size=chunk_size, overlap=overlap)
        output_path = DEFAULT_RAG_DIR / f"{resolved_namespace}.json"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": "1.0",
            "namespace": resolved_namespace,
            "source_location": source_identifier,
            "chunk_count": len(chunks),
            "chunks": [
                {
                    "id": f"{resolved_namespace}-{index}",
                    "source": source_identifier,
                    "text": chunk,
                }
                for index, chunk in enumerate(chunks, start=1)
            ],
        }
        output_path.write_text(
            json.dumps(payload, indent=2, ensure_ascii=True), encoding="utf-8"
        )

        # --- FAISS vector index (best-effort; skipped if optional deps missing) ---
        try:
            from ...agents.vector_rag import VectorRAGIndex  # noqa: PLC0415

            idx = VectorRAGIndex.load(
                namespace=resolved_namespace, rag_dir=DEFAULT_RAG_DIR
            )
            for chunk in payload["chunks"]:
                idx.ingest_text(
                    chunk["text"], source=source_identifier, tags=[resolved_namespace]
                )
            idx.save()
            logger.info(
                "[ui.ingest_reference_source] FAISS index updated namespace={} chunks={}",
                resolved_namespace,
                len(chunks),
            )
        except ImportError:
            logger.debug(
                "[ui.ingest_reference_source] FAISS not available; skipping vector index"
            )
        except Exception as faiss_exc:
            # Deliberately non-fatal: the JSON index above is already written.
            logger.warning(
                "[ui.ingest_reference_source] FAISS indexing failed (non-fatal): {}",
                faiss_exc,
            )

        insert_evidence_link(
            session_id=session_id,
            source_type="rag_ingest",
            source_id=source_identifier,
            relevance_score=0.88,
            rationale=f"Ingested {len(chunks)} chunk(s) into namespace '{resolved_namespace}'.",
            payload_json=json.dumps(
                {
                    "namespace": resolved_namespace,
                    "index_path": str(output_path.resolve()),
                    "chunk_count": len(chunks),
                },
                ensure_ascii=True,
            ),
            db_path=db_path,
        )
        merge_metadata(
            session_id,
            db_path=db_path,
            rag_source_path=source_identifier,
            rag_namespace=resolved_namespace,
            rag_status=f"Ingested {len(chunks)} chunk(s) from {source_label}.",
            rag_index_path=str(output_path.resolve()),
            rag_chunk_count=len(chunks),
            rag_provenance_text=(
                f"Namespace {resolved_namespace} | source {source_label} | chunks {len(chunks)}"
            ),
            latest_message=f"Reference source {source_label} ingested for retrieval.",
            latest_error_text="",
            remediation_hint="",
        )
        insert_tool_call_record(
            session_id=session_id,
            tool_name="ui.ingest_reference_source",
            input_json=json.dumps(
                {
                    "source_path": source_identifier,
                    "namespace": resolved_namespace,
                    "chunk_size": chunk_size,
                    "overlap": overlap,
                },
                ensure_ascii=True,
            ),
            output_json=json.dumps(
                {"index_path": str(output_path.resolve()), "chunk_count": len(chunks)},
                ensure_ascii=True,
            ),
            success=True,
            db_path=db_path,
        )
    except Exception as exc:
        # Top-level UI boundary: keep the traceback in the log (as
        # fetch_docs_context does) and surface the error through session state.
        logger.exception(
            "[ui.ingest_reference_source] failed session_id={}", session_id
        )
        merge_metadata(
            session_id,
            db_path=db_path,
            rag_source_path=source_reference,
            rag_namespace=resolved_namespace,
            rag_status="Reference ingestion failed.",
            latest_error_text=str(exc),
            remediation_hint=(
                "Use a readable local file or http/https URL and ensure optional "
                "PDF dependencies are installed."
            ),
        )
        insert_tool_call_record(
            session_id=session_id,
            tool_name="ui.ingest_reference_source",
            input_json=json.dumps(
                {"source_path": source_reference, "namespace": resolved_namespace},
                ensure_ascii=True,
            ),
            output_json=json.dumps({"error": str(exc)}, ensure_ascii=True),
            success=False,
            db_path=db_path,
        )

    return build_dashboard_state(session_id, db_path=db_path)
insert_evidence_link(*, session_id: str, source_type: str, source_id: str, checkpoint_id: int | None = None, relevance_score: float | None = None, rationale: str | None = None, payload_json: str | None = None, db_path: Path | None = None) -> None

Insert one evidence row used by planning/classification.

Parameters:

Name Type Description Default
session_id str

The session id value.

required
source_type str

The source type value.

required
source_id str

The source id value.

required
checkpoint_id int | None

The checkpoint id value. Defaults to None.

None
relevance_score float | None

The relevance score value. Defaults to None.

None
rationale str | None

The rationale value. Defaults to None.

None
payload_json str | None

The payload json value. Defaults to None.

None
db_path Path | None

The db path value. Defaults to None.

None

Returns:

Name Type Description
None None

None.

Source code in src/solidworks_mcp/agents/history_db.py
def insert_evidence_link(
    *,
    session_id: str,
    source_type: str,
    source_id: str,
    checkpoint_id: int | None = None,
    relevance_score: float | None = None,
    rationale: str | None = None,
    payload_json: str | None = None,
    db_path: Path | None = None,
) -> None:
    """Add one ``EvidenceLink`` row for a session and commit it.

    The database location is resolved with ``init_db(db_path)`` and the row's
    ``created_at`` is filled from ``_utc_now_iso()``.

    Args:
        session_id (str): Session the evidence belongs to.
        source_type (str): Kind of evidence source (stored as given).
        source_id (str): Identifier of the evidence source (stored as given).
        checkpoint_id (int | None): Optional checkpoint to associate the row
            with. Defaults to None.
        relevance_score (float | None): Optional relevance score. Defaults to None.
        rationale (str | None): Optional free-text rationale. Defaults to None.
        payload_json (str | None): Optional pre-serialised JSON payload.
            Defaults to None.
        db_path (Path | None): Optional database path passed to ``init_db``.
            Defaults to None.

    Returns:
        None: The row is written as a side effect.
    """
    database = init_db(db_path)
    with Session(_build_engine(database)) as db_session:
        evidence_row = EvidenceLink(
            session_id=session_id,
            checkpoint_id=checkpoint_id,
            source_type=source_type,
            source_id=source_id,
            relevance_score=relevance_score,
            rationale=rationale,
            payload_json=payload_json,
            created_at=_utc_now_iso(),
        )
        db_session.add(evidence_row)
        db_session.commit()

insert_tool_call_record

insert_tool_call_record(*, session_id: str, tool_name: str, checkpoint_id: int | None = None, run_id: str | None = None, input_json: str | None = None, output_json: str | None = None, success: bool = True, latency_ms: float | None = None, db_path: Path | None = None) -> None

Insert one tool call execution record.

Parameters:

Name Type Description Default
session_id str

The session id value.

required
tool_name str

The tool name value.

required
checkpoint_id int | None

The checkpoint id value. Defaults to None.

None
run_id str | None

The run id value. Defaults to None.

None
input_json str | None

The input json value. Defaults to None.

None
output_json str | None

The output json value. Defaults to None.

None
success bool

The success value. Defaults to True.

True
latency_ms float | None

The latency ms value. Defaults to None.

None
db_path Path | None

The db path value. Defaults to None.

None

Returns:

Name Type Description
None None

None.

Source code in src/solidworks_mcp/agents/history_db.py
def insert_tool_call_record(
    *,
    session_id: str,
    tool_name: str,
    checkpoint_id: int | None = None,
    run_id: str | None = None,
    input_json: str | None = None,
    output_json: str | None = None,
    success: bool = True,
    latency_ms: float | None = None,
    db_path: Path | None = None,
) -> None:
    """Add one ``ToolCallRecord`` row for a session and commit it.

    The database location is resolved with ``init_db(db_path)`` and the row's
    ``created_at`` is filled from ``_utc_now_iso()``.

    Args:
        session_id (str): Session the tool call belongs to.
        tool_name (str): Name of the tool that was invoked.
        checkpoint_id (int | None): Optional checkpoint to associate the row
            with. Defaults to None.
        run_id (str | None): Optional run identifier. Defaults to None.
        input_json (str | None): Optional pre-serialised JSON input.
            Defaults to None.
        output_json (str | None): Optional pre-serialised JSON output.
            Defaults to None.
        success (bool): Whether the call succeeded. Defaults to True.
        latency_ms (float | None): Optional call latency in milliseconds.
            Defaults to None.
        db_path (Path | None): Optional database path passed to ``init_db``.
            Defaults to None.

    Returns:
        None: The row is written as a side effect.
    """
    database = init_db(db_path)
    with Session(_build_engine(database)) as db_session:
        call_row = ToolCallRecord(
            session_id=session_id,
            checkpoint_id=checkpoint_id,
            run_id=run_id,
            tool_name=tool_name,
            input_json=input_json,
            output_json=output_json,
            success=success,
            latency_ms=latency_ms,
            created_at=_utc_now_iso(),
        )
        db_session.add(call_row)
        db_session.commit()

is_url_reference

is_url_reference(source_path: str) -> bool

Return True when source_path is an http/https URL.

Parameters:

Name Type Description Default
source_path str

Raw source path string from the UI.

required

Returns:

Type Description
bool

True if the path parses as an http:// or https:// URL with a non-empty host.

Source code in src/solidworks_mcp/ui/services/_utils.py
def is_url_reference(source_path: str) -> bool:
    """Tell whether *source_path* looks like an http/https URL.

    Args:
        source_path: Raw source path string from the UI; ``None`` and
            surrounding whitespace are tolerated.

    Returns:
        ``True`` when the value parses with an ``http`` or ``https`` scheme
        and a non-empty network location, ``False`` otherwise.
    """
    candidate = (source_path or "").strip()
    parts = urlparse(candidate)
    if parts.scheme not in ("http", "https"):
        return False
    return parts.netloc != ""

merge_metadata

merge_metadata(session_id: str, *, db_path: Path | None = None, user_goal: str | None = None, **updates: Any) -> dict[str, Any]

Read session metadata, merge updates into it, and write it back.

Implements the optimistic read-modify-write pattern used across all service functions that need to update one or more metadata keys without overwriting unrelated keys.

Parameters:

Name Type Description Default
session_id str

Target session identifier.

required
db_path Path | None

Optional override for the SQLite database path.

None
user_goal str | None

When provided, also updates the user_goal column.

None
**updates Any

Arbitrary key-value pairs to merge into metadata.

{}

Returns:

Type Description
dict[str, Any]

The merged metadata dict after the write.

Source code in src/solidworks_mcp/ui/services/_utils.py
def merge_metadata(
    session_id: str,
    *,
    db_path: Path | None = None,
    user_goal: str | None = None,
    **updates: Any,
) -> dict[str, Any]:
    """Merge *updates* into a session's stored metadata and write it back.

    The current row is read with ``get_design_session``; its metadata JSON is
    parsed, updated with the given keys and saved through
    ``upsert_design_session``. All other session columns are carried over from
    the existing row, or take default values when no row exists yet.

    Args:
        session_id: Target session identifier.
        db_path: Optional override for the SQLite database path.
        user_goal: When truthy, replaces the stored ``user_goal``; otherwise
            the existing goal (or the default goal) is kept.
        **updates: Arbitrary key-value pairs to merge into metadata.

    Returns:
        The merged metadata dict after the write.
    """
    existing = get_design_session(session_id, db_path=db_path)

    merged = parse_json_blob(existing["metadata_json"]) if existing else {}
    merged.update(updates)

    if existing:
        # Carry every non-metadata column over from the stored row.
        goal = user_goal or existing["user_goal"]
        source_mode = existing["source_mode"]
        family = existing["accepted_family"]
        status = existing["status"]
        checkpoint_index = existing["current_checkpoint_index"]
    else:
        # No row yet: start the session from the defaults.
        goal = user_goal or DEFAULT_USER_GOAL
        source_mode = DEFAULT_SOURCE_MODE
        family = None
        status = "active"
        checkpoint_index = 0

    upsert_design_session(
        session_id=session_id,
        user_goal=goal,
        source_mode=source_mode,
        accepted_family=family,
        status=status,
        current_checkpoint_index=checkpoint_index,
        metadata_json=json.dumps(merged, ensure_ascii=True),
        db_path=db_path,
    )
    return merged

persist_ui_action

persist_ui_action(session_id: str, *, tool_name: str, db_path: Path | None = None, metadata_updates: dict[str, Any] | None = None, user_goal: str | None = None, input_payload: dict[str, Any] | None = None, output_payload: dict[str, Any] | None = None, output_metadata: bool = False, success: bool = True, checkpoint_id: int | None = None) -> dict[str, Any]

Persist metadata updates and a matching tool-call audit record in a single call (the two writes are performed one after the other, not as one transaction).

Combines :func:merge_metadata and insert_tool_call_record so callers can update session state and write an audit entry in a single call.

Parameters:

Name Type Description Default
session_id str

Target session identifier.

required
tool_name str

Logical name for the audit record (e.g. "ui.approve_brief").

required
db_path Path | None

Optional override for the SQLite database path.

None
metadata_updates dict[str, Any] | None

Key-value pairs to merge into session metadata.

None
user_goal str | None

When provided, also updates the user_goal column.

None
input_payload dict[str, Any] | None

Dict serialised as the input_json audit column.

None
output_payload dict[str, Any] | None

Dict serialised as the output_json audit column.

None
output_metadata bool

When True, write the merged metadata as output_json.

False
success bool

Whether the action succeeded.

True
checkpoint_id int | None

Optional FK to the associated plan checkpoint.

None

Returns:

Type Description
dict[str, Any]

The merged metadata dict after the write.

Source code in src/solidworks_mcp/ui/services/_utils.py
def persist_ui_action(
    session_id: str,
    *,
    tool_name: str,
    db_path: Path | None = None,
    metadata_updates: dict[str, Any] | None = None,
    user_goal: str | None = None,
    input_payload: dict[str, Any] | None = None,
    output_payload: dict[str, Any] | None = None,
    output_metadata: bool = False,
    success: bool = True,
    checkpoint_id: int | None = None,
) -> dict[str, Any]:
    """Update session metadata and record a matching tool-call audit entry.

    Calls :func:`merge_metadata` first (only when ``metadata_updates`` or
    ``user_goal`` is given) and then ``insert_tool_call_record``, so callers
    can do both with one call. The two writes happen one after the other.

    Args:
        session_id: Target session identifier.
        tool_name: Logical name for the audit record (e.g. ``"ui.approve_brief"``).
        db_path: Optional override for the SQLite database path.
        metadata_updates: Key-value pairs to merge into session metadata.
        user_goal: When provided, also updates the ``user_goal`` column.
        input_payload: Dict serialised as the ``input_json`` audit column.
        output_payload: Dict serialised as the ``output_json`` audit column.
        output_metadata: When ``True``, write the merged metadata as
            ``output_json`` instead of ``output_payload``.
        success: Whether the action succeeded.
        checkpoint_id: Optional FK to the associated plan checkpoint.

    Returns:
        The merged metadata dict after the write, or ``{}`` when no metadata
        merge was requested.
    """

    def _as_json(payload: dict[str, Any] | None) -> str | None:
        # None stays None so the audit column is left empty.
        if payload is None:
            return None
        return json.dumps(payload, ensure_ascii=True)

    nothing_to_merge = metadata_updates is None and user_goal is None
    if nothing_to_merge:
        merged: dict[str, Any] = {}
    else:
        merged = merge_metadata(
            session_id,
            db_path=db_path,
            user_goal=user_goal,
            **(metadata_updates or {}),
        )

    audited_output = merged if output_metadata else output_payload

    insert_tool_call_record(
        session_id=session_id,
        checkpoint_id=checkpoint_id,
        tool_name=tool_name,
        input_json=_as_json(input_payload),
        output_json=_as_json(audited_output),
        success=success,
        db_path=db_path,
    )
    return merged

read_reference_source

read_reference_source(source_path: Path) -> str

Read the text content of a local file (PDF, markdown, text).

PDF extraction requires the optional pypdf package.

Parameters:

Name Type Description Default
source_path Path

Local file path to read.

required

Returns:

Type Description
str

Extracted plain text content.

Raises:

Type Description
RuntimeError

When a PDF is supplied but pypdf is not installed.

Source code in src/solidworks_mcp/ui/services/_utils.py
def read_reference_source(source_path: Path) -> str:
    """Read the text content of a local file (PDF, HTML, markdown, text).

    PDF extraction requires the optional ``pypdf`` package, which is imported
    only when a ``.pdf`` file is actually supplied. ``.html``/``.htm`` files
    are reduced to visible text with ``HTMLTextExtractor``, matching how
    ``read_reference_url`` treats HTML responses. Any other file is returned
    verbatim as UTF-8 text.

    Args:
        source_path: Local file path to read.

    Returns:
        Extracted plain text content.

    Raises:
        RuntimeError: When a PDF is supplied but ``pypdf`` is not installed.
    """
    suffix = source_path.suffix.lower()

    if suffix == ".pdf":
        # Import lazily so non-PDF sources never pay for (or trip over) pypdf.
        try:
            pdf_reader_cls = import_module("pypdf").PdfReader
        except ImportError as exc:
            raise RuntimeError(
                "Install pypdf to ingest PDF sources, or provide a text/markdown file instead."
            ) from exc
        reader = pdf_reader_cls(str(source_path))
        # extract_text() may yield nothing for a page; treat that as empty text.
        return "\n\n".join((page.extract_text() or "") for page in reader.pages).strip()

    content = source_path.read_text(encoding="utf-8")

    if suffix in {".html", ".htm"}:
        # Index the visible text rather than the raw markup.
        parser = HTMLTextExtractor()
        parser.feed(content)
        parser.close()  # flush any text still buffered by the parser
        return parser.text().strip()

    return content

read_reference_url

read_reference_url(source_url: str) -> tuple[str, str]

Fetch content from a URL and return (text, label).

Handles HTML (stripped), PDF (requires pypdf), and plain text.

Parameters:

Name Type Description Default
source_url str

http/https URL to fetch.

required

Returns:

Type Description
tuple[str, str]

Tuple of (plain_text_content, label) where label is derived from the URL path.

Raises:

Type Description
RuntimeError

When a PDF is served but pypdf is not installed.

Source code in src/solidworks_mcp/ui/services/_utils.py
def read_reference_url(source_url: str) -> tuple[str, str]:
    """Fetch content from a URL and return (text, label).

    Handles HTML (stripped), PDF (requires ``pypdf``), and plain text. The
    content kind is decided from the response content type and from the
    suffix of the URL path.

    Args:
        source_url: http/https URL to fetch.

    Returns:
        Tuple of (plain_text_content, label) where label is the last path
        component of the URL, falling back to the host and then the full URL.

    Raises:
        RuntimeError: When a PDF is served but ``pypdf`` is not installed.
    """
    request = Request(source_url, headers={"User-Agent": "SolidWorksMCP/1.0"})
    with urlopen(request, timeout=20) as response:
        content_type = response.headers.get_content_type()
        charset = response.headers.get_content_charset() or "utf-8"
        raw_bytes = response.read()

    parsed = urlparse(source_url)
    label = Path(parsed.path).name or parsed.netloc or source_url
    suffix = Path(parsed.path).suffix.lower()

    if content_type == "application/pdf" or suffix == ".pdf":
        # Import lazily so HTML/text sources never depend on pypdf.
        try:
            pdf_reader_cls = import_module("pypdf").PdfReader
        except ImportError as exc:
            raise RuntimeError(
                "Install pypdf to ingest PDF sources, or provide a text, markdown, or HTML source instead."
            ) from exc
        reader = pdf_reader_cls(BytesIO(raw_bytes))
        # extract_text() may yield nothing for a page; treat that as empty text.
        text = "\n\n".join((page.extract_text() or "") for page in reader.pages).strip()
        return text, label

    try:
        decoded = raw_bytes.decode(charset, errors="ignore")
    except LookupError:
        # The server announced a codec Python does not know; fall back to
        # UTF-8 instead of failing the whole ingestion.
        decoded = raw_bytes.decode("utf-8", errors="ignore")

    if "html" in content_type or suffix in {".html", ".htm"}:
        parser = HTMLTextExtractor()
        parser.feed(decoded)
        parser.close()  # flush any text still buffered by the parser
        return parser.text().strip(), label

    return decoded.strip(), label

sanitize_ui_text

sanitize_ui_text(value: Any, fallback: str = '') -> str

Return a clean string from value, using fallback for empty/invalid inputs.

Strips template placeholders such as {{ field }}, bare quotes, and pydantic-ai expression strings that sometimes leak into UI state.

Parameters:

Name Type Description Default
value Any

Arbitrary input (str, None, etc.).

required
fallback str

Value to return when value is empty or invalid.

''

Returns:

Type Description
str

Cleaned string, or fallback.

Source code in src/solidworks_mcp/ui/services/_utils.py
def sanitize_ui_text(value: Any, fallback: str = "") -> str:
    """Normalise a UI-supplied value to a trimmed string, or *fallback*.

    *fallback* is returned when the value is ``None``, blank, a lone quote
    character, a ``{{ ... }}`` template placeholder, or contains the
    ``$result.`` / ``$error`` expression markers that sometimes leak into UI
    state.

    Args:
        value: Arbitrary input (str, None, etc.); non-strings go through ``str()``.
        fallback: Value to return when *value* is empty or invalid.

    Returns:
        Cleaned string, or *fallback*.
    """
    cleaned = "" if value is None else str(value).strip()

    is_blank_or_quote = cleaned in ("", '"', "'")
    is_placeholder = cleaned.startswith("{{") and cleaned.endswith("}}")
    has_expression = any(marker in cleaned for marker in ("$result.", "$error"))

    if is_blank_or_quote or is_placeholder or has_expression:
        return fallback
    return cleaned