Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ output.json
.ruff-cache/
.mypy_cache/
ruff_report.txt
ruff-error.txt
mypy_report.txt
coverage.xml
htmlcov/
9 changes: 8 additions & 1 deletion scripts/gen_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,14 @@ def _write_schema(name: str, model: type[BaseModel], output_dir: Path) -> Path:


def main() -> int:
"""Generate JSON Schemas for ExStruct public models."""
"""
Generate JSON Schema files for ExStruct's public Pydantic models into the repository 'schemas' directory.

Writes one JSON Schema file per public model into the 'schemas' folder at the project root.

Returns:
exit_code (int): 0 on success.
"""
project_root = Path(__file__).resolve().parent.parent
output_dir = project_root / "schemas"
targets: dict[str, type[BaseModel]] = {
Expand Down
116 changes: 102 additions & 14 deletions src/exstruct/core/shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,33 @@


def compute_line_angle_deg(w: float, h: float) -> float:
    """
    Compute the clockwise angle (in degrees) in Excel coordinates where 0° points East.

    Excel's vertical axis grows downward, so a positive ``h`` yields an angle
    between 0° and 180° (e.g. straight down is 90°).

    Parameters:
        w (float): Horizontal delta (width, positive to the right).
        h (float): Vertical delta (height, positive downward).

    Returns:
        float: Angle in degrees in the half-open range [0, 360), measured
        clockwise from East (0° = East, 90° = South). ``w == h == 0`` yields 0.0.
    """
    angle = math.degrees(math.atan2(h, w)) % 360.0
    # Float modulo of a tiny negative value (e.g. -5e-19 % 360.0) rounds up to
    # exactly 360.0; fold that back to 0.0 so the result stays inside [0, 360).
    return 0.0 if angle >= 360.0 else angle


def angle_to_compass(
    angle: float,
) -> Literal["E", "SE", "S", "SW", "W", "NW", "N", "NE"]:
    """
    Map an angle in degrees to one of eight compass directions.

    The angle is interpreted with 0 degrees at East and increasing values
    rotating counterclockwise (45 -> NE, 90 -> N). Each direction owns a
    45-degree sector centred on it; the lower sector boundary is inclusive
    (22.5 -> NE). Angles outside [0, 360) are wrapped.

    NOTE(review): compute_line_angle_deg in this module documents a *clockwise*
    angle (90 = South). Confirm that callers convert between the two
    conventions before passing an angle here.

    Parameters:
        angle (float): Angle in degrees (any finite value).

    Returns:
        str: One of `"E"`, `"SE"`, `"S"`, `"SW"`, `"W"`, `"NW"`, `"N"`, or `"NE"`
        corresponding to the nearest 8-point compass direction.
    """
    dirs = ("E", "NE", "N", "NW", "W", "SW", "S", "SE")
    sector = int(((angle + 22.5) % 360) // 45)
    # Float modulo can round a value just below 360 up to exactly 360.0
    # (e.g. angle = -22.5 - 2**-48), which would give sector 8. Such a value
    # belongs to the last sector, so clamp instead of raising IndexError.
    idx = min(sector, len(dirs) - 1)
    return cast(Literal["E", "SE", "S", "SW", "W", "NW", "N", "NE"], dirs[idx])
Expand All @@ -28,7 +47,18 @@ def angle_to_compass(
def coord_to_cell_by_edges(
row_edges: list[float], col_edges: list[float], x: float, y: float
) -> str | None:
"""Estimate cell address from coordinates and cumulative edges; return None if out of range."""
"""
Estimate the Excel A1-style cell that contains a point given cumulative row and column edge coordinates.

Parameters:
row_edges (list[float]): Monotonic list of cumulative vertical edges (top-to-bottom). Consecutive entries define row spans.
col_edges (list[float]): Monotonic list of cumulative horizontal edges (left-to-right). Consecutive entries define column spans.
x (float): Horizontal coordinate (same coordinate system as col_edges).
y (float): Vertical coordinate (same coordinate system as row_edges).

Returns:
str | None: A1-style cell address (e.g., "B3") if the point falls inside the grid; `None` if the point is outside the provided edge ranges. Intervals are treated as left-inclusive and right-exclusive: [edge_i, edge_{i+1}).
"""

def find_index(edges: list[float], pos: float) -> int | None:
for i in range(1, len(edges)):
Expand Down Expand Up @@ -82,10 +112,18 @@ def _should_include_shape(
output_mode: str = "standard",
) -> bool:
"""
Decide whether to emit a shape given output mode.
- standard: emit if text exists OR the shape is an arrow/line/connector.
- light: suppress shapes entirely (handled upstream, but guard defensively).
- verbose: include all (except already-filtered chart/comment/picture/form controls).
Determine whether a shape should be included in the output based on its properties and the selected output mode.

Modes:
- "light": always exclude shapes.
- "standard": include when the shape has text or represents a relationship (line/connector).
- "verbose": include all shapes (other global exclusions are handled elsewhere).

Parameters:
output_mode (str): One of "light", "standard", or "verbose"; controls inclusion rules.

Returns:
bool: `True` if the shape should be emitted, `False` otherwise.
"""
if output_mode == "light":
return False
Expand Down Expand Up @@ -142,7 +180,12 @@ class _SmartArtLike(Protocol):


def _shape_has_smartart(shp: xw.Shape) -> bool:
"""Return True if the shape exposes SmartArt content."""
"""
Determine whether a shape exposes SmartArt content.

Returns:
bool: `True` if the shape exposes SmartArt (i.e., has an accessible `HasSmartArt` attribute), `False` otherwise.
"""
try:
api = shp.api
except Exception:
Expand All @@ -154,7 +197,12 @@ def _shape_has_smartart(shp: xw.Shape) -> bool:


def _get_smartart_layout_name(smartart: _SmartArtLike | None) -> str:
"""Return SmartArt layout name or a fallback label."""
"""
Get the SmartArt layout name or "Unknown" if it cannot be determined.

Returns:
layout_name (str): The layout name from `smartart.Layout.Name`, or "Unknown" when `smartart` is None or the name cannot be retrieved.
"""
if smartart is None:
return "Unknown"
try:
Expand All @@ -168,7 +216,15 @@ def _get_smartart_layout_name(smartart: _SmartArtLike | None) -> str:
def _collect_smartart_node_info(
smartart: _SmartArtLike | None,
) -> list[tuple[int, str]]:
"""Collect (level, text) pairs from SmartArt nodes."""
"""
Extract a list of (level, text) tuples for each node present in the given SmartArt.

Parameters:
smartart (_SmartArtLike | None): A SmartArt-like COM object or `None`. If `None` or inaccessible, no nodes are collected.

Returns:
list[tuple[int, str]]: A list of tuples where each tuple is (node level, node text). Returns an empty list if the SmartArt is `None`, inaccessible, or if nodes lack a numeric level.
"""
nodes_info: list[tuple[int, str]] = []
if smartart is None:
return nodes_info
Expand All @@ -194,15 +250,30 @@ def _collect_smartart_node_info(


def _get_smartart_node_level(node: _SmartArtNodeLike) -> int | None:
    """
    Get the numerical level of a SmartArt node.

    Parameters:
        node (_SmartArtNodeLike): Object whose `Level` attribute is read.

    Returns:
        int | None: `int(node.Level)`, or `None` if reading the attribute or
        converting it to an integer raises any exception.
    """
    try:
        return int(node.Level)
    except Exception:
        # Deliberately broad: a missing attribute, a non-numeric value, or any
        # other failure while reading `Level` all mean "no usable level".
        return None


def _build_smartart_tree(nodes_info: list[tuple[int, str]]) -> list[SmartArtNode]:
"""Build nested SmartArtNode roots from flat (level, text) tuples."""
"""
Build a nested tree of SmartArtNode objects from a flat list of (level, text) tuples.

Parameters:
nodes_info (list[tuple[int, str]]): Ordered tuples where each tuple is (level, text);
`level` is the hierarchical depth (integer) and `text` is the node label.

Returns:
roots (list[SmartArtNode]): Top-level SmartArtNode instances whose `kids` lists
contain their nested child nodes according to the provided levels.
"""
roots: list[SmartArtNode] = []
stack: list[tuple[int, SmartArtNode]] = []
for level, text in nodes_info:
Expand All @@ -218,15 +289,32 @@ def _build_smartart_tree(nodes_info: list[tuple[int, str]]) -> list[SmartArtNode


def _extract_smartart_nodes(smartart: _SmartArtLike | None) -> list[SmartArtNode]:
    """
    Convert a SmartArt-like object into a list of root SmartArtNode trees.

    This is a two-step pipeline: the flat (level, text) pairs are gathered by
    `_collect_smartart_node_info` and then nested by `_build_smartart_tree`.

    Parameters:
        smartart (_SmartArtLike | None): SmartArt-like object to extract nodes
            from; `None` is forwarded unchanged to the collector.

    Returns:
        list[SmartArtNode]: Root nodes as produced by `_build_smartart_tree`.
    """
    return _build_smartart_tree(_collect_smartart_node_info(smartart))


def get_shapes_with_position( # noqa: C901
workbook: Book, mode: str = "standard"
) -> dict[str, list[Shape | Arrow | SmartArt]]:
"""Scan shapes in a workbook and return per-sheet shape lists with position info."""
"""
Scan all shapes in each worksheet and collect their positional and metadata information.

Parameters:
workbook (Book): The xlwings workbook to scan.
mode (str): Output detail level; "light" skips most shapes, "standard" includes shapes with text or relationships, and "verbose" includes full size/rotation details.

Returns:
dict[str, list[Shape | Arrow | SmartArt]]: Mapping of sheet name to a list of collected shape objects (Shape, Arrow, or SmartArt) containing position (left/top), optional size (width/height), textual content, and other captured metadata (ids, directions, connections, layout/nodes for SmartArt).
"""
shape_data: dict[str, list[Shape | Arrow | SmartArt]] = {}
for sheet in workbook.sheets:
shapes: list[Shape | Arrow | SmartArt] = []
Expand Down
38 changes: 36 additions & 2 deletions src/exstruct/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,17 @@


def dict_without_empty_values(obj: object) -> JsonStructure:
"""Recursively drop empty values from nested structures."""
"""
Remove None, empty string, empty list, and empty dict values from a nested structure or supported model object.

Recursively processes dicts, lists, and supported model types (WorkbookData, CellRow, Chart, PrintArea, PrintAreaView, Shape, Arrow, SmartArt). Model instances are converted to dictionaries with None fields excluded before recursive cleaning. Values considered empty and removed are: `None`, `""` (empty string), `[]` (empty list), and `{}` (empty dict).

Parameters:
obj (object): A value to clean; may be a dict, list, scalar, or one of the supported model instances.

Returns:
JsonStructure: The input structure with empty values removed, preserving other values and nesting.
"""
if isinstance(obj, dict):
return {
k: dict_without_empty_values(v)
Expand Down Expand Up @@ -173,13 +183,37 @@ def _area_to_px_rect(


def _rects_overlap(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> bool:
    """
    Determine whether two axis-aligned rectangles overlap.

    Parameters:
        a (tuple[int, int, int, int]): Rectangle A as (left, top, right, bottom).
        b (tuple[int, int, int, int]): Rectangle B as (left, top, right, bottom).

    Notes:
        All comparisons are strict, so rectangles that only touch along an edge
        or at a corner do not count as overlapping. A degenerate (zero-width or
        zero-height) rectangle lying strictly inside the other one does count.

    Returns:
        bool: `True` if the horizontal spans and the vertical spans both
        strictly overlap, `False` otherwise.
    """
    # Positive form of "not (A ends before B starts or A starts after B ends)"
    # applied independently to each axis.
    horizontal = a[0] < b[2] and b[0] < a[2]
    vertical = a[1] < b[3] and b[1] < a[3]
    return horizontal and vertical


def _filter_shapes_to_area(
shapes: list[Shape | Arrow | SmartArt], area: PrintArea
) -> list[Shape | Arrow | SmartArt]:
"""
Filter drawable shapes to those that intersect the given print area.

Shapes and the print area are compared in approximate pixel coordinates. Shapes that have both width and height are included when their bounding rectangle overlaps the area. Shapes with unknown size (width or height is None) are treated as a point at their left/top coordinates and included only if that point lies inside the area.

Parameters:
shapes (list[Shape | Arrow | SmartArt]): Drawable objects with `l`, `t`, `w`, `h` coordinates.
area (PrintArea): Cell-based print area that will be converted to an approximate pixel rectangle.

Returns:
list[Shape | Arrow | SmartArt]: Subset of `shapes` whose geometry intersects the print area.
"""
area_rect = _area_to_px_rect(area)
filtered: list[Shape | Arrow | SmartArt] = []
for shp in shapes:
Expand Down
14 changes: 14 additions & 0 deletions tests/com/test_shapes_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ def _make_workbook_with_shapes(path: Path) -> None:


def test_図形の種別とテキストが抽出される(tmp_path: Path) -> None:
"""
Verifies extraction of shape types, texts, IDs, and uniqueness from a workbook containing various shapes.

Creates a workbook with a rectangle, an oval, a line, a nested group, and a connector, then extracts shapes from "Sheet1" and asserts:
- a shape with text "rect" is an AutoShape, has non-negative left/top coordinates, and a positive id;
- a nested child with text "inner" is not reported as a Group and has a positive id;
- all emitted shape ids are unique;
- no AutoShape without text is emitted in standard mode.
"""
_ensure_excel()
path = tmp_path / "shapes.xlsx"
_make_workbook_with_shapes(path)
Expand Down Expand Up @@ -92,6 +101,11 @@ def test_図形の種別とテキストが抽出される(tmp_path: Path) -> Non


def test_線図形の方向と矢印情報が抽出される(tmp_path: Path) -> None:
"""
Verifies that a line shape's direction and arrow style information is extracted correctly from a workbook.

Creates a workbook containing shapes, extracts shapes from "Sheet1", finds an Arrow with a begin or end arrow style, and asserts its direction is "E".
"""
_ensure_excel()
path = tmp_path / "lines.xlsx"
_make_workbook_with_shapes(path)
Expand Down
6 changes: 6 additions & 0 deletions tests/core/test_mode_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@


def _make_basic_book(path: Path) -> None:
"""
Create and save a simple Excel workbook with one sheet named "Sheet1" containing "v1" in A1 and "v2" in B1.

Parameters:
path (Path): Filesystem path where the workbook will be saved.
"""
wb = Workbook()
ws = wb.active
ws.title = "Sheet1"
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_shapes_positions_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def Line(self) -> _DummyLine:

@property
def Rotation(self) -> float:
    """
    Expose the dummy's stored rotation under the COM-style `Rotation` name.

    Returns:
        float: The value of `self.rotation`, returned unchanged
        (presumably degrees, matching the shape API being mimicked; TODO confirm).
    """
    return self.rotation


Expand All @@ -52,18 +58,42 @@ class _DummyApiSmartArt:

@property
def Type(self) -> int:
    """
    Expose the dummy's stored shape type under the COM-style `Type` name.

    Returns:
        int: The value of `self.shape_type`, returned unchanged.
    """
    return self.shape_type

@property
def AutoShapeType(self) -> int:
    """
    Simulate an API object on which reading `AutoShapeType` fails.

    Accessing this property never returns a value.

    Raises:
        RuntimeError: Always raised with the message "AutoShapeType unavailable".
    """
    raise RuntimeError("AutoShapeType unavailable")

@property
def HasSmartArt(self) -> bool:
    """
    Report that this dummy API object carries SmartArt.

    Returns:
        bool: Always `True`; this dummy unconditionally represents a SmartArt shape.
    """
    return True

@property
def SmartArt(self) -> object:
    """
    Provide a placeholder standing in for the SmartArt details object.

    Returns:
        object: A new bare `object()` on every access; it has no layout or
        node attributes, so its structure should not be relied upon.
    """
    return object()


Expand Down
Loading
Loading