diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 39daf3dd7f88..b865f88350ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.9 + rev: v0.15.15 hooks: - id: ruff-check - id: ruff-format @@ -32,7 +32,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.21.0 + rev: v2.23.0 hooks: - id: pyproject-fmt diff --git a/DIRECTORY.md b/DIRECTORY.md index ca454bd5fd82..daf71bab8162 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -471,6 +471,8 @@ * [Geometry](geometry/geometry.py) * [Graham Scan](geometry/graham_scan.py) * [Jarvis March](geometry/jarvis_march.py) + * [Ramer Douglas Peucker](geometry/ramer_douglas_peucker.py) + * [Segment Intersection](geometry/segment_intersection.py) * Tests * [Test Graham Scan](geometry/tests/test_graham_scan.py) * [Test Jarvis March](geometry/tests/test_jarvis_march.py) @@ -523,6 +525,7 @@ * [Graphs Floyd Warshall](graphs/graphs_floyd_warshall.py) * [Greedy Best First](graphs/greedy_best_first.py) * [Greedy Min Vertex Cover](graphs/greedy_min_vertex_cover.py) + * [Johnson](graphs/johnson.py) * [Kahns Algorithm Long](graphs/kahns_algorithm_long.py) * [Kahns Algorithm Topo](graphs/kahns_algorithm_topo.py) * [Karger](graphs/karger.py) @@ -543,6 +546,7 @@ * [Strongly Connected Components](graphs/strongly_connected_components.py) * [Tarjans Scc](graphs/tarjans_scc.py) * Tests + * [Test Johnson](graphs/tests/test_johnson.py) * [Test Min Spanning Tree Kruskal](graphs/tests/test_min_spanning_tree_kruskal.py) * [Test Min Spanning Tree Prim](graphs/tests/test_min_spanning_tree_prim.py) diff --git a/geometry/ramer_douglas_peucker.py b/geometry/ramer_douglas_peucker.py new file mode 100644 index 000000000000..a03bbb2e5086 --- /dev/null +++ b/geometry/ramer_douglas_peucker.py @@ -0,0 +1,184 @@ +""" +Ramer-Douglas-Peucker polyline simplification algorithm. + +Given a sequence of 2-D points and a tolerance epsilon, the algorithm +reduces the number of points while preserving the overall shape of the curve. + +Time complexity: O(n log n) average, O(n²) worst case +Space complexity: O(n) + +References: + https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm +""" + +from __future__ import annotations + +import math + + +def _euclidean_distance( + point_a: tuple[float, float], + point_b: tuple[float, float], +) -> float: + """Return the Euclidean distance between two 2-D points. + + >>> _euclidean_distance((0.0, 0.0), (3.0, 4.0)) + 5.0 + >>> _euclidean_distance((1.0, 1.0), (1.0, 1.0)) + 0.0 + """ + return math.hypot(point_b[0] - point_a[0], point_b[1] - point_a[1]) + + +def _perpendicular_distance( + point: tuple[float, float], + line_start: tuple[float, float], + line_end: tuple[float, float], +) -> float: + """Return the distance from *point* to the line **segment** between + *line_start* and *line_end*. + + When the perpendicular projection of *point* onto the infinite line falls + within the segment, this equals the perpendicular distance to that line. + When the projection falls outside the segment, the distance to the nearest + endpoint is returned instead (projection clamped to [0, 1]). + + This is the correct distance measure for the Ramer-Douglas-Peucker + algorithm: using the infinite-line distance can incorrectly discard points + whose projection lies beyond a segment endpoint. + + >>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0)) + 4.0 + >>> # order of line_start and line_end does not affect the result + >>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0)) + 4.0 + >>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0)) + 4.0 + >>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0)) + 4.0 + >>> # projection falls outside the segment; distance to nearest endpoint + >>> round(_perpendicular_distance((0.0, 2.0), (1.0, 0.0), (3.0, 0.0)), 6) + 2.236068 + """ + px, py = point + ax, ay = line_start + bx, by = line_end + dx, dy = bx - ax, by - ay + seg_len_sq = dx * dx + dy * dy + if seg_len_sq == 0.0: + # line_start and line_end coincide; fall back to point-to-point distance + return _euclidean_distance(point, line_start) + # Project point onto the segment line, then clamp t to [0, 1] so the + # nearest point is always on the segment rather than the infinite line. + t = max(0.0, min(1.0, ((px - ax) * dx + (py - ay) * dy) / seg_len_sq)) + nearest_x = ax + t * dx + nearest_y = ay + t * dy + return math.hypot(px - nearest_x, py - nearest_y) + + +def ramer_douglas_peucker( + pts: list[tuple[float, float]], + epsilon: float, +) -> list[tuple[float, float]]: + """Simplify a polyline using the Ramer-Douglas-Peucker algorithm. + + Given a sequence of 2-D points and a maximum allowable deviation + *epsilon* (>= 0), returns a simplified list of points such that no + discarded point is farther than *epsilon* from the simplified polyline. + + Parameters + ---------- + pts: + Ordered sequence of ``(x, y)`` points describing the polyline. + epsilon: + Maximum allowable distance of any discarded point from the + simplified polyline. Must be non-negative. + + Returns + ------- + list[tuple[float, float]] + Simplified list of ``(x, y)`` points. The first and last points of + *pts* are always preserved. + + Raises + ------ + ValueError + If *epsilon* is negative. + + References + ---------- + https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm + + Examples + -------- + >>> ramer_douglas_peucker([], epsilon=1.0) + [] + >>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0) + [(0.0, 0.0)] + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0)], epsilon=1.0) + [(0.0, 0.0), (1.0, 0.0)] + >>> # middle point is within epsilon - it is discarded + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.1), (2.0, 0.0)], epsilon=0.5) + [(0.0, 0.0), (2.0, 0.0)] + >>> # middle point exceeds epsilon - it is kept + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)], epsilon=0.5) + [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.5), (2.0, 0.0)], epsilon=-1.0) + Traceback (most recent call last): + ... + ValueError: epsilon must be non-negative, got -1.0 + """ + if epsilon < 0: + msg = f"epsilon must be non-negative, got {epsilon!r}" + raise ValueError(msg) + + if len(pts) < 3: + return list(pts) + + # --------------------------------------------------------------------------- + # Iterative, stack-based implementation. + # + # The naive recursive approach copies sublists at every level via slicing + # (pts[:max_index+1] / pts[max_index:]), which is O(n) per call and makes + # the overall algorithm O(n²) in memory even for well-balanced splits. An + # explicit stack operating on index ranges avoids all copying and also + # eliminates the risk of hitting Python's recursion limit for long polylines. + # --------------------------------------------------------------------------- + n = len(pts) + + # keep[i] is True when pts[i] must appear in the output. + keep: list[bool] = [False] * n + keep[0] = True + keep[-1] = True + + # Stack of (start_index, end_index) pairs still to be examined. + stack: list[tuple[int, int]] = [(0, n - 1)] + + while stack: + start, end = stack.pop() + if end - start < 2: + # Only one interior candidate at most; nothing to split further. + continue + + # Find the interior point with the greatest distance to the segment. + max_dist = 0.0 + max_index = start + for i in range(start + 1, end): + dist = _perpendicular_distance(pts[i], pts[start], pts[end]) + if dist > max_dist: + max_dist = dist + max_index = i + + if max_dist > epsilon: + keep[max_index] = True + stack.append((start, max_index)) + stack.append((max_index, end)) + # else: all interior points are within epsilon; discard them all. + + return [pts[i] for i in range(n) if keep[i]] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/geometry/segment_intersection.py b/geometry/segment_intersection.py new file mode 100644 index 000000000000..e2e2e10f1e4d --- /dev/null +++ b/geometry/segment_intersection.py @@ -0,0 +1,112 @@ +""" +Given two line segments, determine whether they intersect. + +This is based on the algorithm described in Introduction to Algorithms +(CLRS), Chapter 33. + +Reference: + - https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection + - https://en.wikipedia.org/wiki/Orientation_(geometry) +""" + +from __future__ import annotations + +from typing import NamedTuple + + +class Point(NamedTuple): + """A point in 2D space. + + >>> Point(0, 0) + Point(x=0, y=0) + >>> Point(1, -3) + Point(x=1, y=-3) + """ + + x: float + y: float + + +def direction(pivot: Point, target: Point, query: Point) -> float: + """Return the cross product of vectors (pivot->query) and (pivot->target). + + The sign of the result encodes the orientation of the ordered triple + (pivot, target, query): + - Negative -> counter-clockwise (left turn) + - Positive -> clockwise (right turn) + - Zero -> collinear + + >>> direction(Point(0, 0), Point(1, 0), Point(0, 1)) + -1 + >>> direction(Point(0, 0), Point(0, 1), Point(1, 0)) + 1 + >>> direction(Point(0, 0), Point(1, 1), Point(2, 2)) + 0 + """ + return (query.x - pivot.x) * (target.y - pivot.y) - (target.x - pivot.x) * ( + query.y - pivot.y + ) + + +def on_segment(seg_start: Point, seg_end: Point, point: Point) -> bool: + """Check whether *point*, known to be collinear with the segment, lies on it. + + >>> on_segment(Point(0, 0), Point(4, 4), Point(2, 2)) + True + >>> on_segment(Point(0, 0), Point(4, 4), Point(5, 5)) + False + >>> on_segment(Point(0, 0), Point(4, 0), Point(2, 0)) + True + """ + return min(seg_start.x, seg_end.x) <= point.x <= max( + seg_start.x, seg_end.x + ) and min(seg_start.y, seg_end.y) <= point.y <= max(seg_start.y, seg_end.y) + + +def segments_intersect(p1: Point, p2: Point, p3: Point, p4: Point) -> bool: + """Return True if line segment p1p2 intersects line segment p3p4. + + Uses the CLRS cross-product / orientation method. Handles both the + general case (proper crossing) and degenerate cases where one endpoint + lies exactly on the other segment. + + >>> segments_intersect(Point(0, 0), Point(2, 2), Point(0, 2), Point(2, 0)) + True + >>> segments_intersect(Point(0, 0), Point(2, 2), Point(1, 1), Point(3, 3)) + True + >>> segments_intersect(Point(0, 0), Point(1, 0), Point(2, 0), Point(3, 0)) + False + >>> segments_intersect(Point(0, 0), Point(1, 1), Point(1, 0), Point(2, 1)) + False + >>> segments_intersect(Point(0, 0), Point(1, 1), Point(0, 1), Point(0, 2)) + False + >>> segments_intersect(Point(0, 0), Point(1, 0), Point(1, 0), Point(2, 0)) + True + """ + d1 = direction(p3, p4, p1) + d2 = direction(p3, p4, p2) + d3 = direction(p1, p2, p3) + d4 = direction(p1, p2, p4) + + if ((d1 < 0 < d2) or (d2 < 0 < d1)) and ((d3 < 0 < d4) or (d4 < 0 < d3)): + return True + + if d1 == 0 and on_segment(p3, p4, p1): + return True + if d2 == 0 and on_segment(p3, p4, p2): + return True + if d3 == 0 and on_segment(p1, p2, p3): + return True + return d4 == 0 and on_segment(p1, p2, p4) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + print("Enter four points as 'x y' pairs (one per line):") + points = [Point(*map(float, input().split())) for _ in range(4)] + p1, p2, p3, p4 = points + result = segments_intersect(p1, p2, p3, p4) + print(1 if result else 0) diff --git a/graphs/johnson.py b/graphs/johnson.py new file mode 100644 index 000000000000..6306ab5f8654 --- /dev/null +++ b/graphs/johnson.py @@ -0,0 +1,118 @@ +import heapq +from collections.abc import Hashable + +Node = Hashable +edge = tuple[Node, Node, float] +adjacency = dict[Node, list[tuple[Node, float]]] + + +def _collect_nodes_and_edges(graph: adjacency) -> tuple[list[Node], list[edge]]: + nodes = set() + edges: list[edge] = [] + for u, neighbors in graph.items(): + nodes.add(u) + for v, w in neighbors: + nodes.add(v) + edges.append((u, v, w)) + return list(nodes), edges + + +def _bellman_ford(nodes: list[Node], edges: list[edge]) -> dict[Node, float]: + """ + Bellman-Ford relaxation to compute potentials h[v] for all vertices. + Raises ValueError if a negative weight cycle exists. + """ + dist: dict[Node, float] = dict.fromkeys(nodes, 0.0) + n = len(nodes) + + for _ in range(n - 1): + updated = False + for u, v, w in edges: + if dist[u] + w < dist[v]: + dist[v] = dist[u] + w + updated = True + if not updated: + break + else: + for u, v, w in edges: + if dist[u] + w < dist[v]: + raise ValueError("Negative weight cycle detected") + return dist + + +def _dijkstra( + start: Node, + nodes: list[Node], + graph: adjacency, + potentials: dict[Node, float], +) -> dict[Node, float]: + """ + Dijkstra over reweighted graph, using potentials h to make weights non-negative. + Returns distances from start in the reweighted space. + """ + inf = float("inf") + dist: dict[Node, float] = dict.fromkeys(nodes, inf) + dist[start] = 0.0 + heap: list[tuple[float, Node]] = [(0.0, start)] + + while heap: + d_u, u = heapq.heappop(heap) + if d_u > dist[u]: + continue + for v, w in graph.get(u, []): + w_prime = w + potentials[u] - potentials[v] + if w_prime < 0: + raise ValueError( + "Negative edge weight after reweighting: numeric error" + ) + new_dist = d_u + w_prime + if new_dist < dist[v]: + dist[v] = new_dist + heapq.heappush(heap, (new_dist, v)) + return dist + + +def johnson(graph: adjacency) -> dict[Node, dict[Node, float]]: + """ + Compute all-pairs shortest paths using Johnson's algorithm. + + Reference: + https://en.wikipedia.org/wiki/Johnson%27s_algorithm + + Args: + graph: adjacency list {u: [(v, weight), ...], ...} + + Returns: + dict of dicts: dist[u][v] = shortest distance from u to v + + Raises: + ValueError: if a negative weight cycle is detected + + Example: + >>> g = { + ... 0: [(1, 3), (2, 8), (4, -4)], + ... 1: [(3, 1), (4, 7)], + ... 2: [(1, 4)], + ... 3: [(0, 2), (2, -5)], + ... 4: [(3, 6)], + ... } + >>> round(johnson(g)[0][3], 2) + 2.0 + """ + nodes, edges = _collect_nodes_and_edges(graph) + potentials = _bellman_ford(nodes, edges) + + all_pairs: dict[Node, dict[Node, float]] = {} + inf = float("inf") + for s in nodes: + dist_reweighted = _dijkstra(s, nodes, graph, potentials) + dists_orig: dict[Node, float] = {} + for v in nodes: + d_prime = dist_reweighted[v] + if d_prime < inf: + dists_orig[v] = d_prime - potentials[s] + potentials[v] + else: + dists_orig[v] = inf + all_pairs[s] = dists_orig + + return all_pairs diff --git a/graphs/tests/test_johnson.py b/graphs/tests/test_johnson.py new file mode 100644 index 000000000000..e149aac85d0f --- /dev/null +++ b/graphs/tests/test_johnson.py @@ -0,0 +1,24 @@ +import math + +import pytest + +from graphs.johnson import johnson + + +def test_johnson_basic(): + g = { + 0: [(1, 3), (2, 8), (4, -4)], + 1: [(3, 1), (4, 7)], + 2: [(1, 4)], + 3: [(0, 2), (2, -5)], + 4: [(3, 6)], + } + dist = johnson(g) + assert math.isclose(dist[0][3], 2.0, abs_tol=1e-9) + assert math.isclose(dist[3][2], -5.0, abs_tol=1e-9) + + +def test_johnson_negative_cycle(): + g2 = {0: [(1, 1)], 1: [(0, -3)]} + with pytest.raises(ValueError): + johnson(g2) diff --git a/pyproject.toml b/pyproject.toml index 34e099a46435..0b0c3f5cfda4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,14 +169,14 @@ skip = """\ python_version = "3.14" [tool.pytest] -ini_options.markers = [ - "mat_ops: mark a test as utilizing matrix operations.", -] ini_options.addopts = [ "--durations=10", "--doctest-modules", "--showlocals", ] +ini_options.markers = [ + "mat_ops: mark a test as utilizing matrix operations.", +] [tool.coverage] report.omit = [ diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 41ab4a10a87b..2eeed88b7399 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,4 +1,7 @@ -def binary_search(lst, item, start, end): +from typing import Any + + +def binary_search(lst: list[Any], item: Any, start: int, end: int) -> int: if start == end: return start if lst[start] > item else start + 1 if start > end: @@ -13,7 +16,7 @@ def binary_search(lst, item, start, end): return mid -def insertion_sort(lst): +def insertion_sort(lst: list[Any]) -> list[Any]: length = len(lst) for index in range(1, length): @@ -24,7 +27,7 @@ def insertion_sort(lst): return lst -def merge(left, right): +def merge(left: list[Any], right: list[Any]) -> list[Any]: if not left: return right @@ -37,7 +40,7 @@ def merge(left, right): return [right[0], *merge(left, right[1:])] -def tim_sort(lst): +def tim_sort(lst: list[Any] | tuple[Any, ...] | str) -> list[Any]: """ >>> tim_sort("Python") ['P', 'h', 'n', 'o', 't', 'y'] @@ -53,7 +56,7 @@ def tim_sort(lst): length = len(lst) runs, sorted_runs = [], [] new_run = [lst[0]] - sorted_array = [] + sorted_array: list[Any] = [] i = 1 while i < length: if lst[i] < lst[i - 1]: