diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex index 8c58717..799486e 100644 --- a/paper/src/chapters/03-methodology.tex +++ b/paper/src/chapters/03-methodology.tex @@ -23,7 +23,7 @@ where: The platform does not directly observe the true underlying demand function $d(p)$. Instead, it observes a behavioral proxy $\hat{q}_t$, which is a composite signal derived from the mixture of actor types. We define the demand proxy for product $i$ at epoch $t$ as a weighted aggregation of events: \begin{equation} \label{eq:qhat} -\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathds{1}[i_{s,k} = i] +\hat{q}_{t,i} = \sum_{s \in \mathcal{S}_t} \sum_{k=1}^{L_s} \omega(a_{s,k}) \cdot \mathbf{1}[i_{s,k} = i] \end{equation} where $\omega: \mathcal{A} \to \mathbb{R}_+$ assigns weights to actions based on their signal strength regarding willingness to pay. diff --git a/paper/src/chapters/mdp_agent.pdf b/paper/src/chapters/mdp_agent.pdf index 6845eb5..aeab1b7 100644 Binary files a/paper/src/chapters/mdp_agent.pdf and b/paper/src/chapters/mdp_agent.pdf differ diff --git a/paper/src/chapters/mdp_human.pdf b/paper/src/chapters/mdp_human.pdf index 69bc8d3..b753b4e 100644 Binary files a/paper/src/chapters/mdp_human.pdf and b/paper/src/chapters/mdp_human.pdf differ diff --git a/scripts/nx_paper.sh b/scripts/nx_paper.sh index 375db5a..036a3c4 100644 --- a/scripts/nx_paper.sh +++ b/scripts/nx_paper.sh @@ -4,15 +4,34 @@ set -euo pipefail cmd="${1:-}" +sync_mdp_figures() { + local script_dir project_root sim_dir chapters_dir + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + project_root="$(cd "$script_dir/.." && pwd)" + sim_dir="$project_root/sim/rl/behavior_loader" + chapters_dir="$project_root/paper/src/chapters" + + printf '%s\n' 'Refreshing MDP figures for paper...' 
def _compass_from_angle(angle_rad: float) -> str:
    """Return the compass port name closest to ``angle_rad``.

    The angle is in radians, measured so that 0 maps to ``"e"`` and
    increasing angles walk through ``"ne"``, ``"n"``, ``"nw"``, ... in
    45-degree sectors.  Any finite angle is accepted; it is wrapped into
    ``[0, 2*pi)`` before the nearest sector is chosen.
    """
    ports = ("e", "ne", "n", "nw", "w", "sw", "s", "se")
    full_turn = 2 * np.pi
    sector = np.pi / 4
    wrapped = (angle_rad + full_turn) % full_turn
    # The outer modulo folds the "rounded up to a full turn" case back to "e".
    return ports[int(np.round(wrapped / sector)) % len(ports)]


def _edge_ports(
    src: str,
    dst: str,
    positions: Dict[str, Tuple[float, float]],
    has_reverse: bool,
) -> Tuple[str, str]:
    """Return ``(tail_port, head_port)`` compass names for the edge src -> dst.

    The tail port follows the direction from ``positions[src]`` to
    ``positions[dst]``; the head port is the opposite direction.

    When ``has_reverse`` is true the direction is first rotated by
    ``+pi/10`` if ``src < dst`` (string comparison) and by ``-pi/10``
    otherwise.  This is intended to let the two directions of a
    bidirectional pair snap to different ports; after rounding to 45-degree
    sectors the rotation does not always change the chosen port.

    Raises:
        KeyError: if ``src`` or ``dst`` is missing from ``positions``.
    """
    src_x, src_y = positions[src]
    dst_x, dst_y = positions[dst]
    heading = float(np.arctan2(dst_y - src_y, dst_x - src_x))

    if has_reverse:
        bend = np.pi / 10
        heading += bend if src < dst else -bend

    return _compass_from_angle(heading), _compass_from_angle(heading + np.pi)


def _edge_style(prob: float) -> Dict[str, str]:
    """Build Graphviz edge attributes whose weight scales with ``prob``.

    Higher probabilities get a darker colour (four bands split at 0.25,
    0.50 and 0.75), a thicker pen and a larger arrow head.  All values are
    returned as strings, ready to be passed as edge attributes.
    """
    # (lower bound, colour) pairs, checked from the darkest band down.
    bands = (
        (0.75, "#111827"),
        (0.50, "#374151"),
        (0.25, "#6b7280"),
    )
    edge_color = next(
        (shade for floor, shade in bands if prob >= floor),
        "#9ca3af",
    )
    return {
        "color": edge_color,
        "fontcolor": "#111827",
        "fontsize": "10",
        "penwidth": f"{0.9 + 3.6 * prob:.2f}",
        "arrowsize": f"{0.55 + 0.55 * prob:.2f}",
    }


def _format_node_label(evt: str) -> str:
    """Wrap an underscore-separated event name into short label lines.

    Tokens are packed greedily onto lines of at most 16 characters.  Where
    a line is wrapped, the underscore at the wrap point is replaced by the
    line break; every other underscore is kept, including a leading one
    (``"_private"`` stays ``"_private"``).  A single token longer than the
    limit is never split.  Names without any underscore are returned
    unchanged.
    """
    max_line_len = 16
    tokens = evt.split("_")
    if len(tokens) == 1:
        return evt

    lines: List[str] = []
    # Seed with the first token so that "is this the first token on the
    # line?" never depends on the accumulated text being non-empty; an
    # empty leading token (name starts with "_") must still get its
    # separator when the next token is appended.
    curr = tokens[0]
    for token in tokens[1:]:
        piece = f"_{token}"
        if curr and len(curr) + len(piece) > max_line_len:
            lines.append(curr)
            curr = token
        else:
            curr = f"{curr}{piece}"
    if curr:
        lines.append(curr)
    return "\n".join(lines)


def _compute_flow_positions(
    events: List[str],
    layout_radius: float,
) -> Dict[str, Tuple[float, float]]:
    """Balanced grid layout for paper-friendly diagrams.

    Events are placed in row-major order on a near-square grid
    (``ceil(sqrt(n))`` columns) centred on the origin, with the first event
    in the top-left cell (largest ``y``).  Column and row spacing scale
    with ``layout_radius`` but never drop below 3.6 and 3.2 respectively.

    Returns a mapping from event name to ``(x, y)``; an empty input yields
    an empty mapping.  If a name occurs more than once, its last position
    wins.
    """
    if not events:
        return {}

    num_events = len(events)
    cols = int(np.ceil(np.sqrt(num_events)))
    rows = int(np.ceil(num_events / cols))
    x_step = max(layout_radius * 1.10, 3.6)
    y_step = max(layout_radius * 0.95, 3.2)

    # Offsets that centre the grid on (0, 0).
    col_centre = (cols - 1) / 2.0
    row_centre = (rows - 1) / 2.0

    positions: Dict[str, Tuple[float, float]] = {}
    for idx, evt in enumerate(events):
        row, col = divmod(idx, cols)
        positions[evt] = (
            float((col - col_centre) * x_step),
            float((row_centre - row) * y_step),
        )
    return positions
label_threshold: float = 0.08, ): if not model.mdp: raise ValueError("build MDP first") evt_trans = aggregate_event_transitions(model.mdp) ordered_events = _resolve_event_order(evt_trans, event_order=event_order) - positions = _fixed_circle_positions(ordered_events, radius=layout_radius) + positions = _compute_flow_positions(ordered_events, layout_radius=layout_radius) g = graphviz.Digraph(format=fmt, engine="neato") - g.attr(overlap="false", splines="true", outputorder="edgesfirst") + g.attr( + overlap="false", + splines="true", + outputorder="edgesfirst", + pad="0.5", + sep="+9", + esep="+4", + bgcolor="white", + dpi="180", + ) g.attr( "node", shape="circle", + fixedsize="true", width=f"{node_diameter:.2f}", height=f"{node_diameter:.2f}", - fixedsize="true", - fontsize="10", + fontsize="11", + fontname="Helvetica", + style="filled", + fillcolor="white", + color="#374151", + fontcolor="#111827", + penwidth="1.8", + peripheries="1", + ) + g.attr( + "edge", + fontname="Helvetica", ) for evt in ordered_events: - x_pos, y_pos = positions[evt] - g.node(evt, pos=f"{x_pos:.3f},{y_pos:.3f}!", pin="true") + x, y = positions[evt] + g.node(evt, label=_format_node_label(evt), pos=f"{x:.2f},{y:.2f}!", pin="true") - for src, dsts in evt_trans.items(): - for dst, prob in dsts.items(): - if prob > threshold: - g.edge(src, dst, label=f"{prob:.2f}") + edges = [ + (src, dst, prob) + for src, dsts in evt_trans.items() + for dst, prob in dsts.items() + if prob > threshold + ] + edge_set = {(src, dst) for src, dst, _ in edges} + + for src, dst, prob in sorted(edges, key=lambda row: row[2]): + edge_attrs: Dict[str, str] = _edge_style(prob) + + if src == dst: + # pick a loop port away from the main flow + sx, sy = positions[src] + loop_port = "n" if sy <= 0 else "s" + edge_attrs.update({"tailport": loop_port, "headport": loop_port}) + else: + has_reverse = (dst, src) in edge_set + tail_port, head_port = _edge_ports(src, dst, positions, has_reverse) + edge_attrs.update({"tailport": 
tail_port, "headport": head_port}) + if has_reverse: + edge_attrs["constraint"] = "false" + + if prob >= label_threshold or src == dst: + edge_attrs["label"] = f" {prob:.2f} " + + g.edge(src, dst, **edge_attrs) g.render(output, view=view, cleanup=True) print(f"Saved MDP graph to {output}.{fmt}")