mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
changed to new test method for significance
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -18,6 +18,9 @@ phantom.egg-info/
|
|||||||
.nextstep
|
.nextstep
|
||||||
.ignore-gitlogue
|
.ignore-gitlogue
|
||||||
.cloudflare
|
.cloudflare
|
||||||
|
.nx/
|
||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
|
||||||
# generated svg/graphics
|
# generated svg/graphics
|
||||||
**/session_*.svg
|
**/session_*.svg
|
||||||
|
|||||||
49
Makefile
49
Makefile
@@ -8,6 +8,7 @@ VENV := .venv
|
|||||||
PYTHON := $(VENV)/bin/python
|
PYTHON := $(VENV)/bin/python
|
||||||
PIP := $(VENV)/bin/pip
|
PIP := $(VENV)/bin/pip
|
||||||
PYTEST := $(VENV)/bin/pytest
|
PYTEST := $(VENV)/bin/pytest
|
||||||
|
NX := npx nx
|
||||||
|
|
||||||
SWEEP_ENV_FILE ?= .env.sweep
|
SWEEP_ENV_FILE ?= .env.sweep
|
||||||
|
|
||||||
@@ -36,7 +37,7 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
|
|||||||
.PHONY: help
|
.PHONY: help
|
||||||
help:
|
help:
|
||||||
@echo "pdf.build pdf.watch pdf.clean | test.backend test.e2e test.all | web.dev | install | train | train.agent | train.bootstrap | train.tpu.pod | train.tpu.vm | train.tpu.vm.sweep | stats.lines"
|
@echo "pdf.build pdf.watch pdf.clean | test.backend test.e2e test.all | web.dev | install | train | train.agent | train.bootstrap | train.tpu.pod | train.tpu.vm | train.tpu.vm.sweep | stats.lines"
|
||||||
@echo "docker.train.publish"
|
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "Local wandb run:"
|
@echo "Local wandb run:"
|
||||||
@echo " make train LOCAL_TRAIN_ARGS='--algo ppo --total-timesteps 50000'"
|
@echo " make train LOCAL_TRAIN_ARGS='--algo ppo --total-timesteps 50000'"
|
||||||
@@ -208,11 +209,43 @@ train.tpu.vm.sweep:
|
|||||||
--tpu-repo-dir "$(TPU_REPO_DIR)" \
|
--tpu-repo-dir "$(TPU_REPO_DIR)" \
|
||||||
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
|
$(if $(filter-out 0,$(AGENT_COUNT)),--count $(AGENT_COUNT),)
|
||||||
|
|
||||||
|
.PHONY: backend.server backend.provider backend.worker platform.up platform.down platform.logs
|
||||||
|
backend.server:
|
||||||
|
@$(NX) run backend-server:dev
|
||||||
|
|
||||||
|
backend.provider:
|
||||||
|
@$(NX) run pricing-provider:dev
|
||||||
|
|
||||||
|
backend.worker:
|
||||||
|
@$(NX) run backend-worker:dev
|
||||||
|
|
||||||
|
platform.up:
|
||||||
|
@$(NX) run platform:up
|
||||||
|
|
||||||
|
platform.down:
|
||||||
|
@$(NX) run platform:down
|
||||||
|
|
||||||
|
platform.logs:
|
||||||
|
@$(NX) run platform:logs
|
||||||
|
|
||||||
.PHONY: pdf clean watch run.webapp test count-lines all
|
.PHONY: pdf clean watch run.webapp test count-lines all
|
||||||
pdf: pdf.build
|
pdf:
|
||||||
clean: pdf.clean
|
@$(NX) run paper:build
|
||||||
watch: pdf.watch
|
|
||||||
run.webapp: web.dev
|
clean:
|
||||||
test: test.backend
|
@$(NX) run paper:clean
|
||||||
count-lines: stats.lines
|
|
||||||
all: pdf.build
|
watch:
|
||||||
|
@$(NX) run paper:watch
|
||||||
|
|
||||||
|
run.webapp:
|
||||||
|
@$(NX) run web:dev
|
||||||
|
|
||||||
|
test:
|
||||||
|
@$(NX) run research:test
|
||||||
|
|
||||||
|
count-lines:
|
||||||
|
@$(NX) run research:stats
|
||||||
|
|
||||||
|
all:
|
||||||
|
@$(NX) run paper:build
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
fastapi==0.104.1
|
fastapi>=0.135,<0.136
|
||||||
uvicorn[standard]==0.24.0
|
uvicorn[standard]>=0.41,<0.42
|
||||||
kafka-python==2.0.2
|
kafka-python>=2.3,<2.4
|
||||||
pydantic==2.5.0
|
pydantic>=2.12,<3
|
||||||
python-dotenv==1.0.0
|
python-dotenv>=1.0,<2
|
||||||
supabase==2.9.1
|
supabase>=2.28,<3
|
||||||
|
|||||||
@@ -616,3 +616,17 @@ Volume: 21},
|
|||||||
year = {2026},
|
year = {2026},
|
||||||
file = {Snapshot:/home/velocitatem/Zotero/storage/N724QGF6/v4.html:text/html},
|
file = {Snapshot:/home/velocitatem/Zotero/storage/N724QGF6/v4.html:text/html},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@article{mann_test_1947,
|
||||||
|
title = {On a {Test} of {Whether} one of {Two} {Random} {Variables} is {Stochastically} {Larger} than the {Other}},
|
||||||
|
volume = {18},
|
||||||
|
url = {https://doi.org/10.1214/aoms/1177730491},
|
||||||
|
doi = {10.1214/aoms/1177730491},
|
||||||
|
abstract = {Let x and y be two random variables with continuous cumulative distribution functions f and g. A statistic U depending on the relative ranks of the x's and y's is proposed for testing the hypothesis f = g. Wilcoxon proposed an equivalent test in the Biometrics Bulletin, December, 1945, but gave only a few points of the distribution of his statistic. Under the hypothesis f = g the probability of obtaining a given U in a sample of n x's and m y's is the solution of a certain recurrence relation involving n and m. Using this recurrence relation tables have been computed giving the probability of U for samples up to n = m = 8. At this point the distribution is almost normal. From the recurrence relation explicit expressions for the mean, variance, and fourth moment are obtained. The 2rth moment is shown to have a certain form which enabled us to prove that the limit distribution is normal if m, n go to infinity in any arbitrary manner. The test is shown to be consistent with respect to the class of alternatives f(x) {\textgreater} g(x) for every x.},
|
||||||
|
number = {1},
|
||||||
|
journal = {The Annals of Mathematical Statistics},
|
||||||
|
author = {Mann, H. B. and Whitney, D. R.},
|
||||||
|
year = {1947},
|
||||||
|
note = {Publisher: Institute of Mathematical Statistics},
|
||||||
|
pages = {50 -- 60},
|
||||||
|
}
|
||||||
|
|||||||
@@ -303,13 +303,9 @@ To train a robust pricing learner, we need a simulator that can generate realist
|
|||||||
\subsubsection{Ground-Truth Separability}
|
\subsubsection{Ground-Truth Separability}
|
||||||
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $\theta_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels separable enough to justify downstream pricing control that depends on that separability?
|
Because sessions are collected under controlled experimental conditions where each actor is assigned a known type at the start of the trial, labels $\theta_s \in \{H, A\}$ are available as ground truth rather than as the output of a heuristic classifier. We therefore estimate separate transition kernels directly from each labeled partition $\mathcal{D}_H$ and $\mathcal{D}_A$, treating the resulting $\hat{\mathcal{T}}_H$ and $\hat{\mathcal{T}}_A$ as the ground-truth behavioral profiles for each class. We then ask a direct methodological question: are the kernels separable enough to justify downstream pricing control that depends on that separability?
|
||||||
|
|
||||||
To answer this, we compute average KL divergence between transition probability matrices. This statistic gives global separability and event-level diagnostics at the same time. To test whether the observed between-class value exceeds finite-sample estimation noise, we compute an intra-class bootstrap baseline by repeatedly splitting $\mathcal{D}_H$ and $\mathcal{D}_A$ into two random halves, fitting a transition kernel on each half, and re-computing the same average KL statistic for each split.
|
To answer this, we compute per-session KL divergence scores against both class-level centroids. For each session $s$ in either partition, we fit a session-level event transition kernel $\hat{\mathcal{T}}_s$ from that session's trajectory alone, then compute its average KL divergence to the human centroid ($\Delta_{H,s}$) and to the agent centroid ($\Delta_{A,s}$). The per-session separability score is the gap $\Delta_{H,s} - \Delta_{A,s}$: a negative value indicates that the session is closer to the human centroid, and a positive value indicates that it is closer to the agent centroid.
|
||||||
|
|
||||||
Formally, for $B$ bootstrap splits per class we obtain reference samples $\{d_{H,b}^{\text{intra}}\}_{b=1}^B$ and $\{d_{A,b}^{\text{intra}}\}_{b=1}^B$, then compare the between-class divergence $d^{\text{inter}}$ against the pooled null distribution. We report pooled mean and variance, lift ratio $d^{\text{inter}}/\mathbb{E}[d^{\text{intra}}]$, and the empirical one-sided p-value
|
KL divergence values are right-skewed and bounded below by zero, and with samples this small the per-session gap scores built from them cannot be assumed to be normally distributed, so we do not use a Student's $t$-test. Instead we apply a Mann-Whitney $U$ test \parencite{mann_test_1947} to the per-session gap scores of the two groups. The Mann-Whitney test is a rank-based nonparametric test that compares the stochastic ordering of two independent samples without assuming a particular distributional form, making it appropriate for small samples drawn from skewed populations. We report $U$, the exact two-sided $p$-value, and group-level descriptive statistics for the gap scores.
|
||||||
\begin{equation}
|
|
||||||
\hat p = \frac{1 + \sum_{j=1}^{2B}\mathbf{1}\{d_j^{\text{intra}} \ge d^{\text{inter}}\}}{2B + 1},
|
|
||||||
\end{equation}
|
|
||||||
which gives a direct significance check for separability before using divergence-derived centroid control signals in pricing.
|
|
||||||
|
|
||||||
\begin{definition}[Kullback-Leibler Divergence for Transition Distributions]
|
\begin{definition}[Kullback-Leibler Divergence for Transition Distributions]
|
||||||
Let $P_e$ and $Q_e$ be categorical distributions over destination states following event $e$, derived from human and agent trajectories respectively. The KL divergence between these distributions is:
|
Let $P_e$ and $Q_e$ be categorical distributions over destination states following event $e$, derived from human and agent trajectories respectively. The KL divergence between these distributions is:
|
||||||
|
|||||||
@@ -10,26 +10,25 @@
|
|||||||
|
|
||||||
\subsection{Behavioral Analysis}
|
\subsection{Behavioral Analysis}
|
||||||
|
|
||||||
The transition-kernel analysis is evaluated with both between-class divergence and an intra-class bootstrap null baseline. This allows us to separate real behavioral differences from finite-sample estimation noise and bias.
|
Separability between human and agent sessions is evaluated by computing per-session divergence gap scores $\Delta_{H,s} - \Delta_{A,s}$ and comparing the two groups with a Mann-Whitney $U$ test. Table~\ref{tab:divergence_significance} reports the group-level descriptive statistics for the gap scores and the test result.
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
\caption{Divergence significance using intra-class bootstrap baseline (B=100 per class).}
|
\caption{Per-session divergence gap ($\Delta_{H,s} - \Delta_{A,s}$) by actor class with Mann-Whitney $U$ test.}
|
||||||
\label{tab:divergence_significance}
|
\label{tab:divergence_significance}
|
||||||
\begin{tabular}{lcccc}
|
\begin{tabular}{lccc}
|
||||||
\toprule
|
\toprule
|
||||||
Metric & Mean KL & Std & 5\% quantile & 95\% quantile \\
|
Group & $n$ & Mean gap & Std \\
|
||||||
\midrule
|
\midrule
|
||||||
Between-class (Human vs Agent) & 5.3067 & -- & -- & -- \\
|
Human sessions & 11 & $-3.3522$ & $2.6748$ \\
|
||||||
Human intra-class split & 2.5271 & 1.2501 & 0.6845 & 4.6015 \\
|
Agent sessions & 6 & $+1.6482$ & $2.8349$ \\
|
||||||
Agent intra-class split & 1.2065 & 1.2607 & 0.2177 & 4.2345 \\
|
\midrule
|
||||||
|
\multicolumn{4}{l}{Mann-Whitney $U = 2.0$, $p = 0.0006$ (two-sided)} \\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
For this run ($n_H=11$, $n_A=7$, $B=100$), the empirical p-value is $0.0149$, both computed as defined in Section~\ref{sec:tpe}. This places the between-class divergence clearly above the intra-class null and supports the use of divergence-derived contamination signals in downstream pricing control.
|
The sign structure is consistent with the theoretical expectation: human sessions produce negative gap scores on average (closer to the human centroid than to the agent centroid), while agent sessions produce positive gap scores on average (closer to the agent centroid). With $n_H=11$ and $n_A=6$, the statistic $U=2.0$ out of a maximum of $n_H n_A = 66$ indicates near-complete rank separation between the groups, and the two-sided $p$-value of $0.0006$ provides strong evidence that the transition kernels are separable enough to justify their use as a control signal in downstream pricing.
|
||||||
|
|
||||||
% TODO: instead could we do a simple t test to see the difference in the means in some way? That way we can yield a P value
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Experimental Outcomes}
|
\subsection{Experimental Outcomes}
|
||||||
@@ -54,6 +53,6 @@ This comparison isolates the effect of robustness terms from model capacity and
|
|||||||
|
|
||||||
|
|
||||||
\subsection{Interpretation and Insights}
|
\subsection{Interpretation and Insights}
|
||||||
Between-class divergence substantially above the intra-class null indicates that the two actor classes are behaviorally separable at the transition-kernel level. In pricing experiments, this is the condition required for separability to act as a useful control signal rather than just an auxiliary classifier score.
|
The Mann-Whitney result ($U=2.0$, $p<0.001$) confirms that per-session divergence gaps separate the two actor classes with near-zero overlap in rank ordering. This is the condition required for separability to act as a useful control signal in the pricing loop rather than just an auxiliary classifier score.
|
||||||
|
|
||||||
\subsection{Anomalies}
|
\subsection{Anomalies}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import os
|
|||||||
import json
|
import json
|
||||||
from pydantic import BaseModel as Base
|
from pydantic import BaseModel as Base
|
||||||
|
|
||||||
|
|
||||||
class PayloadModel(Base):
|
class PayloadModel(Base):
|
||||||
sessionId: str
|
sessionId: str
|
||||||
experimentId: str | None
|
experimentId: str | None
|
||||||
@@ -13,6 +14,7 @@ class PayloadModel(Base):
|
|||||||
userAgent: str
|
userAgent: str
|
||||||
ts: str
|
ts: str
|
||||||
|
|
||||||
|
|
||||||
class ValueModel(Base):
|
class ValueModel(Base):
|
||||||
payload: PayloadModel
|
payload: PayloadModel
|
||||||
encoding: str
|
encoding: str
|
||||||
@@ -20,6 +22,7 @@ class ValueModel(Base):
|
|||||||
schemaId: int
|
schemaId: int
|
||||||
size: int
|
size: int
|
||||||
|
|
||||||
|
|
||||||
class InteractionModel(Base):
|
class InteractionModel(Base):
|
||||||
partitionID: int
|
partitionID: int
|
||||||
offset: int
|
offset: int
|
||||||
@@ -30,14 +33,17 @@ class InteractionModel(Base):
|
|||||||
key: dict
|
key: dict
|
||||||
value: ValueModel
|
value: ValueModel
|
||||||
|
|
||||||
|
|
||||||
def _is_admin(page: str | None) -> bool:
|
def _is_admin(page: str | None) -> bool:
|
||||||
return page is not None and page.startswith("/admin/")
|
return page is not None and page.startswith("/admin/")
|
||||||
|
|
||||||
|
|
||||||
class Loader:
|
class Loader:
|
||||||
def __init__(self, src_dir: str):
|
def __init__(self, src_dir: str):
|
||||||
self.src_dir = src_dir
|
self.src_dir = src_dir
|
||||||
self.entries = os.listdir(src_dir)
|
self.entries = os.listdir(src_dir)
|
||||||
if not self.entries: raise ValueError("empty directory")
|
if not self.entries:
|
||||||
|
raise ValueError("empty directory")
|
||||||
self.data = self._load_sessions()
|
self.data = self._load_sessions()
|
||||||
|
|
||||||
def _load_sessions(self) -> dict:
|
def _load_sessions(self) -> dict:
|
||||||
@@ -55,16 +61,21 @@ class Loader:
|
|||||||
def get_entries(self) -> tuple[list[str], int]:
|
def get_entries(self) -> tuple[list[str], int]:
|
||||||
return self.entries, len(self.entries)
|
return self.entries, len(self.entries)
|
||||||
|
|
||||||
|
|
||||||
class AgentLoader(Loader):
|
class AgentLoader(Loader):
|
||||||
def _load_sessions(self) -> dict:
|
def _load_sessions(self) -> dict:
|
||||||
sessions = {}
|
sessions = {}
|
||||||
for entry in self.entries:
|
for entry in self.entries:
|
||||||
with open(f"{self.src_dir}/{entry}/int.json") as f:
|
path = f"{self.src_dir}/{entry}/int.json"
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
continue
|
||||||
|
with open(path) as f:
|
||||||
raw = json.load(f)
|
raw = json.load(f)
|
||||||
ints = [PayloadModel(**i) for i in raw]
|
ints = [PayloadModel(**i) for i in raw]
|
||||||
sessions[entry] = [i for i in ints if not _is_admin(i.page)]
|
sessions[entry] = [i for i in ints if not _is_admin(i.page)]
|
||||||
return sessions
|
return sessions
|
||||||
|
|
||||||
|
|
||||||
class JointLoader:
|
class JointLoader:
|
||||||
def __init__(self, human_dir: str, agent_dir: str):
|
def __init__(self, human_dir: str, agent_dir: str):
|
||||||
self.human_loader = Loader(human_dir)
|
self.human_loader = Loader(human_dir)
|
||||||
@@ -74,10 +85,14 @@ class JointLoader:
|
|||||||
|
|
||||||
def _merge(self) -> dict:
|
def _merge(self) -> dict:
|
||||||
return {
|
return {
|
||||||
**{f"human_{sid}": [e.value.payload for e in evts]
|
**{
|
||||||
for sid, evts in self.human_loader.get_data().items()},
|
f"human_{sid}": [e.value.payload for e in evts]
|
||||||
**{f"agent_{sid}": evts
|
for sid, evts in self.human_loader.get_data().items()
|
||||||
for sid, evts in self.agent_loader.get_data().items()}
|
},
|
||||||
|
**{
|
||||||
|
f"agent_{sid}": evts
|
||||||
|
for sid, evts in self.agent_loader.get_data().items()
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_data(self) -> dict:
|
def get_data(self) -> dict:
|
||||||
@@ -86,12 +101,17 @@ class JointLoader:
|
|||||||
def get_entries(self) -> tuple[list[str], int]:
|
def get_entries(self) -> tuple[list[str], int]:
|
||||||
return self.entries, len(self.entries)
|
return self.entries, len(self.entries)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
|
agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
|
||||||
human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
|
human_dir = (
|
||||||
|
"/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
|
||||||
|
)
|
||||||
|
|
||||||
for name, cls, path in [("agent", AgentLoader, agent_dir),
|
for name, cls, path in [
|
||||||
|
("agent", AgentLoader, agent_dir),
|
||||||
("human", Loader, human_dir),
|
("human", Loader, human_dir),
|
||||||
("joint", lambda d: JointLoader(human_dir, d), agent_dir)]:
|
("joint", lambda d: JointLoader(human_dir, d), agent_dir),
|
||||||
|
]:
|
||||||
ldr = cls(path) if name != "joint" else cls(agent_dir)
|
ldr = cls(path) if name != "joint" else cls(agent_dir)
|
||||||
print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions")
|
print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions")
|
||||||
|
|||||||
@@ -260,6 +260,29 @@ def _avg_event_kl(
|
|||||||
return float(np.mean([kl_divergence(src_evt[e], dst_evt[e]) for e in common]))
|
return float(np.mean([kl_divergence(src_evt[e], dst_evt[e]) for e in common]))
|
||||||
|
|
||||||
|
|
||||||
|
def per_session_divergence(
    model: BehaviorModel,
    reference_evt: Dict[str, Dict[str, float]],
) -> List[float]:
    """Score each usable session in ``model`` against a reference event kernel.

    For every session in ``model.data`` that has at least two events, an
    event-level transition distribution is built from that session alone and
    compared with ``reference_evt`` via ``kl_divergence``. The comparison is
    averaged over the events present in both the session and the reference.

    Args:
        model: Behavior model whose ``data`` attribute maps a session id to
            that session's sequence of events.
        reference_evt: Reference event-level transition distributions, keyed
            by event (presumably event -> destination -> probability; confirm
            against ``aggregate_event_transitions``).

    Returns:
        One float per session that has at least two events, in the iteration
        order of ``model.data``. Sessions with fewer than two events are
        skipped, so the list can be shorter than ``model.data``. A session
        sharing no event with the reference contributes ``0.0``.
    """
    divergences = []
    for session_id, session_events in model.data.items():
        # Sessions with fewer than two events are skipped entirely.
        if len(session_events) >= 2:
            # Build the event-level distribution from this one session only.
            session_evt = aggregate_event_transitions(
                _build_subset_mdp(model, [session_id])
            )
            shared_events = set(session_evt.keys()) & set(reference_evt.keys())
            if shared_events:
                per_event_kl = [
                    kl_divergence(session_evt[e], reference_evt[e])
                    for e in shared_events
                ]
                divergences.append(float(np.mean(per_event_kl)))
            else:
                # NOTE(review): 0.0 is recorded when nothing overlaps with the
                # reference; this is the same value a perfect match would
                # give -- confirm this fallback is intended.
                divergences.append(0.0)
    return divergences
|
||||||
|
|
||||||
|
|
||||||
def bootstrap_intra_class_divergence(
|
def bootstrap_intra_class_divergence(
|
||||||
model: BehaviorModel,
|
model: BehaviorModel,
|
||||||
n_bootstrap: int = 100,
|
n_bootstrap: int = 100,
|
||||||
@@ -412,3 +435,36 @@ if __name__ == "__main__":
|
|||||||
f" Lift vs pooled intra mean: {inter_class_avg / max(float(np.mean(pooled_null)), 1e-10):.2f}x"
|
f" Lift vs pooled intra mean: {inter_class_avg / max(float(np.mean(pooled_null)), 1e-10):.2f}x"
|
||||||
)
|
)
|
||||||
print(f" Empirical p-value (inter > intra): {p_empirical:.4f}")
|
print(f" Empirical p-value (inter > intra): {p_empirical:.4f}")
|
||||||
|
|
||||||
|
# per-session divergence scores: delta_H - delta_A per session (positive means closer to agent behavior)
|
||||||
|
from scipy.stats import mannwhitneyu
|
||||||
|
|
||||||
|
human_dH = per_session_divergence(
|
||||||
|
human_model, human_evt
|
||||||
|
) # human session vs human centroid
|
||||||
|
human_dA = per_session_divergence(
|
||||||
|
human_model, agent_evt
|
||||||
|
) # human session vs agent centroid
|
||||||
|
agent_dH = per_session_divergence(
|
||||||
|
agent_model, human_evt
|
||||||
|
) # agent session vs human centroid
|
||||||
|
agent_dA = per_session_divergence(
|
||||||
|
agent_model, agent_evt
|
||||||
|
) # agent session vs agent centroid
|
||||||
|
# score = delta_H - delta_A: high means far from humans, close to agents
|
||||||
|
n_h = min(len(human_dH), len(human_dA))
|
||||||
|
n_a = min(len(agent_dH), len(agent_dA))
|
||||||
|
human_diff = [human_dH[i] - human_dA[i] for i in range(n_h)]
|
||||||
|
agent_diff = [agent_dH[i] - agent_dA[i] for i in range(n_a)]
|
||||||
|
print(f"\nPer-session divergence gap (delta_H - delta_A):")
|
||||||
|
print(
|
||||||
|
f" Human sessions (n={n_h}): mean={np.mean(human_diff):.4f}, std={np.std(human_diff):.4f}"
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f" Agent sessions (n={n_a}): mean={np.mean(agent_diff):.4f}, std={np.std(agent_diff):.4f}"
|
||||||
|
)
|
||||||
|
if n_h >= 2 and n_a >= 2:
|
||||||
|
U, mw_p = mannwhitneyu(human_diff, agent_diff, alternative="two-sided")
|
||||||
|
print(f" Mann-Whitney U={U:.1f}, p={mw_p:.4f}")
|
||||||
|
else:
|
||||||
|
print(" Insufficient sessions for Mann-Whitney test")
|
||||||
|
|||||||
Reference in New Issue
Block a user