This commit is contained in:
2026-02-05 12:28:26 +01:00
parent c4d82b2ecc
commit ebd2378859

View File

@@ -1,5 +1,8 @@
\section{Methodology}
% Extra notes and clarifications: we observe a sample of human users and estimate their transition probabilities between event types.
% We then modify these behavioral profiles (transition matrices) with price elasticity matrices generated from valuations sampled from a distribution.
This section details the theoretical and practical framework developed to address dynamic pricing under the influence of non-human actors. We begin by formalizing the problem environment and the nature of the actors. We then derive the \textit{Cost of Information} (COI) theorem, proving the erosion of pricing power in the limit of agent saturation. Following this, we outline our generative contamination strategy using GOFAI-driven separability and transition probability learning. Finally, we formulate the robust control problem as a Stackelberg game solved via Distributionally Robust Reinforcement Learning (DR-RL) with constructed ambiguity sets.
\subsection{Problem Formalization}
@@ -36,15 +39,17 @@ where $\alpha \in [0, 1]$ represents the contamination parameter (proportion of
\subsection{Cost of Information (COI) Framework}
The \textit{Cost of Information} (COI) represents the markup a pricing policy $\pi$ attempts to extract from the market by leveraging demand signals. We define COI as the expected premium over the minimum viable price $\underline{p}$ (or marginal cost). It also conveys the financial urgency that arises from the information asymmetry between the platform and the actors.
The platform's pricing power comes from information asymmetry: users who express strong interest signals pay more than the base price. We quantify this markup as the \textit{Cost of Information} (COI), which represents the average premium extracted above marginal cost. COI measures the revenue at risk when information asymmetry collapses.
\begin{definition}[Cost of Information]
Let $\pi(\tau)$ be a pricing policy mapping interaction histories to prices. The COI is defined as:
\begin{align}
\text{COI} &= \mathbb{E}[P] - \underline{p} \\
&= \int_{\underline{p}}^{\bar{p}} (1 - F_\pi(p)) \, dp
\end{align}
where $F_\pi(p)$ is the cumulative distribution function of prices generated by $\pi$ under standard operating conditions.
\begin{equation}
\text{COI} = \mathbb{E}[P] - \underline{p}
\end{equation}
where $\mathbb{E}[P]$ is the expected price charged by the policy and $\underline{p}$ is the minimum viable price (marginal cost).
% Alternative survival function representation (used in proof):
% COI = \int_{\underline{p}}^{\bar{p}} (1 - F_\pi(p)) \, dp
% where F_\pi(p) is the CDF of prices generated by \pi
\end{definition}
\begin{figure}[ht]
@@ -86,37 +91,24 @@ Let $N$ be the number of independent, utility-maximizing agents querying the pla
\end{theorem}
\begin{proof}
Let $p_1, \ldots, p_N$ be independent and identically distributed (i.i.d.) price samples drawn from the policy's distribution $F(p)$ with support $[\underline{p}, \bar{p}]$. The realizable price for an optimal searching agent is the first order statistic $p_{(1)} = \min(p_1, \ldots, p_N)$.
Consider $N$ independent agents querying the platform, each receiving a price sample $p_i$ drawn from the pricing policy's distribution $F(p)$ with support $[\underline{p}, \bar{p}]$. A strategic agent conducting reconnaissance will select the minimum observed price: $p_{(1)} = \min(p_1, \ldots, p_N)$.
The survival function (or reliability function) of the minimum price is given by:
Because the price samples are independent and identically distributed, the probability that the minimum price exceeds some threshold $t$ is:
\begin{equation}
S_{p_{(1)}}(t) = P(p_{(1)} > t) = [1 - F(t)]^N
P(p_{(1)} > t) = P(\text{all } p_i > t) = [1 - F(t)]^N
\end{equation}
To determine the expected value $\mathbb{E}[p_{(1)}]$, we recall the property that for any continuous random variable $X$ with support $[A, B]$, the expectation can be expressed as the lower bound plus the integral of the survival function:
Because $\underline{p}$ is the lower endpoint of the support of $F$, any price $t > \underline{p}$ satisfies $F(t) > 0$, so $0 \le 1 - F(t) < 1$. As $N$ grows, this probability decays exponentially in $N$: $[1 - F(t)]^N \to 0$.
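Since $p_{(1)}$ takes values in $[\underline{p}, \bar{p}]$, its expectation equals the lower bound plus the integral of its survival function, so the expected minimum price can be written as: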
\begin{equation}
\mathbb{E}[X] = A + \int_{A}^{B} P(X > t) \, dt
\mathbb{E}[p_{(1)}] = \underline{p} + \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt
\end{equation}
Applying this to our pricing statistic where the lower bound is $\underline{p}$:
\begin{align}
\mathbb{E}[p_{(1)}] &= \underline{p} + \int_{\underline{p}}^{\bar{p}} P(p_{(1)} > t) \, dt \\
&= \underline{p} + \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt
\end{align}
Since $\underline{p}$ is the lower endpoint of the support of the CDF $F$, for any $t > \underline{p}$ we have the strict inequality $F(t) > 0$, implying $0 \le 1 - F(t) < 1$. Consequently, as $N \to \infty$, the term $[1 - F(t)]^N$ converges to 0 pointwise for all $t > \underline{p}$.
Applying the Lebesgue Dominated Convergence Theorem (noting that the integrand is bounded by 1 on the finite interval $[\underline{p}, \bar{p}]$):
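Since the integrand vanishes pointwise as $N \to \infty$ for all $t > \underline{p}$ and is bounded by 1 on the finite interval $[\underline{p}, \bar{p}]$, the Dominated Convergence Theorem implies that the integral converges to zero. Therefore: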
\begin{equation}
\lim_{N \to \infty} \int_{\underline{p}}^{\bar{p}} [1 - F(t)]^N \, dt = \int_{\underline{p}}^{\bar{p}} 0 \, dt = 0
\lim_{N \to \infty} \text{COI} = \lim_{N \to \infty} (\mathbb{E}[p_{(1)}] - \underline{p}) = 0
\end{equation}
Substituting this back into the expression for COI:
\begin{align}
\lim_{N \to \infty} \text{COI} &= \lim_{N \to \infty} (\mathbb{E}[p_{(1)}] - \underline{p}) \\
&= \lim_{N \to \infty} \left( (\underline{p} + 0) - \underline{p} \right) \\
&= 0
\end{align}
\end{proof}