{
"cells": [
{
"cell_type": "markdown",
"id": "6d87eac9",
"metadata": {},
"source": [
"[download source](./plot.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "007140c0-12d7-42d0-8b84-9275f7b96057",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import matplotlib.ticker\n",
"import numpy as np\n",
"from scipy.stats import norm, lognorm\n",
"import math\n",
"import contextlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "11beb378-a796-42e4-b31e-d5f93dec5b70",
"metadata": {},
"outputs": [],
"source": [
"%config InlineBackend.figure_formats = ['svg']\n",
"matplotlib.rcParams['svg.hashsalt'] = '12345'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0caf2e2c-0bb1-418c-8b3d-a173c86653b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median 60000.0\n",
"Top 1% 614428.4193787279\n",
"98923.27624200769 129671.84495370525\n",
"(array(98923.27624201), array(1.68147874e+10))\n",
"Norm top 1% 400585.0970730134\n"
]
}
],
"source": [
"# distribution chosen to be roughly similar to US household income:\n",
"# https://dqydj.com/average-median-top-household-income-percentiles/\n",
"ln_params = dict(loc=0, scale=60000, s=1)\n",
"print(\"Median\", lognorm.ppf(0.5, **ln_params))\n",
"print(\"Top 1%\", lognorm.ppf(0.99, **ln_params))\n",
"[mean, var] = lognorm.stats(**ln_params, moments='mv')\n",
"print(mean, math.sqrt(var))\n",
"n_params = dict(loc=mean, scale=np.sqrt(var))\n",
"print(norm.stats(**n_params, moments='mv'))\n",
"print(\"Norm top 1%\", norm.ppf(0.99, **n_params))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a00ac6f9-0a59-476f-a4c0-ff64b05e69a9",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cm = plt.xkcd()\n",
"fmt = matplotlib.ticker.FuncFormatter(\n",
" lambda x, pos: f\"{int(x/1000):,}k\"\n",
")\n",
"with cm:\n",
" fig, ax = plt.subplots(figsize=(8, 3.6))\n",
" x = np.linspace(0, 700_000, 1001)\n",
" y_ln = lognorm.pdf(x, **ln_params)\n",
" y_n = norm.pdf(x, **n_params)\n",
" ax.plot(x, y_ln, label='Heavy-tailed (log-normal, µ=100k, σ=130k)')\n",
" ax.plot(x, y_n, label='Light-tailed (normal, µ=100k, σ=130k)')\n",
" ax.set_yticks([])\n",
" ax.set_ylabel(\"Probability density\")\n",
" ax.set_xlabel(\"Income\")\n",
" ax.xaxis.set_major_formatter(fmt)\n",
" \n",
" # inset axes....\n",
" axins = ax.inset_axes([0.5, 0.3, 0.4, 0.4])\n",
" axins.plot(x, y_ln)\n",
" axins.plot(x, y_n)\n",
" # sub region of the original image\n",
" x1, x2, y1, y2 = 500_000, 700_000, -1e-9, 1e-7\n",
" axins.set_xlim(x1, x2)\n",
" axins.set_ylim(y1, y2)\n",
" axins.set_xticks([x1, (x1+x2)/2, x2])\n",
" axins.set_yticks([0])\n",
" axins.xaxis.set_major_formatter(fmt)\n",
"\n",
" ax.indicate_inset_zoom(axins)\n",
" ax.legend()\n",
" plt.savefig(\"./plot.svg\", metadata={\"Date\": None}, bbox_inches='tight')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "478050a4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}