Skip to content

Commit 52c2c3d

Browse files
committed
Adding River
1 parent 57f0244 commit 52c2c3d

13 files changed

+2935
-0
lines changed
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "c9d42a09",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"Downloading https://maxhalford.github.io/files/datasets/toulouse_bikes.zip (1.12 MB)\n",
14+
"Uncompressing into /home/jbris/river_data/Bikes\n",
15+
"{'clouds': 75,\n",
16+
" 'description': 'light rain',\n",
17+
" 'humidity': 81,\n",
18+
" 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n",
19+
" 'pressure': 1017.0,\n",
20+
" 'station': 'metro-canal-du-midi',\n",
21+
" 'temperature': 6.54,\n",
22+
" 'wind': 9.3}\n",
23+
"Number of available bikes: 1\n",
24+
"[20,000] MAE: 4.912763\n",
25+
"[40,000] MAE: 5.333578\n",
26+
"[60,000] MAE: 5.330969\n",
27+
"[80,000] MAE: 5.392334\n",
28+
"[100,000] MAE: 5.423078\n",
29+
"[120,000] MAE: 5.541239\n",
30+
"[140,000] MAE: 5.613038\n",
31+
"[160,000] MAE: 5.622441\n",
32+
"[180,000] MAE: 5.567836\n",
33+
"[182,470] MAE: 5.563905\n"
34+
]
35+
},
36+
{
37+
"data": {
38+
"text/plain": [
39+
"MAE: 5.563905"
40+
]
41+
},
42+
"execution_count": 1,
43+
"metadata": {},
44+
"output_type": "execute_result"
45+
}
46+
],
47+
"source": [
48+
"from pprint import pprint\n",
49+
"from river import datasets\n",
50+
"\n",
dataset = datasets.Bikes()

# Peek at the first (features, target) pair of the stream. `next(iter(...))`
# fetches exactly one sample without a loop-and-break, and matches how a later
# cell of this notebook grabs a single sample.
x, y = next(iter(dataset))
pprint(x)
print(f'Number of available bikes: {y}')
57+
" \n",
58+
"from river import compose\n",
59+
"from river import linear_model\n",
60+
"from river import metrics\n",
61+
"from river import evaluate\n",
62+
"from river import preprocessing\n",
63+
"from river import optim\n",
64+
"\n",
# Baseline pipeline: keep the five weather features, standardise them, then
# fit a linear regression whose optimizer is SGD constructed with 0.001.
model = (
    compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))
)

metric = metrics.MAE()

# Last expression of the cell, so its return value is shown as the cell result.
evaluate.progressive_val_score(
    dataset=dataset,
    model=model,
    metric=metric,
    print_every=20_000,
)
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": 2,
77+
"id": "93b94267",
78+
"metadata": {},
79+
"outputs": [
80+
{
81+
"name": "stdout",
82+
"output_type": "stream",
83+
"text": [
84+
"[20,000] MAE: 3.720766\n",
85+
"[40,000] MAE: 3.829739\n",
86+
"[60,000] MAE: 3.844905\n",
87+
"[80,000] MAE: 3.910137\n",
88+
"[100,000] MAE: 3.888553\n",
89+
"[120,000] MAE: 3.923644\n",
90+
"[140,000] MAE: 3.980882\n",
91+
"[160,000] MAE: 3.949972\n",
92+
"[180,000] MAE: 3.934489\n",
93+
"[182,470] MAE: 3.933442\n"
94+
]
95+
},
96+
{
97+
"data": {
98+
"text/plain": [
99+
"MAE: 3.933442"
100+
]
101+
},
102+
"execution_count": 2,
103+
"metadata": {},
104+
"output_type": "execute_result"
105+
}
106+
],
107+
"source": [
108+
"from river import feature_extraction\n",
109+
"from river import stats\n",
110+
"\n",
def get_hour(x):
    """Return the features of ``x`` extended with an ``'hour'`` entry.

    Parameters
    ----------
    x
        Mapping of feature names to values. It must contain a ``'moment'``
        entry exposing an ``hour`` attribute (the dataset samples shown in
        this notebook carry a ``datetime.datetime`` there).

    Returns
    -------
    dict
        A new dict holding every entry of ``x`` plus ``'hour'``, set to
        ``x['moment'].hour``.

    Raises
    ------
    KeyError
        If ``x`` has no ``'moment'`` entry.
    """
    # Build a fresh dict instead of assigning into `x`: the same sample object
    # is also handed to the other branch of the union and kept by the
    # evaluation loop, so mutating it would leak 'hour' outside this step.
    return {**x, 'hour': x['moment'].hour}
114+
"\n",
# Same weather features as the baseline, united with a second branch that
# derives the hour and aggregates the target's running mean per
# (station, hour); the union is then scaled and fed to the linear regression.
model = (
    (
        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
        + (
            get_hour
            | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
        )
    )
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))
)

metric = metrics.MAE()

# Last expression of the cell, so its return value is shown as the cell result.
evaluate.progressive_val_score(
    dataset=dataset,
    model=model,
    metric=metric,
    print_every=20_000,
)
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 3,
131+
"id": "aa7a091c",
132+
"metadata": {},
133+
"outputs": [
134+
{
135+
"name": "stdout",
136+
"output_type": "stream",
137+
"text": [
138+
"0. Input\n",
139+
"--------\n",
140+
"clouds: 75 (int)\n",
141+
"description: light rain (str)\n",
142+
"humidity: 81 (int)\n",
143+
"moment: 2016-04-01 00:00:07 (datetime)\n",
144+
"pressure: 1,017.00000 (float)\n",
145+
"station: metro-canal-du-midi (str)\n",
146+
"temperature: 6.54000 (float)\n",
147+
"wind: 9.30000 (float)\n",
148+
"\n",
149+
"1. Transformer union\n",
150+
"--------------------\n",
151+
" 1.0 Select\n",
152+
" ----------\n",
153+
" clouds: 75 (int)\n",
154+
" humidity: 81 (int)\n",
155+
" pressure: 1,017.00000 (float)\n",
156+
" temperature: 6.54000 (float)\n",
157+
" wind: 9.30000 (float)\n",
158+
"\n",
159+
" 1.1 get_hour | y_mean_by_station_and_hour\n",
160+
" -----------------------------------------\n",
161+
" y_mean_by_station_and_hour: 4.43243 (float)\n",
162+
"\n",
163+
"clouds: 75 (int)\n",
164+
"humidity: 81 (int)\n",
165+
"pressure: 1,017.00000 (float)\n",
166+
"temperature: 6.54000 (float)\n",
167+
"wind: 9.30000 (float)\n",
168+
"y_mean_by_station_and_hour: 4.43243 (float)\n",
169+
"\n",
170+
"2. StandardScaler\n",
171+
"-----------------\n",
172+
"clouds: 0.47566 (float)\n",
173+
"humidity: 0.42247 (float)\n",
174+
"pressure: 1.05314 (float)\n",
175+
"temperature: -1.22098 (float)\n",
176+
"wind: 2.21104 (float)\n",
177+
"y_mean_by_station_and_hour: -0.59098 (float)\n",
178+
"\n",
179+
"3. LinearRegression\n",
180+
"-------------------\n",
181+
"Name Value Weight Contribution \n",
182+
" Intercept 1.00000 6.58252 6.58252 \n",
183+
" pressure 1.05314 3.78529 3.98646 \n",
184+
" humidity 0.42247 1.44921 0.61225 \n",
185+
"y_mean_by_station_and_hour -0.59098 0.54167 -0.32011 \n",
186+
" clouds 0.47566 -1.92255 -0.91448 \n",
187+
" wind 2.21104 -0.77720 -1.71843 \n",
188+
" temperature -1.22098 2.47030 -3.01619 \n",
189+
"\n",
190+
"Prediction: 5.21201\n"
191+
]
192+
}
193+
],
194+
"source": [
195+
"import itertools\n",
196+
"\n",
# Rebuild the feature-union pipeline, this time with LinearRegression's
# default arguments.
model = (
    (
        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
        + (
            get_hour
            | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
        )
    )
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression()
)

# Warm the model up on the first 10,000 samples: predict first, then learn.
for x, y in itertools.islice(dataset, 10_000):
    y_pred = model.predict_one(x)
    model.learn_one(x, y)

# Trace how the first sample of the stream flows through every pipeline step.
x, y = next(iter(dataset))
print(model.debug_one(x))
211+
]
212+
},
213+
{
214+
"cell_type": "code",
215+
"execution_count": 4,
216+
"id": "a06bc18b",
217+
"metadata": {},
218+
"outputs": [
219+
{
220+
"name": "stdout",
221+
"output_type": "stream",
222+
"text": [
223+
"[20,000] MAE: 20.198137\n",
224+
"[40,000] MAE: 12.199763\n",
225+
"[60,000] MAE: 9.468279\n",
226+
"[80,000] MAE: 8.126625\n",
227+
"[100,000] MAE: 7.273133\n",
228+
"[120,000] MAE: 6.735469\n",
229+
"[140,000] MAE: 6.376704\n",
230+
"[160,000] MAE: 6.06156\n",
231+
"[180,000] MAE: 5.806744\n",
232+
"[182,470] MAE: 5.780772\n"
233+
]
234+
},
235+
{
236+
"data": {
237+
"text/plain": [
238+
"MAE: 5.780772"
239+
]
240+
},
241+
"execution_count": 4,
242+
"metadata": {},
243+
"output_type": "execute_result"
244+
}
245+
],
246+
"source": [
247+
"import datetime as dt\n",
248+
"\n",
# Evaluate a fresh clone of the pipeline with a fresh MAE metric, passing the
# sample's 'moment' field and a 30-minute delay to the evaluator.
# NOTE(review): presumably each target is only revealed `delay` after its
# 'moment' timestamp - confirm against the River documentation.
evaluate.progressive_val_score(
    model=model.clone(),
    dataset=dataset,
    metric=metrics.MAE(),
    delay=dt.timedelta(minutes=30),
    moment='moment',
    print_every=20_000,
)
257+
]
258+
},
259+
{
260+
"cell_type": "code",
261+
"execution_count": null,
262+
"id": "9bbc8b4e",
263+
"metadata": {},
264+
"outputs": [],
265+
"source": []
266+
}
267+
],
268+
"metadata": {
269+
"kernelspec": {
270+
"display_name": "Python 3 (ipykernel)",
271+
"language": "python",
272+
"name": "python3"
273+
},
274+
"language_info": {
275+
"codemirror_mode": {
276+
"name": "ipython",
277+
"version": 3
278+
},
279+
"file_extension": ".py",
280+
"mimetype": "text/x-python",
281+
"name": "python",
282+
"nbconvert_exporter": "python",
283+
"pygments_lexer": "ipython3",
284+
"version": "3.10.13"
285+
}
286+
},
287+
"nbformat": 4,
288+
"nbformat_minor": 5
289+
}

0 commit comments

Comments
 (0)