|
138 | 138 | "cell_type": "markdown", |
139 | 139 | "metadata": {}, |
140 | 140 | "source": [ |
141 | | - "### Load your data\n", |
142 | | - "\n", |
143 | | - "Use the `#!value` and `#!share` magic commands to fetch the data from GitHub, store it in the `taxi_data` variable and load it into a `DataFrame` " |
| 141 | + "### Download or Locate Data\n", |
 | 142 | + "The following code looks for the data file in known local locations (the `data` folder, then the current directory) and, if it is not found, downloads it from GitHub." |
144 | 143 | ] |
145 | 144 | }, |
146 | 145 | { |
|
152 | 151 | } |
153 | 152 | }, |
154 | 153 | "source": [ |
155 | | - "#!value --name taxi_data --from-url https://github.com/dotnet/csharp-notebooks/raw/main/machine-learning/data/taxi-fare.csv" |
| 154 | + "using System;\n", |
| 155 | + "using System.IO;\n", |
| 156 | + "using System.Net;\n", |
| 157 | + "\n", |
| 158 | + "string EnsureDataSetDownloaded(string fileName)\n", |
| 159 | + "{\n", |
| 160 | + "\n", |
| 161 | + "\t// This is the path if the repo has been checked out.\n", |
| 162 | + "\tvar filePath = Path.Combine(Directory.GetCurrentDirectory(),\"data\", fileName);\n", |
| 163 | + "\n", |
| 164 | + "\tif (!File.Exists(filePath))\n", |
| 165 | + "\t{\n", |
| 166 | + "\t\t// This is the path if the file has already been downloaded.\n", |
| 167 | + "\t\tfilePath = Path.Combine(Directory.GetCurrentDirectory(), fileName);\n", |
| 168 | + "\t}\n", |
| 169 | + "\n", |
| 170 | + "\tif (!File.Exists(filePath))\n", |
| 171 | + "\t{\n", |
| 172 | + "\t\tusing (var client = new WebClient())\n", |
| 173 | + "\t\t{\n", |
| 174 | + "\t\t\tclient.DownloadFile($\"https://raw.githubusercontent.com/dotnet/csharp-notebooks/main/machine-learning/data/{fileName}\", filePath);\n", |
| 175 | + "\t\t}\n", |
| 176 | + "\t\tConsole.WriteLine($\"Downloaded {fileName} to : {filePath}\");\n", |
| 177 | + "\t}\n", |
| 178 | + "\telse\n", |
| 179 | + "\t{\n", |
| 180 | + "\t\tConsole.WriteLine($\"{fileName} found here: {filePath}\");\n", |
| 181 | + "\t}\n", |
| 182 | + "\n", |
| 183 | + "\treturn filePath;\n", |
| 184 | + "}" |
156 | 185 | ], |
157 | | - "outputs": [] |
158 | | - }, |
159 | | - { |
160 | | - "cell_type": "code", |
161 | | - "execution_count": 1, |
162 | | - "metadata": { |
163 | | - "dotnet_interactive": { |
164 | | - "language": "csharp" |
| 186 | + "outputs": [ |
| 187 | + { |
| 188 | + "output_type": "execute_result", |
| 189 | + "data": { |
| 190 | + "text/plain": "Train Data Path: C:\\dev\\csharp-notebooks\\machine-learning\\data\\taxi-fare.csv\r\n" |
| 191 | + }, |
| 192 | + "execution_count": 1, |
| 193 | + "metadata": {} |
165 | 194 | } |
166 | | - }, |
167 | | - "source": [ |
168 | | - "#!share taxi_data --from value" |
169 | | - ], |
170 | | - "outputs": [] |
| 195 | + ] |
171 | 196 | }, |
172 | 197 | { |
173 | 198 | "cell_type": "code", |
|
178 | 203 | } |
179 | 204 | }, |
180 | 205 | "source": [ |
181 | | - "var df = DataFrame.LoadCsvFromString(taxi_data);" |
| 206 | + "var trainDataPath = EnsureDataSetDownloaded(\"taxi-fare.csv\");\n", |
| 207 | + "var df = DataFrame.LoadCsv(trainDataPath);" |
182 | 208 | ], |
183 | 209 | "outputs": [] |
184 | 210 | }, |
|
204 | 230 | { |
205 | 231 | "output_type": "execute_result", |
206 | 232 | "data": { |
207 | | - "text/html": "<table id=\"table_637927149236214714\"><thead><tr><th><i>index</i></th><th>vendor_id</th><th>rate_code</th><th>passenger_count</th><th>trip_time_in_secs</th><th>trip_distance</th><th>payment_type</th><th>fare_amount</th></tr></thead><tbody><tr><td><i><div class=\"dni-plaintext\">0</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1271</div></td><td><div class=\"dni-plaintext\">3.8</div></td><td>CRD</td><td><div class=\"dni-plaintext\">17.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">1</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">474</div></td><td><div class=\"dni-plaintext\">1.5</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8</div></td></tr><tr><td><i><div class=\"dni-plaintext\">2</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">637</div></td><td><div class=\"dni-plaintext\">1.4</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">3</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">181</div></td><td><div class=\"dni-plaintext\">0.6</div></td><td>CSH</td><td><div class=\"dni-plaintext\">4.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">4</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">661</div></td><td><div class=\"dni-plaintext\">1.1</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr></tbody></table>" |
| 233 | + "text/html": "<table id=\"table_637934937843853168\"><thead><tr><th><i>index</i></th><th>vendor_id</th><th>rate_code</th><th>passenger_count</th><th>trip_time_in_secs</th><th>trip_distance</th><th>payment_type</th><th>fare_amount</th></tr></thead><tbody><tr><td><i><div class=\"dni-plaintext\">0</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1271</div></td><td><div class=\"dni-plaintext\">3.8</div></td><td>CRD</td><td><div class=\"dni-plaintext\">17.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">1</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">474</div></td><td><div class=\"dni-plaintext\">1.5</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8</div></td></tr><tr><td><i><div class=\"dni-plaintext\">2</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">637</div></td><td><div class=\"dni-plaintext\">1.4</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">3</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">181</div></td><td><div class=\"dni-plaintext\">0.6</div></td><td>CSH</td><td><div class=\"dni-plaintext\">4.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">4</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">661</div></td><td><div class=\"dni-plaintext\">1.1</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr></tbody></table>" |
208 | 234 | }, |
209 | 235 | "execution_count": 1, |
210 | 236 | "metadata": {} |
|
367 | 393 | } |
368 | 394 | }, |
369 | 395 | "source": [ |
370 | | - "var result = await experiment.Run();" |
| 396 | + "var result = await experiment.RunAsync();" |
371 | 397 | ], |
372 | | - "outputs": [ |
373 | | - { |
374 | | - "output_type": "error", |
375 | | - "ename": "", |
376 | | - "evalue": "(1,14): error CS1061: 'TrialResult' does not contain a definition for 'GetAwaiter' and no accessible extension method 'GetAwaiter' accepting a first argument of type 'TrialResult' could be found (are you missing a using directive or an assembly reference?)", |
377 | | - "traceback": [ |
378 | | - null |
379 | | - ] |
380 | | - } |
381 | | - ] |
| 398 | + "outputs": [] |
382 | 399 | }, |
383 | 400 | { |
384 | 401 | "cell_type": "markdown", |
|
400 | 417 | ], |
401 | 418 | "outputs": [ |
402 | 419 | { |
403 | | - "output_type": "error", |
404 | | - "ename": "", |
405 | | - "evalue": "(1,15): error CS0103: The name 'result' does not exist in the current context", |
406 | | - "traceback": [ |
407 | | - null |
408 | | - ] |
| 420 | + "output_type": "execute_result", |
| 421 | + "data": { |
| 422 | + "text/plain": "R-Squared: 0.9329530384286037" |
| 423 | + }, |
| 424 | + "execution_count": 1, |
| 425 | + "metadata": {} |
409 | 426 | } |
410 | 427 | ] |
411 | 428 | }, |
|
430 | 447 | "ITransformer bestModel = result.Model;\n", |
431 | 448 | "var predictions = bestModel.Transform(testSet);" |
432 | 449 | ], |
433 | | - "outputs": [ |
434 | | - { |
435 | | - "output_type": "error", |
436 | | - "ename": "", |
437 | | - "evalue": "(1,26): error CS0103: The name 'result' does not exist in the current context", |
438 | | - "traceback": [ |
439 | | - null |
440 | | - ] |
441 | | - } |
442 | | - ] |
| 450 | + "outputs": [] |
443 | 451 | }, |
444 | 452 | { |
445 | 453 | "cell_type": "markdown", |
|
469 | 477 | ], |
470 | 478 | "outputs": [ |
471 | 479 | { |
472 | | - "output_type": "error", |
473 | | - "ename": "", |
474 | | - "evalue": "(1,14): error CS0103: The name 'predictions' does not exist in the current context\r\n(2,17): error CS0103: The name 'predictions' does not exist in the current context", |
475 | | - "traceback": [ |
476 | | - null |
477 | | - ] |
| 480 | + "output_type": "execute_result", |
| 481 | + "data": { |
| 482 | + "text/html": "<table><thead><tr><th><i>index</i></th><th>Actual</th><th>Predicted</th><th>Difference</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">24.5</div></td><td><div class=\"dni-plaintext\">23.087162</div></td><td><div class=\"dni-plaintext\">1.412838</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">9.5</div></td><td><div class=\"dni-plaintext\">8.993666</div></td><td><div class=\"dni-plaintext\">0.5063343</div></td></tr><tr><td>2</td><td><div class=\"dni-plaintext\">4.5</div></td><td><div class=\"dni-plaintext\">4.808011</div></td><td><div class=\"dni-plaintext\">-0.30801105</div></td></tr><tr><td>3</td><td><div class=\"dni-plaintext\">8</div></td><td><div class=\"dni-plaintext\">7.994398</div></td><td><div class=\"dni-plaintext\">0.005601883</div></td></tr><tr><td>4</td><td><div class=\"dni-plaintext\">52</div></td><td><div class=\"dni-plaintext\">52.039684</div></td><td><div class=\"dni-plaintext\">-0.039684296</div></td></tr></tbody></table>" |
| 483 | + }, |
| 484 | + "execution_count": 1, |
| 485 | + "metadata": {} |
478 | 486 | } |
479 | 487 | ] |
480 | 488 | }, |
|
487 | 495 | "With ML.NET, you don't have to manually calculate the evaluation metrics for your models. ML.NET provides a built-in `Evaluate` method for each of the machine learning tasks it supports. Use the `Evaluate` method for the regression task to calculate the evaluation metrics for the test set where the `fare_amount` column is the actual value and the `Score` column is the predicted value." |
488 | 496 | ] |
489 | 497 | }, |
490 | | - { |
491 | | - "cell_type": "code", |
492 | | - "execution_count": 1, |
493 | | - "metadata": { |
494 | | - "dotnet_interactive": { |
495 | | - "language": "csharp" |
496 | | - } |
497 | | - }, |
498 | | - "source": [ |
499 | | - "" |
500 | | - ], |
501 | | - "outputs": [] |
502 | | - }, |
503 | 498 | { |
504 | 499 | "cell_type": "code", |
505 | 500 | "execution_count": 1, |
|
511 | 506 | "source": [ |
512 | 507 | "var evaluationMetrics = mlContext.Regression.Evaluate(predictions,\"fare_amount\", \"Score\");" |
513 | 508 | ], |
514 | | - "outputs": [ |
515 | | - { |
516 | | - "output_type": "error", |
517 | | - "ename": "", |
518 | | - "evalue": "(1,55): error CS0103: The name 'predictions' does not exist in the current context", |
519 | | - "traceback": [ |
520 | | - null |
521 | | - ] |
522 | | - } |
523 | | - ] |
| 509 | + "outputs": [] |
524 | 510 | }, |
525 | 511 | { |
526 | 512 | "cell_type": "markdown", |
|
542 | 528 | ], |
543 | 529 | "outputs": [ |
544 | 530 | { |
545 | | - "output_type": "error", |
546 | | - "ename": "", |
547 | | - "evalue": "(1,1): error CS0103: The name 'evaluationMetrics' does not exist in the current context", |
548 | | - "traceback": [ |
549 | | - null |
550 | | - ] |
| 531 | + "output_type": "execute_result", |
| 532 | + "data": { |
| 533 | + "text/html": "<table><thead><tr><th>MeanAbsoluteError</th><th>MeanSquaredError</th><th>RootMeanSquaredError</th><th>LossFunction</th><th>RSquared</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">0.6107270253580241</div></td><td><div class=\"dni-plaintext\">6.673910566709432</div></td><td><div class=\"dni-plaintext\">2.58339129183123</div></td><td><div class=\"dni-plaintext\">6.673910534194763</div></td><td><div class=\"dni-plaintext\">0.9277130209892651</div></td></tr></tbody></table>" |
| 534 | + }, |
| 535 | + "execution_count": 1, |
| 536 | + "metadata": {} |
551 | 537 | } |
552 | 538 | ] |
553 | 539 | }, |
|
961 | 947 | { |
962 | 948 | "output_type": "execute_result", |
963 | 949 | "data": { |
964 | | - "text/html": "<table><thead><tr><th><i>index</i></th><th>Key</th><th>Value</th></tr></thead><tbody><tr><td>0</td><td>vendor_id.Bit2</td><td><div class=\"dni-plaintext\">-0.5100836529698106</div></td></tr><tr><td>1</td><td>vendor_id.Bit1</td><td><div class=\"dni-plaintext\">-0.2093168050110983</div></td></tr><tr><td>2</td><td>vendor_id.Bit0</td><td><div class=\"dni-plaintext\">-0.20509582275834345</div></td></tr><tr><td>3</td><td>payment_type.Bit3</td><td><div class=\"dni-plaintext\">-0.0014286018096295965</div></td></tr><tr><td>4</td><td>payment_type.Bit2</td><td><div class=\"dni-plaintext\">-0.0005391273248645708</div></td></tr><tr><td>5</td><td>payment_type.Bit1</td><td><div class=\"dni-plaintext\">-0.0001564149663926632</div></td></tr><tr><td>6</td><td>payment_type.Bit0</td><td><div class=\"dni-plaintext\">-7.393853265310242E-05</div></td></tr><tr><td>7</td><td>rate_code</td><td><div class=\"dni-plaintext\">-5.132793047627852E-07</div></td></tr><tr><td>8</td><td>passenger_count</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>9</td><td>trip_time_in_secs</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>10</td><td>trip_distance</td><td><div class=\"dni-plaintext\">0</div></td></tr></tbody></table>" |
| 950 | + "text/html": "<table><thead><tr><th><i>index</i></th><th>Key</th><th>Value</th></tr></thead><tbody><tr><td>0</td><td>vendor_id.Bit2</td><td><div class=\"dni-plaintext\">-0.5103167076996584</div></td></tr><tr><td>1</td><td>vendor_id.Bit1</td><td><div class=\"dni-plaintext\">-0.20920573710951015</div></td></tr><tr><td>2</td><td>vendor_id.Bit0</td><td><div class=\"dni-plaintext\">-0.20524726245559932</div></td></tr><tr><td>3</td><td>payment_type.Bit3</td><td><div class=\"dni-plaintext\">-0.0013735609832817113</div></td></tr><tr><td>4</td><td>payment_type.Bit2</td><td><div class=\"dni-plaintext\">-0.0005371983420188927</div></td></tr><tr><td>5</td><td>payment_type.Bit1</td><td><div class=\"dni-plaintext\">-0.00015402329111213753</div></td></tr><tr><td>6</td><td>payment_type.Bit0</td><td><div class=\"dni-plaintext\">-7.256291485776185E-05</div></td></tr><tr><td>7</td><td>rate_code</td><td><div class=\"dni-plaintext\">-5.605438192501921E-07</div></td></tr><tr><td>8</td><td>passenger_count</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>9</td><td>trip_time_in_secs</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>10</td><td>trip_distance</td><td><div class=\"dni-plaintext\">0</div></td></tr></tbody></table>" |
965 | 951 | }, |
966 | 952 | "execution_count": 1, |
967 | 953 | "metadata": {} |
|
0 commit comments