Skip to content

Commit 0aabcf8

Browse files
Merge branch 'main' into u/xiaoyun/searchSpace
2 parents 9bee2da + 8dcc586 commit 0aabcf8

14 files changed

+2046
-1482
lines changed

machine-learning/01-Intro to Machine Learning.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,14 @@
8989
}
9090
},
9191
"source": [
92+
"#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json\"\n",
9293
"#r \"nuget: Microsoft.ML, 2.0.0-preview.22356.1\""
9394
],
9495
"outputs": [
9596
{
9697
"output_type": "execute_result",
9798
"data": {
98-
"text/html": "<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>Microsoft.ML, 2.0.0-preview.22313.1</span></li></ul></div></div>"
99+
"text/html": "<div><div></div><div></div><div><strong>Installed Packages</strong><ul><li><span>Microsoft.ML, 2.0.0-preview.22356.1</span></li></ul></div></div>"
99100
},
100101
"execution_count": 1,
101102
"metadata": {}
@@ -327,7 +328,7 @@
327328
{
328329
"output_type": "execute_result",
329330
"data": {
330-
"text/plain": "Coefficient of determination for the trained model: 0.97\r\n"
331+
"text/plain": "Coefficient of determination for the trained model: 0.98\r\n"
331332
},
332333
"execution_count": 1,
333334
"metadata": {}
@@ -378,7 +379,7 @@
378379
{
379380
"output_type": "execute_result",
380381
"data": {
381-
"text/plain": "Predicted price for size: 2500 sq ft= $275.59k\r\n"
382+
"text/plain": "Predicted price for size: 2500 sq ft= $274.48k\r\n"
382383
},
383384
"execution_count": 1,
384385
"metadata": {}

machine-learning/02-Data Preparation and Feature Engineering.ipynb

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
}
5858
},
5959
"source": [
60+
"#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json\"\n",
6061
"#r \"nuget: Microsoft.ML, 2.0.0-preview.22356.1\""
6162
],
6263
"outputs": [
@@ -85,6 +86,57 @@
8586
],
8687
"outputs": []
8788
},
89+
{
90+
"cell_type": "markdown",
91+
"metadata": {},
92+
"source": [
93+
"## Download or Locate Data\n",
94+
"The following code looks for the data file in two known local locations (the `data` subfolder, then the current directory); if the file is not found, it is downloaded from the notebook repository on GitHub."
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": 1,
100+
"metadata": {
101+
"dotnet_interactive": {
102+
"language": "csharp"
103+
}
104+
},
105+
"source": [
106+
"using System;\n",
107+
"using System.IO;\n",
108+
"using System.Net;\n",
109+
"\n",
110+
"string EnsureDataSetDownloaded(string fileName)\n",
111+
"{\n",
112+
"\n",
113+
"\t// This is the path if the repo has been checked out.\n",
114+
"\tvar filePath = Path.Combine(Directory.GetCurrentDirectory(),\"data\", fileName);\n",
115+
"\n",
116+
"\tif (!File.Exists(filePath))\n",
117+
"\t{\n",
118+
"\t\t// This is the path if the file has already been downloaded.\n",
119+
"\t\tfilePath = Path.Combine(Directory.GetCurrentDirectory(), fileName);\n",
120+
"\t}\n",
121+
"\n",
122+
"\tif (!File.Exists(filePath))\n",
123+
"\t{\n",
124+
"\t\tusing (var client = new WebClient())\n",
125+
"\t\t{\n",
126+
"\t\t\tclient.DownloadFile($\"https://raw.githubusercontent.com/dotnet/csharp-notebooks/main/machine-learning/data/{fileName}\", filePath);\n",
127+
"\t\t}\n",
128+
"\t\tConsole.WriteLine($\"Downloaded {fileName} to : {filePath}\");\n",
129+
"\t}\n",
130+
"\telse\n",
131+
"\t{\n",
132+
"\t\tConsole.WriteLine($\"{fileName} found here: {filePath}\");\n",
133+
"\t}\n",
134+
"\n",
135+
"\treturn filePath;\n",
136+
"}"
137+
],
138+
"outputs": []
139+
},
88140
{
89141
"cell_type": "markdown",
90142
"metadata": {},
@@ -187,12 +239,14 @@
187239
}
188240
},
189241
"source": [
242+
"var trainDataPath = EnsureDataSetDownloaded(\"taxi-fare.csv\");\n",
243+
"\n",
190244
"// Create TextLoader based on the Model Input type. \n",
191245
"TextLoader textLoader = mlContext.Data.CreateTextLoader<ModelInput>(separatorChar: ',', hasHeader: true);\n",
192246
"\n",
193247
"// Load the data into an IDataView. Load() method can support multiple files. \n",
194248
"// Files must have the same separator character, header, column names, etc. \n",
195-
"IDataView data = textLoader.Load(\"data/taxi-fare.csv\");\n",
249+
"IDataView data = textLoader.Load(trainDataPath);\n",
196250
"\n",
197251
"data.Preview(1); "
198252
],

machine-learning/03-Training and AutoML.ipynb

Lines changed: 558 additions & 491 deletions
Large diffs are not rendered by default.

machine-learning/04-Model Evaluation.ipynb

Lines changed: 68 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,8 @@
138138
"cell_type": "markdown",
139139
"metadata": {},
140140
"source": [
141-
"### Load your data\n",
142-
"\n",
143-
"Use the `#!value` and `#!share` magic commands to fetch the data from GitHub, store it in the `taxi_data` variable and load it into a `DataFrame` "
141+
"### Download or Locate Data\n",
142+
"The following code looks for the data file in two known local locations (the `data` subfolder, then the current directory); if the file is not found, it is downloaded from the notebook repository on GitHub."
144143
]
145144
},
146145
{
@@ -152,22 +151,48 @@
152151
}
153152
},
154153
"source": [
155-
"#!value --name taxi_data --from-url https://github.com/dotnet/csharp-notebooks/raw/main/machine-learning/data/taxi-fare.csv"
154+
"using System;\n",
155+
"using System.IO;\n",
156+
"using System.Net;\n",
157+
"\n",
158+
"string EnsureDataSetDownloaded(string fileName)\n",
159+
"{\n",
160+
"\n",
161+
"\t// This is the path if the repo has been checked out.\n",
162+
"\tvar filePath = Path.Combine(Directory.GetCurrentDirectory(),\"data\", fileName);\n",
163+
"\n",
164+
"\tif (!File.Exists(filePath))\n",
165+
"\t{\n",
166+
"\t\t// This is the path if the file has already been downloaded.\n",
167+
"\t\tfilePath = Path.Combine(Directory.GetCurrentDirectory(), fileName);\n",
168+
"\t}\n",
169+
"\n",
170+
"\tif (!File.Exists(filePath))\n",
171+
"\t{\n",
172+
"\t\tusing (var client = new WebClient())\n",
173+
"\t\t{\n",
174+
"\t\t\tclient.DownloadFile($\"https://raw.githubusercontent.com/dotnet/csharp-notebooks/main/machine-learning/data/{fileName}\", filePath);\n",
175+
"\t\t}\n",
176+
"\t\tConsole.WriteLine($\"Downloaded {fileName} to : {filePath}\");\n",
177+
"\t}\n",
178+
"\telse\n",
179+
"\t{\n",
180+
"\t\tConsole.WriteLine($\"{fileName} found here: {filePath}\");\n",
181+
"\t}\n",
182+
"\n",
183+
"\treturn filePath;\n",
184+
"}"
156185
],
157-
"outputs": []
158-
},
159-
{
160-
"cell_type": "code",
161-
"execution_count": 1,
162-
"metadata": {
163-
"dotnet_interactive": {
164-
"language": "csharp"
186+
"outputs": [
187+
{
188+
"output_type": "execute_result",
189+
"data": {
190+
"text/plain": "Train Data Path: C:\\dev\\csharp-notebooks\\machine-learning\\data\\taxi-fare.csv\r\n"
191+
},
192+
"execution_count": 1,
193+
"metadata": {}
165194
}
166-
},
167-
"source": [
168-
"#!share taxi_data --from value"
169-
],
170-
"outputs": []
195+
]
171196
},
172197
{
173198
"cell_type": "code",
@@ -178,7 +203,8 @@
178203
}
179204
},
180205
"source": [
181-
"var df = DataFrame.LoadCsvFromString(taxi_data);"
206+
"var trainDataPath = EnsureDataSetDownloaded(\"taxi-fare.csv\");\n",
207+
"var df = DataFrame.LoadCsv(trainDataPath);"
182208
],
183209
"outputs": []
184210
},
@@ -204,7 +230,7 @@
204230
{
205231
"output_type": "execute_result",
206232
"data": {
207-
"text/html": "<table id=\"table_637927149236214714\"><thead><tr><th><i>index</i></th><th>vendor_id</th><th>rate_code</th><th>passenger_count</th><th>trip_time_in_secs</th><th>trip_distance</th><th>payment_type</th><th>fare_amount</th></tr></thead><tbody><tr><td><i><div class=\"dni-plaintext\">0</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1271</div></td><td><div class=\"dni-plaintext\">3.8</div></td><td>CRD</td><td><div class=\"dni-plaintext\">17.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">1</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">474</div></td><td><div class=\"dni-plaintext\">1.5</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8</div></td></tr><tr><td><i><div class=\"dni-plaintext\">2</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">637</div></td><td><div class=\"dni-plaintext\">1.4</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">3</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">181</div></td><td><div class=\"dni-plaintext\">0.6</div></td><td>CSH</td><td><div class=\"dni-plaintext\">4.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">4</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">661</div></td><td><div class=\"dni-plaintext\">1.1</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr></tbody></table>"
233+
"text/html": "<table id=\"table_637934937843853168\"><thead><tr><th><i>index</i></th><th>vendor_id</th><th>rate_code</th><th>passenger_count</th><th>trip_time_in_secs</th><th>trip_distance</th><th>payment_type</th><th>fare_amount</th></tr></thead><tbody><tr><td><i><div class=\"dni-plaintext\">0</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1271</div></td><td><div class=\"dni-plaintext\">3.8</div></td><td>CRD</td><td><div class=\"dni-plaintext\">17.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">1</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">474</div></td><td><div class=\"dni-plaintext\">1.5</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8</div></td></tr><tr><td><i><div class=\"dni-plaintext\">2</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">637</div></td><td><div class=\"dni-plaintext\">1.4</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">3</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">181</div></td><td><div class=\"dni-plaintext\">0.6</div></td><td>CSH</td><td><div class=\"dni-plaintext\">4.5</div></td></tr><tr><td><i><div class=\"dni-plaintext\">4</div></i></td><td>CMT</td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">1</div></td><td><div class=\"dni-plaintext\">661</div></td><td><div class=\"dni-plaintext\">1.1</div></td><td>CRD</td><td><div class=\"dni-plaintext\">8.5</div></td></tr></tbody></table>"
208234
},
209235
"execution_count": 1,
210236
"metadata": {}
@@ -367,18 +393,9 @@
367393
}
368394
},
369395
"source": [
370-
"var result = await experiment.Run();"
396+
"var result = await experiment.RunAsync();"
371397
],
372-
"outputs": [
373-
{
374-
"output_type": "error",
375-
"ename": "",
376-
"evalue": "(1,14): error CS1061: 'TrialResult' does not contain a definition for 'GetAwaiter' and no accessible extension method 'GetAwaiter' accepting a first argument of type 'TrialResult' could be found (are you missing a using directive or an assembly reference?)",
377-
"traceback": [
378-
null
379-
]
380-
}
381-
]
398+
"outputs": []
382399
},
383400
{
384401
"cell_type": "markdown",
@@ -400,12 +417,12 @@
400417
],
401418
"outputs": [
402419
{
403-
"output_type": "error",
404-
"ename": "",
405-
"evalue": "(1,15): error CS0103: The name 'result' does not exist in the current context",
406-
"traceback": [
407-
null
408-
]
420+
"output_type": "execute_result",
421+
"data": {
422+
"text/plain": "R-Squared: 0.9329530384286037"
423+
},
424+
"execution_count": 1,
425+
"metadata": {}
409426
}
410427
]
411428
},
@@ -430,16 +447,7 @@
430447
"ITransformer bestModel = result.Model;\n",
431448
"var predictions = bestModel.Transform(testSet);"
432449
],
433-
"outputs": [
434-
{
435-
"output_type": "error",
436-
"ename": "",
437-
"evalue": "(1,26): error CS0103: The name 'result' does not exist in the current context",
438-
"traceback": [
439-
null
440-
]
441-
}
442-
]
450+
"outputs": []
443451
},
444452
{
445453
"cell_type": "markdown",
@@ -469,12 +477,12 @@
469477
],
470478
"outputs": [
471479
{
472-
"output_type": "error",
473-
"ename": "",
474-
"evalue": "(1,14): error CS0103: The name 'predictions' does not exist in the current context\r\n(2,17): error CS0103: The name 'predictions' does not exist in the current context",
475-
"traceback": [
476-
null
477-
]
480+
"output_type": "execute_result",
481+
"data": {
482+
"text/html": "<table><thead><tr><th><i>index</i></th><th>Actual</th><th>Predicted</th><th>Difference</th></tr></thead><tbody><tr><td>0</td><td><div class=\"dni-plaintext\">24.5</div></td><td><div class=\"dni-plaintext\">23.087162</div></td><td><div class=\"dni-plaintext\">1.412838</div></td></tr><tr><td>1</td><td><div class=\"dni-plaintext\">9.5</div></td><td><div class=\"dni-plaintext\">8.993666</div></td><td><div class=\"dni-plaintext\">0.5063343</div></td></tr><tr><td>2</td><td><div class=\"dni-plaintext\">4.5</div></td><td><div class=\"dni-plaintext\">4.808011</div></td><td><div class=\"dni-plaintext\">-0.30801105</div></td></tr><tr><td>3</td><td><div class=\"dni-plaintext\">8</div></td><td><div class=\"dni-plaintext\">7.994398</div></td><td><div class=\"dni-plaintext\">0.005601883</div></td></tr><tr><td>4</td><td><div class=\"dni-plaintext\">52</div></td><td><div class=\"dni-plaintext\">52.039684</div></td><td><div class=\"dni-plaintext\">-0.039684296</div></td></tr></tbody></table>"
483+
},
484+
"execution_count": 1,
485+
"metadata": {}
478486
}
479487
]
480488
},
@@ -487,19 +495,6 @@
487495
"With ML.NET, you don't have to manually calculate the evaluation metrics for your models. ML.NET provides a built-in `Evaluate` method for each of the machine learning tasks it supports. Use the `Evaluate` method for the regression task to calculate the evaluation metrics for the test set where the `fare_amount` column is the actual value and the `Score` column is the predicted value."
488496
]
489497
},
490-
{
491-
"cell_type": "code",
492-
"execution_count": 1,
493-
"metadata": {
494-
"dotnet_interactive": {
495-
"language": "csharp"
496-
}
497-
},
498-
"source": [
499-
""
500-
],
501-
"outputs": []
502-
},
503498
{
504499
"cell_type": "code",
505500
"execution_count": 1,
@@ -511,16 +506,7 @@
511506
"source": [
512507
"var evaluationMetrics = mlContext.Regression.Evaluate(predictions,\"fare_amount\", \"Score\");"
513508
],
514-
"outputs": [
515-
{
516-
"output_type": "error",
517-
"ename": "",
518-
"evalue": "(1,55): error CS0103: The name 'predictions' does not exist in the current context",
519-
"traceback": [
520-
null
521-
]
522-
}
523-
]
509+
"outputs": []
524510
},
525511
{
526512
"cell_type": "markdown",
@@ -542,12 +528,12 @@
542528
],
543529
"outputs": [
544530
{
545-
"output_type": "error",
546-
"ename": "",
547-
"evalue": "(1,1): error CS0103: The name 'evaluationMetrics' does not exist in the current context",
548-
"traceback": [
549-
null
550-
]
531+
"output_type": "execute_result",
532+
"data": {
533+
"text/html": "<table><thead><tr><th>MeanAbsoluteError</th><th>MeanSquaredError</th><th>RootMeanSquaredError</th><th>LossFunction</th><th>RSquared</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">0.6107270253580241</div></td><td><div class=\"dni-plaintext\">6.673910566709432</div></td><td><div class=\"dni-plaintext\">2.58339129183123</div></td><td><div class=\"dni-plaintext\">6.673910534194763</div></td><td><div class=\"dni-plaintext\">0.9277130209892651</div></td></tr></tbody></table>"
534+
},
535+
"execution_count": 1,
536+
"metadata": {}
551537
}
552538
]
553539
},
@@ -961,7 +947,7 @@
961947
{
962948
"output_type": "execute_result",
963949
"data": {
964-
"text/html": "<table><thead><tr><th><i>index</i></th><th>Key</th><th>Value</th></tr></thead><tbody><tr><td>0</td><td>vendor_id.Bit2</td><td><div class=\"dni-plaintext\">-0.5100836529698106</div></td></tr><tr><td>1</td><td>vendor_id.Bit1</td><td><div class=\"dni-plaintext\">-0.2093168050110983</div></td></tr><tr><td>2</td><td>vendor_id.Bit0</td><td><div class=\"dni-plaintext\">-0.20509582275834345</div></td></tr><tr><td>3</td><td>payment_type.Bit3</td><td><div class=\"dni-plaintext\">-0.0014286018096295965</div></td></tr><tr><td>4</td><td>payment_type.Bit2</td><td><div class=\"dni-plaintext\">-0.0005391273248645708</div></td></tr><tr><td>5</td><td>payment_type.Bit1</td><td><div class=\"dni-plaintext\">-0.0001564149663926632</div></td></tr><tr><td>6</td><td>payment_type.Bit0</td><td><div class=\"dni-plaintext\">-7.393853265310242E-05</div></td></tr><tr><td>7</td><td>rate_code</td><td><div class=\"dni-plaintext\">-5.132793047627852E-07</div></td></tr><tr><td>8</td><td>passenger_count</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>9</td><td>trip_time_in_secs</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>10</td><td>trip_distance</td><td><div class=\"dni-plaintext\">0</div></td></tr></tbody></table>"
950+
"text/html": "<table><thead><tr><th><i>index</i></th><th>Key</th><th>Value</th></tr></thead><tbody><tr><td>0</td><td>vendor_id.Bit2</td><td><div class=\"dni-plaintext\">-0.5103167076996584</div></td></tr><tr><td>1</td><td>vendor_id.Bit1</td><td><div class=\"dni-plaintext\">-0.20920573710951015</div></td></tr><tr><td>2</td><td>vendor_id.Bit0</td><td><div class=\"dni-plaintext\">-0.20524726245559932</div></td></tr><tr><td>3</td><td>payment_type.Bit3</td><td><div class=\"dni-plaintext\">-0.0013735609832817113</div></td></tr><tr><td>4</td><td>payment_type.Bit2</td><td><div class=\"dni-plaintext\">-0.0005371983420188927</div></td></tr><tr><td>5</td><td>payment_type.Bit1</td><td><div class=\"dni-plaintext\">-0.00015402329111213753</div></td></tr><tr><td>6</td><td>payment_type.Bit0</td><td><div class=\"dni-plaintext\">-7.256291485776185E-05</div></td></tr><tr><td>7</td><td>rate_code</td><td><div class=\"dni-plaintext\">-5.605438192501921E-07</div></td></tr><tr><td>8</td><td>passenger_count</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>9</td><td>trip_time_in_secs</td><td><div class=\"dni-plaintext\">0</div></td></tr><tr><td>10</td><td>trip_distance</td><td><div class=\"dni-plaintext\">0</div></td></tr></tbody></table>"
965951
},
966952
"execution_count": 1,
967953
"metadata": {}

0 commit comments

Comments
 (0)