diff --git a/NotebookExamples/fsharp/Samples/HousingML.ipynb b/NotebookExamples/fsharp/Samples/HousingML.ipynb new file mode 100644 index 000000000..887bf9f96 --- /dev/null +++ b/NotebookExamples/fsharp/Samples/HousingML.ipynb @@ -0,0 +1,499 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "#r \"nuget:bl=true\"\n", + "#r \"nuget:RestoreSources=https://dotnet.myget.org/F/dotnet-corefxlab/api/v3/index.json\"\n", + "#r \"nuget:Microsoft.ML,version=1.4.0-preview\"\n", + "#r \"nuget:Microsoft.ML.AutoML\"\n", + "#r \"nuget:Microsoft.Data.DataFrame,version=0.1.1-e191008-1\"\n", + " \n", + "open Microsoft.Data\n", + "open XPlot.Plotly\n", + "open Microsoft.AspNetCore.Html\n", + "open System.IO" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let register (df:DataFrame) (writer:TextWriter) = // writes the first `take` rows of df to writer as an HTML table\n", + "    let headers = new ResizeArray<IHtmlContent>()\n", + "    headers.Add(th.innerHTML(i.innerHTML(\"index\")))\n", + "    headers.AddRange(df.Columns.Select(fun c -> (th.innerHTML(c) :> IHtmlContent)))\n", + "    let rows = ResizeArray<ResizeArray<IHtmlContent>>()\n", + "    let take = 20 // maximum number of rows rendered\n", + "    for i in 0 .. 
(Math.Min(take, int(df.RowCount)) - 1) do\n", + "        let cells = ResizeArray<IHtmlContent>()\n", + "        cells.Add(td.innerHTML(i))\n", + "        for o in df.[int64(i)] do\n", + "            cells.Add(td.innerHTML(o))\n", + "        rows.Add(cells)\n", + " \n", + "    let t =\n", + "        table.innerHTML([|\n", + "            thead.innerHTML(headers)\n", + "            tbody.innerHTML(rows.Select(fun r -> tr.innerHTML(r)))\n", + "        |])\n", + "\n", + "    writer.Write(t)\n", + "\n", + "Formatter<DataFrame>.Register( (fun df writer -> register df writer), mimeType = \"text/html\")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "open System.Net.Http\n", + "let housingPath = \"housing.csv\"\n", + "if not(File.Exists(housingPath)) then\n", + "    let contents = HttpClient().GetStringAsync(\"https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv\").Result\n", + "    File.WriteAllText(housingPath, contents)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
indexlongitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_valueocean_proximity
0-122.2337.88418801293221268.3252452600NEAR BAY
1-122.2237.862170991106240111388.3014358500NEAR BAY
2-122.2437.855214671904961777.2574352100NEAR BAY
3-122.2537.855212742355582195.6431341300NEAR BAY
4-122.2537.855216272805652593.8462342200NEAR BAY
5-122.2537.85529192134131934.0368269700NEAR BAY
6-122.2537.8452253548910945143.6591299200NEAR BAY
7-122.2537.8452310468711576473.12241400NEAR BAY
8-122.2637.8442255566512065952.0804226700NEAR BAY
9-122.2537.8452354970715517143.6912261100NEAR BAY
10-122.2637.855222024349104023.2031281500NEAR BAY
11-122.2637.8552350375215047343.2705241800NEAR BAY
12-122.2637.8552249147410984683.075213500NEAR BAY
13-122.2637.84526961913451742.6736191300NEAR BAY
14-122.2637.8552264362612126201.9167159200NEAR BAY
15-122.2637.855011202836972642.125140000NEAR BAY
16-122.2737.855219663477933312.775152500NEAR BAY
17-122.2737.855212282936483032.1202155500NEAR BAY
18-122.2637.845022394559904191.9911158700NEAR BAY
19-122.2737.845215032986902752.6033162900NEAR BAY
" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let housingData = DataFrame.ReadCsv(housingPath)\n", + "housingData" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
indexDescriptionlongitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0Length206402064020640206402064020640206402064020640
1Max-114.3141.955239320644535682608215.0001500001
2Min-124.3532.54120310.499914999
3Mean-119.56911535.63186628.6394862635.7588532.47621425.4779499.539673.8706622206854.97
" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "housingData.Description()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let graph =\n", + " Histogram(x = housingData.[\"median_house_value\"],\n", + " nbinsx = 20)\n", + "graph |> Chart.Plot" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Microsoft.DotNet.Interactive.DisplayedValue" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let graph =\n", + " Graph.Scattergl(\n", + " x = housingData.[\"longitude\"],\n", + " y = housingData.[\"latitude\"],\n", + " mode = \"markers\",\n", + " marker =\n", + " Graph.Marker(\n", + " color = housingData.[\"median_house_value\"],\n", + " colorscale = \"Jet\"))\n", + "\n", + "let plot = Chart.Plot(graph)\n", + "plot.Width <- 600\n", + "plot.Height <- 600\n", + "display(plot)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "18575" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "2065" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Microsoft.DotNet.Interactive.DisplayedValue" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let Shuffle (arr:int[]) =\n", + " let rnd = Random()\n", + " for i in 0 .. 
arr.Length - 1 do\n", + " let r = i + rnd.Next(arr.Length - i)\n", + " let temp = arr.[r]\n", + " arr.[r] <- arr.[i]\n", + " arr.[i] <- temp\n", + " arr\n", + "\n", + "let randomIndices = (Shuffle(Enumerable.Range(0, (int (housingData.RowCount) - 1)).ToArray()))\n", + "\n", + "let testSize = int (float (housingData.RowCount) * 0.1)\n", + "let trainRows = randomIndices.[testSize..]\n", + "let testRows = randomIndices.[..testSize]\n", + "\n", + "let housing_train = housingData.[trainRows]\n", + "let housing_test = housingData.[testRows]\n", + "\n", + "display(housing_train.RowCount)\n", + "display(housing_test.RowCount)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Wall time: 15676.887700000001ms" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "\n", + "open Microsoft.ML\n", + "open Microsoft.ML.Data\n", + "open Microsoft.ML.AutoML\n", + "\n", + "let mlContext = MLContext()\n", + "\n", + "let experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds = 15u)\n", + "let result = experiment.Execute(housing_train, labelColumnName = \"median_house_value\")" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best Trainer:SdcaRegression\n" + ] + }, + { + "data": { + "text/html": [ + "" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// I don't know why it's only 3 trainers !!!!!!\n", + "type RunDetails = System.Collections.Generic.IEnumerable>\n", + "let scatters =\n", + " result.RunDetails\n", + " .Where(fun d -> not (d.ValidationMetrics = null))\n", + " .GroupBy(\n", + " (fun r -> r.TrainerName),\n", + " (fun (name:string) (details:RunDetails) -> \n", + " Graph.Scattergl(\n", + " name = name,\n", + " x = details.Select(fun r -> r.RuntimeInSeconds),\n", + " y = details.Select(fun r -> r.ValidationMetrics.MeanAbsoluteError),\n", + " mode = \"markers\",\n", + " marker = Graph.Marker(size = 12))))\n", + "\n", + "let chart = Chart.Plot(scatters)\n", + "chart.WithXTitle(\"Training Time\")\n", + "chart.WithYTitle(\"Error\")\n", + "display(chart)\n", + "\n", + "Console.WriteLine(\"Best Trainer:{0}\", result.BestRun.TrainerName);" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Microsoft.DotNet.Interactive.DisplayedValue" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "let testResults = result.BestRun.Model.Transform(housing_test)\n", + "\n", + "let trueValues = testResults.GetColumn(\"median_house_value\")\n", + "let predictedValues = testResults.GetColumn(\"Score\")\n", + "\n", + "let predictedVsTrue =\n", + " Graph.Scattergl(\n", + " x = trueValues,\n", + " y = predictedValues,\n", + " mode = \"markers\")\n", + "\n", + "let maximumValue = Math.Max(trueValues.Max(), predictedValues.Max())\n", + "\n", + "let perfectLine =\n", + " Graph.Scattergl(\n", + " x = [| 0.0f; maximumValue |],\n", + " y = [| 0.0f; maximumValue |],\n", + " mode = \"lines\")\n", + "\n", + "let chart = Chart.Plot([| predictedVsTrue; perfectLine |])\n", + "chart.WithXTitle(\"True Values\")\n", + "chart.WithYTitle(\"Predicted Values\")\n", + "chart.WithLegend(false)\n", + "chart.Width = 600\n", + "chart.Height = 600\n", + "display(chart)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (F#)", + "language": "F#", + "name": ".net-fsharp" + }, + "language_info": { + "file_extension": ".fs", + "mimetype": "text/x-fsharp", + "name": "C#", + "pygments_lexer": "fsharp", + "version": "4.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}