{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Multinomial Softmax Function in Python\n",
        "\n",
        "This notebook implements the softmax function, its variant with a temperature $T$, and builds a one-hot ground truth matrix from a vector of labels."
      ],
      "metadata": {
        "id": "SeZO7s-cCj_g"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Qr2F5BnQfHpv"
      },
      "outputs": [],
      "source": [
        "import math\n",
        "\n",
        "import numpy as np\n",
        "from scipy.sparse import coo_matrix"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def softmax(z):\n",
        "    \"\"\"Return the softmax probabilities of the scores in z.\n",
        "\n",
        "    Args:\n",
        "        z: an iterable of real-valued scores.\n",
        "\n",
        "    Returns:\n",
        "        A list p with p[k] = exp(z[k]) / sum_j exp(z[j]), or an empty\n",
        "        list when z is empty.\n",
        "    \"\"\"\n",
        "    z = list(z)\n",
        "    if not z:\n",
        "        return []\n",
        "\n",
        "    # softmax does not change when the same constant is subtracted from\n",
        "    # every score, so shift by the largest one: every exponent is then <= 0\n",
        "    # and math.exp cannot overflow, even for very large scores.\n",
        "    z_max = max(z)\n",
        "\n",
        "    # this computes a list of exp(z_k - z_max) for each z_k in z.\n",
        "    u = [math.exp(z_k - z_max) for z_k in z]\n",
        "\n",
        "    # the sum of all elements in u (at least 1, since exp(0) is one of them).\n",
        "    Z = sum(u)\n",
        "\n",
        "    # compute the probabilities.\n",
        "    return [u_k / Z for u_k in u]"
      ],
      "metadata": {
        "id": "nONP-M4cfMfC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "z = [0.6, 1.0, -1.5, 1.2, 10.0, -1.1]\n",
        "p = softmax(z)\n",
        "print('The softmax probabilities are:')\n",
        "for e in p:\n",
        "    print(f'{e:.4f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bpPEv3MSgPbv",
        "outputId": "cb82c5fb-60aa-4f8d-c420-301ce236bc05"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The softmax probabilities are:\n",
            "0.0001 0.0001 0.0000 0.0002 0.9996 0.0000 "
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Softmax with temperature $T$\n",
        "\n",
        "Each score is divided by $T$ before the softmax is applied: $p_k = \\frac{\\exp(z_k / T)}{\\sum_j \\exp(z_j / T)}$. The cells below show that a large $T$ flattens the probabilities towards uniform, while a small $T$ concentrates them on the largest score."
      ],
      "metadata": {
        "id": "8Edp4UzsBCXp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def tsoftmax(z, T):\n",
        "    \"\"\"Return the softmax probabilities of the scores in z at temperature T.\n",
        "\n",
        "    Args:\n",
        "        z: an iterable of real-valued scores.\n",
        "        T: the temperature, a non-zero number; T = 1 gives the plain softmax.\n",
        "\n",
        "    Returns:\n",
        "        A list p with p[k] = exp(z[k] / T) / sum_j exp(z[j] / T), or an\n",
        "        empty list when z is empty.\n",
        "\n",
        "    Raises:\n",
        "        ZeroDivisionError: if T is 0 and z is not empty.\n",
        "    \"\"\"\n",
        "    # divide every score by T, then reuse softmax() so that both functions\n",
        "    # share the same overflow-safe computation.\n",
        "    return softmax([z_k / T for z_k in z])"
      ],
      "metadata": {
        "id": "ZLhQtRQ4gi1b"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "z = [0.6, 1.0, -1.5, 1.2, 10.0, -1.1, -0.5, -0.1, 2.1, 3.1]\n",
        "p1 = tsoftmax(z, 1)\n",
        "for e in p1:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wgGoiDUGTpdS",
        "outputId": "111fd35e-5aed-4112-dfc0-85813bfd602c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.00008 0.00012 0.00001 0.00015 0.99817 0.00002 0.00003 0.00004 0.00037 0.00101 "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p2 = tsoftmax(z, 2)\n",
        "for e in p2:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CaWnJibqTx9T",
        "outputId": "b3613160-d420-4a07-820e-957c6d4ef437"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.00825 0.01008 0.00289 0.01114 0.90727 0.00353 0.00476 0.00581 0.01747 0.02880 "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p2 = tsoftmax(z, 10)\n",
        "for e in p2:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7WSkAeE9UPIn",
        "outputId": "c92253a2-5eba-42e7-d95a-c70201055a6a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.08627 0.08980 0.06993 0.09161 0.22086 0.07279 0.07729 0.08044 0.10024 0.11078 "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p2 = tsoftmax(z, 1000000)\n",
        "for e in p2:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2aPzTVRBUtho",
        "outputId": "2b3bfad7-0ede-4c99-9e6f-20bd2c578ded"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 0.10000 "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p2 = tsoftmax(z, 1)\n",
        "for e in p2:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NWmg3SeWVDHa",
        "outputId": "2c0fdcea-f0a3-4041-b0ea-88263f8b24b6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.00008 0.00012 0.00001 0.00015 0.99817 0.00002 0.00003 0.00004 0.00037 0.00101 "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p2 = tsoftmax(z, 0.5)\n",
        "for e in p2:\n",
        "    print(f'{e:.5f}', end = ' ')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "keYowzT8VYc4",
        "outputId": "a9b92a85-f38a-48e8-c146-48de3240fd23"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.00000 0.00000 0.00000 0.00000 1.00000 0.00000 0.00000 0.00000 0.00000 0.00000 "
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## The Ground Truth Matrix\n",
        "\n",
        "We build it with the SciPy [coo_matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html) class. The matrix has one row per class and one column per training example: the entry in row $k$, column $n$ is 1 when example $n$ has label $k$, and 0 otherwise."
      ],
      "metadata": {
        "id": "gO39oPGuBJV-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# The vector of labels, one per training example.\n",
        "y = np.array([1, 1, 2, 0])\n",
        "\n",
        "# N training examples (taken from y so that the two cannot disagree).\n",
        "N = len(y)\n",
        "\n",
        "# put a 1 at (row y[n], column n) for every example n, then convert the\n",
        "# sparse matrix into a dense array.\n",
        "groundTruth = coo_matrix((np.ones(N, dtype = np.uint8), (y, np.arange(N)))).toarray()"
      ],
      "metadata": {
        "id": "KzDfbr1PVdlY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "groundTruth"
      ],
      "metadata": {
        "id": "SvEZsooOc2rS",
        "outputId": "478fce20-ff19-4827-fcfb-93642dd01ea3",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[0, 0, 0, 1],\n",
              "       [1, 1, 0, 0],\n",
              "       [0, 0, 1, 0]], dtype=uint8)"
            ]
          },
          "metadata": {},
          "execution_count": 32
        }
      ]
    }
  ]
}