From 5967f4ebd3ce38a3f7b9e0c5e78203cd75c65ccd Mon Sep 17 00:00:00 2001 From: rbianchetti <31360514+rbianchetti@users.noreply.github.com> Date: Fri, 1 Jul 2022 17:32:35 -0700 Subject: [PATCH 1/3] Updating label analysis jupyter notebook with co-occurrence analysis --- labels-survey/Label_Analysis.ipynb | 295 +++++++++++++++++++++++------ 1 file changed, 242 insertions(+), 53 deletions(-) diff --git a/labels-survey/Label_Analysis.ipynb b/labels-survey/Label_Analysis.ipynb index 7c0a4ad..791655b 100644 --- a/labels-survey/Label_Analysis.ipynb +++ b/labels-survey/Label_Analysis.ipynb @@ -4,7 +4,8 @@ "metadata": { "colab": { "name": "Label Analysis", - "provenance": [] + "provenance": [], + "collapsed_sections": [] }, "kernelspec": { "name": "python3", @@ -17,13 +18,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 137, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "X36HL-zZXFv_", - "outputId": "cc495a64-aa37-4e5e-add5-dbf572433b5b" + "outputId": "0b41b843-d366-4566-fb01-235df5d4221c" }, "outputs": [ { @@ -36,14 +37,18 @@ } ], "source": [ - "!pip install multidict\n", "\n", + "import itertools\n", + "import collections\n", "import pandas as pd\n", "import numpy as np\n", "import plotly.express as px\n", "import matplotlib.pyplot as plt\n", "import multidict as multidict\n", "from wordcloud import WordCloud\n", + "from nltk import bigrams\n", + "import networkx as nx\n", + "\n", "\n", "url = 'https://raw.githubusercontent.com/hackforla/data-science/160-survey-repo-labels/labels-survey/output.csv' # GitHub URL to .csv data\n", "plt.rcParams[\"figure.figsize\"] = (24,5.5)\n", @@ -62,7 +67,7 @@ { "cell_type": "markdown", "source": [ - "Two Research Questions:\n", + "Three Research Questions:\n", "\n", "**1.- How is the usage of labels in HfLA?**\n", "\n", @@ -94,9 +99,9 @@ "height": 542 }, "id": "2noC3Tz7ZdMN", - "outputId": "a6cf92b5-e036-414e-a62f-05ceaecc203c" + "outputId": "d2eb9bac-b4c1-45a0-a23c-212b38a31942" }, - "execution_count": 22, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -106,9 +111,9 @@ "
\n", "\n", "