diff --git a/GES_DISC_Cloud_Notebooks/Cloud_Workshop/UWG-F2F_S3_Bucket_Access.ipynb b/GES_DISC_Cloud_Notebooks/Cloud_Workshop/UWG-F2F_S3_Bucket_Access.ipynb new file mode 100644 index 0000000..c6f6fa8 --- /dev/null +++ b/GES_DISC_Cloud_Notebooks/Cloud_Workshop/UWG-F2F_S3_Bucket_Access.ipynb @@ -0,0 +1,532 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "occupational-discharge", + "metadata": { + "id": "4da9684d-260e-49f4-a9ae-164738c17f3d" + }, + "source": [ + "# How to Directly Access MERRA-2 Data from an S3 Bucket with Python\n", + "### Author: Chris Battisto\n", + "### Date Authored: 1-11-22\n", + "\n", + "### Timing\n", + "\n", + "Exercise: 30 minutes\n", + "\n", + "
\n", + "\n", + "~/root using the file explorer panel.\n",
+ "If you believe you have entered an incorrect username or password, you can open up a terminal instance by clicking the blue \"plus\" button in the top left corner, selecting \"Terminal\", and then remove the previously generated netrc by typing in rm .netrc. Then, this cell can be rerun to generate a new netrc file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "duplicate-korea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "urs = 'urs.earthdata.nasa.gov' # Earthdata URL endpoint for authentication\n",
+ "prompts = ['Enter NASA Earthdata Login Username: ',\n",
+ " 'Enter NASA Earthdata Login Password: ']\n",
+ "\n",
+ "netrc_name = \".netrc\"\n",
+ "\n",
+ "# Determine if netrc file exists, and if so, if it includes NASA Earthdata Login Credentials\n",
+ "try:\n",
+ " netrcDir = os.path.expanduser(f\"~/{netrc_name}\")\n",
+ " netrc(netrcDir).authenticators(urs)[0]\n",
+ "\n",
+ "# Below, create a netrc file and prompt user for NASA Earthdata Login Username and Password\n",
+ "except FileNotFoundError:\n",
+ " homeDir = os.path.expanduser(\"~\")\n",
+ "    Popen('touch {0}{2} ; echo machine {1} >> {0}{2}'.format(homeDir + os.sep, urs, netrc_name), shell=True).wait()\n",
+ "    Popen('echo login {} >> {}{}'.format(getpass(prompt=prompts[0]), homeDir + os.sep, netrc_name), shell=True).wait()\n",
+ "    Popen('echo \\'password {} \\'>> {}{}'.format(getpass(prompt=prompts[1]), homeDir + os.sep, netrc_name), shell=True).wait()\n",
+ "    # Set restrictive permissions\n",
+ "    Popen('chmod 0600 {0}{1}'.format(homeDir + os.sep, netrc_name), shell=True).wait()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "intellectual-dragon",
+ "metadata": {},
+ "source": [
+ "### Obtain S3 URL from CMR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "reserved-spirituality",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'href': 's3://gesdisc-cumulus-prod-protected/MERRA2/M2T1NXSLV.5.12.4/2019/03/MERRA2_400.tavg1_2d_slv_Nx.20190313.nc4',\n",
+ " 'hreflang': 'en-US',\n",
+ " 'rel': 'http://esipfed.org/ns/fedsearch/1.1/s3#',\n",
+ " 'title': 'This link provides direct download access via S3 to the granule'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "cmr_url = 'https://cmr.earthdata.nasa.gov/search/granules'\n",
+ "\n",
+ "cmr_response = requests.get(cmr_url, \n",
+ " params={\n",
+ " 'concept_id': 'C1276812863-GES_DISC',\n",
+ " 'temporal': '2019-03-13T00:00:00Z',\n",
+ " 'page_size': 200,\n",
+ " },\n",
+ " headers={\n",
+ " 'Accept': 'application/json'\n",
+ " }\n",
+ " )\n",
+ "\n",
+ "# Save and print S3 URL\n",
+ "\n",
+ "for link in cmr_response.json()['feed']['entry'][0]['links']:\n",
+ " if link['href'].startswith('s3://'):\n",
+ " s3_link = link['href']\n",
+ " \n",
+ "print(s3_link)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "realistic-sheriff",
+ "metadata": {
+ "id": "bf3588b7-8acf-436f-b354-8d87e2af447b"
+ },
+ "source": [
+ "### Get S3 Token\n",
+ "\n",
+ "Retrieve your S3 access token and define an S3 direct access file system handler with S3FS. This token expires after one hour, and this block will need to be rerun if time expires. If you receive an error, double-check that your username and password were entered correctly in your netrc file, or that you can access the following URL: https://data.gesdisc.earthdata.nasa.gov"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "broadband-basics",
+ "metadata": {
+ "id": "81d98848",
+ "outputId": "9b2aabf3-b02b-4722-c6c5-d51f4c0257ec"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://data.gesdisc.earthdata.nasa.gov/s3credentials\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "s3fs.core.S3FileSystem"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auth_link = \"https://data.gesdisc.earthdata.nasa.gov/s3credentials\"\n",
+ "\n",
+ "# Define a function for S3 access credentials\n",
+ "\n",
+ "def begin_s3_direct_access(url: str=auth_link):\n",
+ "    print(url)\n",
+ " response = requests.get(url).json()\n",
+ " return s3fs.S3FileSystem(key=response['accessKeyId'],\n",
+ " secret=response['secretAccessKey'],\n",
+ " token=response['sessionToken'])\n",
+ "\n",
+ "gesdisc_fs = begin_s3_direct_access()\n",
+ "\n",
+ "# Check that the file system is intact as an S3FileSystem object, which means that token is valid\n",
+ "# Common causes of rejected S3 access tokens include incorrect passwords stored in the netrc file, or a non-existent netrc file\n",
+ "type(gesdisc_fs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "seven-muscle",
+ "metadata": {
+ "id": "33245906",
+ "tags": []
+ },
+ "source": [
+ "### Open the File in Xarray as an S3 File System Object\n",
+ "\n",
+ "First, check that the file itself is intact. By calling fs.info() with S3FS, we can see some of the metadata assigned to files uploaded to S3 buckets."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "equivalent-polyester",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ETag': '\"ab39493d3182642efbf610439b3d1d29-2\"',\n",
+ " 'Key': 'gesdisc-cumulus-prod-protected/MERRA2/M2T1NXSLV.5.12.4/2019/03/MERRA2_400.tavg1_2d_slv_Nx.20190313.nc4',\n",
+ " 'LastModified': datetime.datetime(2021, 3, 18, 23, 32, 5, tzinfo=tzutc()),\n",
+ " 'Size': 415071782,\n",
+ " 'size': 415071782,\n",
+ " 'name': 'gesdisc-cumulus-prod-protected/MERRA2/M2T1NXSLV.5.12.4/2019/03/MERRA2_400.tavg1_2d_slv_Nx.20190313.nc4',\n",
+ " 'type': 'file',\n",
+ " 'StorageClass': 'STANDARD',\n",
+ " 'VersionId': None}"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Files are organized by s3://gesdisc-cumulus-prod-protected/MERRA2/M2T1NXSLV.5.12.4/year/mo/*.nc4\n",
+ "fn = 's3://gesdisc-cumulus-prod-protected/MERRA2/M2T1NXSLV.5.12.4/2019/03/MERRA2_400.tavg1_2d_slv_Nx.20190313.nc4'\n",
+ "\n",
+ "gesdisc_fs.info(fn)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "northern-shopping",
+ "metadata": {},
+ "source": [
+ "In order to open an S3 bucket file, or group of files in Xarray, fs.open() and fs.glob() must be used when calling their URLs in open_dataset().\n",
+ "Here, we open a single M2T1NXSLV.5.12.4 file and subset its sea-level pressure values at a particular time slice."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "wicked-jimmy",
+ "metadata": {
+ "id": "6206ec95",
+ "outputId": "5dbf940b-19ab-47ad-8648-4800a0e167f0"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 985.77765, 985.77765, 985.77765, ..., 985.77765, 985.77765,\n",
+ " 985.77765],\n",
+ " [ 985.1326 , 985.1326 , 985.1326 , ..., 985.13763, 985.13763,\n",
+ " 985.1326 ],\n",
+ " [ 983.9426 , 983.9377 , 983.9377 , ..., 983.95264, 983.94763,\n",
+ " 983.94763],\n",
+ " ...,\n",
+ " [1007.2477 , 1007.26013, 1007.27014, ..., 1007.21014, 1007.22266,\n",
+ " 1007.23517],\n",
+ " [1007.67017, 1007.6752 , 1007.6777 , ..., 1007.65765, 1007.66266,\n",
+ " 1007.66516],\n",
+ " [1007.97766, 1007.97766, 1007.97766, ..., 1007.97766, 1007.97766,\n",
+ " 1007.97766]], dtype=float32)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ds = xr.open_dataset(gesdisc_fs.open(fn),\n",
+ " decode_cf=True,)\n",
+ "\n",
+ "# Subset SLP at 15:30Z, convert units\n",
+ "slp = ds.SLP.values[15,:,:] / 100 # convert to hPa\n",
+ "\n",
+ "slp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "undefined-medicaid",
+ "metadata": {},
+ "source": [
+ "### Plot Sea-Level Pressure Contours\n",
+ "\n",
+ "Now that the file is inside a local Xarray dataset, we can perform any data analysis methods desired, such as plotting with Matplotlib."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "exempt-sympathy",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.rcParams['figure.figsize'] = 10,10\n",
+ "\n",
+ "# Set up figure\n",
+ "fig = plt.figure()\n",
+ "\n",
+ "ax = fig.add_subplot(111, projection=ccrs.LambertConformal())\n",
+ "ax.set_extent([-121, -72, 23, 51], crs=ccrs.PlateCarree()) # CONUS extent\n",
+ "ax.coastlines(resolution='50m')\n",
+ "ax.add_feature(cfeature.BORDERS)\n",
+ "\n",
+ "# Set up filled and line contours\n",
+ "filled_c = ax.contourf(ds.lon, ds.lat, slp, levels=10, \n",
+ " transform=ccrs.PlateCarree())\n",
+ "line_c = ax.contour(ds.lon, ds.lat, slp, levels=10,\n",
+ " colors=['black'],\n",
+ " transform=ccrs.PlateCarree())\n",
+ "\n",
+ "# Set up labelling for contours\n",
+ "ax.clabel(line_c, \n",
+ " colors=['black'],\n",
+ " manual=False, \n",
+ " inline=True, \n",
+ " fmt=' {:.0f} '.format, \n",
+ " )\n",
+ "\n",
+ "# Set up colorbar and figure title\n",
+ "fig.colorbar(filled_c, orientation='horizontal')\n",
+ "fig.suptitle('MERRA-2 CONUS Sea-Level Pressure (hPa) on 13 March 2019 15:30Z', fontsize=16)\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "similar-construction",
+ "metadata": {},
+ "source": [
+ "### Cross-DAAC Data Access"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "interested-wrist",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "s3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_pr_prcp_2019.nc\n"
+ ]
+ }
+ ],
+ "source": [
+ "cmr_url = 'https://cmr.earthdata.nasa.gov/search/granules'\n",
+ "\n",
+ "cmr_response = requests.get(cmr_url, \n",
+ " params={\n",
+ " 'concept_id': 'C2031536952-ORNL_CLOUD',\n",
+ " 'temporal': '2019-03-13T00:00:00Z',\n",
+ "                              'bounding_box': '7.9999,2.9999,9.0001,4.0001',\n",
+ "                              'page_size': 200,\n",
+ " },\n",
+ " headers={\n",
+ " 'Accept': 'application/json'\n",
+ " }\n",
+ " )\n",
+ "\n",
+ "# Save and print S3 URL\n",
+ "\n",
+ "for link in cmr_response.json()['feed']['entry'][0]['links']:\n",
+ " if link['href'].startswith('s3://'):\n",
+ " s3_link = link['href']\n",
+ " \n",
+ "print(s3_link)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "tutorial-blast",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://data.ornldaac.earthdata.nasa.gov/s3credentials\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "s3fs.core.S3FileSystem"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auth_link = \"https://data.ornldaac.earthdata.nasa.gov/s3credentials\"\n",
+ "\n",
+ "ornl_fs = begin_s3_direct_access(auth_link)\n",
+ "\n",
+ "# Check that the file system is intact as an S3FileSystem object, which means that token is valid\n",
+ "type(ornl_fs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "finnish-warrior",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "PermissionError",
+ "evalue": "Forbidden",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_call_s3\u001b[0;34m(self, method, *akwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0madditional_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mS3_RETRYABLE_ERRORS\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.8/site-packages/aiobotocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 154\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mClientError\u001b[0m: An error occurred (403) when calling the HeadObject operation: Forbidden",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m