# Import pandas
import pandas as pd

# DO NOT MODIFY THIS TEST CELL
points = 0
try:
    pd.DataFrame()
    points += 5
    print('\u2705 Great work! You correctly imported the pandas library.')
except:
    print('\u274C Oops - pandas was not imported correctly.')
print('You earned {} of 5 points for importing pandas'.format(points))

✅ Great work! You correctly imported the pandas library.
You earned 5 of 5 points for importing pandas

# Assemble the NCEI data-service request: the endpoint comes first, then the
# query parameters, which are joined with '&' and attached after a '?'.
ncei_url = '?'.join((
    'https://www.ncei.noaa.gov/access/services/data/v1',
    '&'.join((
        'dataset=daily-summaries',
        'dataTypes=TOBS,PRCP',
        'stations=USC00396947',
        'startDate=1949-10-01',
        'endDate=2024-05-03',
        'includeStationName=true',
        'includeStationLocation=1',
        'units=standard',
    )),
))
# Leave the URL as the last expression so the notebook displays it
ncei_url

'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=TOBS,PRCP&stations=USC00396947&startDate=1949-10-01&endDate=2024-05-03&includeStationName=true&includeStationLocation=1&units=standard'

# DO NOT MODIFY THIS TEST CELL
resp_url = _
points = 0

if type(resp_url)==str:
    points += 3
    print('\u2705 Great work! You correctly called your url variable.')
else:
    print('\u274C Oops - your url variable was not called correctly.')

if len(resp_url)==218:
    points += 3
    print('\u2705 Great work! Your url is the correct length.')
else:
    print('\u274C Oops - your url variable is not the correct length.')

print('You earned {} of 6 points for defining a url variable'.format(points))

✅ Great work! You correctly called your url variable.
✅ Great work! Your url is the correct length.
You earned 6 of 6 points for defining a url variable

# Download the Rapid City station CSV from the NCEI URL and load it into a
# DataFrame whose index is the parsed DATE column
rapid_df = pd.read_csv(
    ncei_url, index_col='DATE', parse_dates=True, na_values=['NaN'],
)
rapid_df

# DO NOT MODIFY THIS TEST CELL
tmax_df_resp = _
points = 0

if isinstance(tmax_df_resp, pd.DataFrame):
    points += 1
    print('\u2705 Great work! You called a DataFrame.')
else:
    print('\u274C Oops - make sure to call your DataFrame for testing.')

print('You earned {} of 2 points for downloading data'.format(points))

✅ Great work! You called a DataFrame.
You earned 1 of 2 points for downloading data

# Check that the data was imported into a pandas DataFrame
# (the notebook displays the resulting type as this cell's output)
type(rapid_df)

pandas.core.frame.DataFrame

# Keep only the precipitation (PRCP) and temperature (TOBS) columns.
# .copy() makes the result an independent DataFrame, so adding columns to it
# in later cells does not trigger pandas' SettingWithCopyWarning.
rapid_df = rapid_df[['PRCP', 'TOBS']].copy()
rapid_df

# DO NOT MODIFY THIS TEST CELL
tmax_df_resp = _
points = 0

summary = [round(val, 2) for val in tmax_df_resp.mean().values]
if summary == [0.05, 54.53]:
    points += 4
    print('\u2705 Great work! You correctly downloaded data.')
else:
    print('\u274C Oops - your data are not correct.')
print('You earned {} of 5 points for downloading data'.format(points))

❌ Oops - your data are not correct.
You earned 0 of 5 points for downloading data

# Quick first look at the PRCP column using the default plot styling
rapid_df.plot(y='PRCP')

<Axes: xlabel='DATE'>

# Plot precipitation against the date index with a title and axis labels
rapid_df.plot(
    y='PRCP',
    title='Rapid City Precipitation',
    xlabel='Date',
    ylabel='Precipitation in Inches')

<Axes: title={'center': 'Rapit City Precipitation'}, xlabel='Date', ylabel='Precipitation in Inches'>

# Plot temperature against the date index in orange on a wider figure
rapid_df.plot(
    y='TOBS',
    title='Rapid City Temperature',
    xlabel='Date',
    ylabel='Temperature in Degrees F',
    color='orange',
    figsize=(10,5))

<Axes: title={'center': 'Rapit City Temperature'}, xlabel='Date', ylabel='Temperature in Degrees F'>

# Convert to Celsius: C = (F - 32) * 5 / 9.
# assign() returns a new DataFrame with the TCel column appended, which
# avoids the SettingWithCopyWarning raised by writing a column directly into
# a DataFrame that was produced by selecting columns from another one.
rapid_df = rapid_df.assign(TCel=(rapid_df['TOBS'] - 32) * 5 / 9)
rapid_df

/tmp/ipykernel_41552/1561760456.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rapid_df['TCel'] = (rapid_df['TOBS'] - 32) * 5 / 9

# DO NOT MODIFY THIS TEST CELL
tmax_df_resp = _
points = 0

if isinstance(tmax_df_resp, pd.DataFrame):
    points += 1
    print('\u2705 Great work! You called a DataFrame.')
else:
    print('\u274C Oops - make sure to call your DataFrame for testing.')

summary = [round(val, 2) for val in tmax_df_resp.mean().values]
if summary == [0.05, 54.53, 12.52]:
    points += 4
    print('\u2705 Great work! You correctly converted to Celcius.')
else:
    print('\u274C Oops - your data are not correct.')
print('You earned {} of 5 points for converting to Celcius'.format(points))

✅ Great work! You called a DataFrame.
❌ Oops - your data are not correct.
You earned 1 of 5 points for converting to Celcius

def convert_to_celcius(fahrenheit):
    """Return the Celsius equivalent of a Fahrenheit temperature.

    The input only needs to support subtraction, multiplication and
    division by numbers.
    """
    # Shift so that the freezing point sits at zero, then rescale the degrees
    degrees_above_freezing = fahrenheit - 32
    return degrees_above_freezing * 5 / 9

# Run convert_to_celcius on every TOBS value and store the result in a new
# column. assign() returns a new DataFrame instead of writing into the
# existing one, which avoids pandas' SettingWithCopyWarning.
rapid_df = rapid_df.assign(
    celcius_column=rapid_df['TOBS'].apply(convert_to_celcius))
rapid_df

/tmp/ipykernel_41552/1212548583.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rapid_df['celcius_column'] = rapid_df['TOBS'].apply(convert_to_celcius)

# Keep only the rows dated 1989 through 2023 (both end years included),
# selected by label on the date index
rapid_89_23 = rapid_df.loc['1989':'2023']
rapid_89_23

# DO NOT MODIFY THIS TEST CELL
df_resp = _
points = 0

if isinstance(df_resp, pd.DataFrame):
    points += 1
    print('\u2705 Great work! You called a DataFrame.')
else:
    print('\u274C Oops - make sure to call your DataFrame for testing.')

summary = [round(val, 2) for val in df_resp.mean().values]
if summary == [0.06, 55.67, 13.15]:
    points += 5
    print('\u2705 Great work! You correctly converted to Celcius.')
else:
    print('\u274C Oops - your data are not correct.')
print('You earned {} of 5 points for subsetting'.format(points))

✅ Great work! You called a DataFrame.
❌ Oops - your data are not correct.
You earned 1 of 5 points for subsetting

# Collapse the daily rows into one row per year ('YS' labels each bin by the
# first day of the year) holding the mean of every column
rapid_annual_avg_temp = (
    rapid_89_23
    .resample('YS')
    .mean()
)
rapid_annual_avg_temp

# DO NOT MODIFY THIS TEST CELL
df_resp = _
points = 0

if isinstance(df_resp, pd.DataFrame):
    points += 1
    print('\u2705 Great work! You called a DataFrame.')
else:
    print('\u274C Oops - make sure to call your DataFrame for testing.')

summary = [round(val, 2) for val in df_resp.mean().values]
if summary == [0.06, 55.37, 12.99]:
    points += 5
    print('\u2705 Great work! You correctly converted to Celcius.')
else:
    print('\u274C Oops - your data are not correct.')
print('You earned {} of 5 points for resampling'.format(points))

✅ Great work! You called a DataFrame.
❌ Oops - your data are not correct.
You earned 1 of 5 points for resampling

# Draw the yearly mean TOBS values as a light-blue line without a legend,
# then show the yearly table itself as the cell output
rapid_annual_avg_temp.plot(
    y='TOBS',
    figsize=(10, 5),
    color='lightblue',
    legend=False,
    title='Rapid City Annual Average Temperature',
    xlabel='Date',
    ylabel='Temperature (F)',
)
rapid_annual_avg_temp

%%capture
%%bash
jupyter nbconvert *.ipynb --to html

	STATION	NAME	LATITUDE	LONGITUDE	ELEVATION	PRCP	TOBS
DATE
1949-10-01	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	51.0
1949-10-02	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	51.0
1949-10-03	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	52.0
1949-10-04	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	45.0
1949-10-05	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	50.0
...	...	...	...	...	...	...	...
2024-04-28	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	NaN
2024-04-29	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.37	30.0
2024-04-30	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	44.0
2024-05-01	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.00	33.0
2024-05-02	USC00396947	RAPID CITY 4 NW, SD US	44.12055	-103.28417	1060.4	0.35	39.0

	PRCP	TOBS
DATE
1949-10-01	0.00	51.0
1949-10-02	0.00	51.0
1949-10-03	0.00	52.0
1949-10-04	0.00	45.0
1949-10-05	0.00	50.0
...	...	...
2024-04-28	0.00	NaN
2024-04-29	0.37	30.0
2024-04-30	0.00	44.0
2024-05-01	0.00	33.0
2024-05-02	0.35	39.0

	PRCP	TOBS	TCel
DATE
1949-10-01	0.00	51.0	10.555556
1949-10-02	0.00	51.0	10.555556
1949-10-03	0.00	52.0	11.111111
1949-10-04	0.00	45.0	7.222222
1949-10-05	0.00	50.0	10.000000
...	...	...	...
2024-04-28	0.00	NaN	NaN
2024-04-29	0.37	30.0	-1.111111
2024-04-30	0.00	44.0	6.666667
2024-05-01	0.00	33.0	0.555556
2024-05-02	0.35	39.0	3.888889

	PRCP	TOBS	TCel	celcius_column
DATE
1949-10-01	0.00	51.0	10.555556	10.555556
1949-10-02	0.00	51.0	10.555556	10.555556
1949-10-03	0.00	52.0	11.111111	11.111111
1949-10-04	0.00	45.0	7.222222	7.222222
1949-10-05	0.00	50.0	10.000000	10.000000
...	...	...	...	...
2024-04-28	0.00	NaN	NaN	NaN
2024-04-29	0.37	30.0	-1.111111	-1.111111
2024-04-30	0.00	44.0	6.666667	6.666667
2024-05-01	0.00	33.0	0.555556	0.555556
2024-05-02	0.35	39.0	3.888889	3.888889

	PRCP	TOBS	TCel	celcius_column
DATE
1989-01-01	0.00	7.0	-13.888889	-13.888889
1989-01-02	0.00	25.0	-3.888889	-3.888889
1989-01-03	0.00	19.0	-7.222222	-7.222222
1989-01-04	0.00	47.0	8.333333	8.333333
1989-01-05	0.00	27.0	-2.777778	-2.777778
...	...	...	...	...
2023-12-27	0.31	32.0	0.000000	0.000000
2023-12-28	0.00	17.0	-8.333333	-8.333333
2023-12-29	0.00	28.0	-2.222222	-2.222222
2023-12-30	0.00	NaN	NaN	NaN
2023-12-31	0.00	NaN	NaN	NaN

Climate change is impacting the way people live around the world¶

Get started with open reproducible science!¶

Human-readable and Machine-readable¶

What the fork?! Who wrote this?¶

Python packages let you use code written by experts around the world¶

There are more Earth Observation data online than any one person could ever look at¶

You can access NCEI GHCNd Data from the internet using its API 🖥️ 📡 🖥️¶

Download and get started working with NCEI data¶

Plot the precipitation column (PRCP) vs time to explore the data¶

Subsetting and Resampling¶

Now we are ready to calculate annual statistics¶

YOUR Rapid City PLOT HEADLINE HERE 📰 🗞️ 📻¶

Rapid City, SD: Average Temp from 1989 to 2023¶

THIS ISN’T THE END! 😄¶

Your turn: pick a new location and/or measurement to plot 🌏 📈¶

Congratulations, you’re almost done with this coding challenge 🤩 – now make sure that your code is reproducible¶

BONUS: Create a shareable Markdown of your work¶

	PRCP	TOBS	TCel	celcius_column
DATE
1989-01-01	0.056359	38.072829	3.373794	3.373794
1990-01-01	0.039068	40.363112	4.646174	4.646174
1991-01-01	0.056875	39.945869	4.414372	4.414372
1992-01-01	0.036714	39.525862	4.181034	4.181034
1993-01-01	0.055881	35.522581	1.956989	1.956989
1994-01-01	0.034540	39.479769	4.155427	4.155427
1995-01-01	0.063609	39.150568	3.972538	3.972538
1996-01-01	0.058785	36.547486	2.526381	2.526381
1997-01-01	0.057634	38.825073	3.791707	3.791707
1998-01-01	0.068343	40.563739	4.757633	4.757633
1999-01-01	0.073104	41.688202	5.382335	5.382335
2000-01-01	0.050771	39.750751	4.305973	4.305973
2001-01-01	0.049639	43.371134	6.317297	6.317297
2002-01-01	0.036126	33.482143	0.823413	0.823413
2003-01-01	0.039186	40.455253	4.697363	4.697363
2004-01-01	0.030242	38.877828	3.821016	3.821016
2005-01-01	0.044620	40.627119	4.792844	4.792844
2006-01-01	0.042870	40.873278	4.929599	4.929599
2007-01-01	0.038515	34.806931	1.559406	1.559406
2008-01-01	0.025892	34.204969	1.224983	1.224983
2009-01-01	0.053828	35.871324	2.150735	2.150735
2010-01-01	0.056767	39.012384	3.895769	3.895769
2011-01-01	0.060282	40.313846	4.618803	4.618803
2012-01-01	0.019341	42.008746	5.560415	5.560415
2013-01-01	0.060685	38.392638	3.551466	3.551466
2014-01-01	0.057726	39.211310	4.006283	4.006283
2015-01-01	0.057260	41.351275	5.195153	5.195153
2016-01-01	0.039508	42.161644	5.645358	5.645358
2017-01-01	0.034082	41.013889	5.007716	5.007716
2018-01-01	0.057335	36.670732	2.594851	2.594851
2019-01-01	0.085056	36.159544	2.310858	2.310858
2020-01-01	0.044006	41.023438	5.013021	5.013021
2021-01-01	0.032225	40.363248	4.646249	4.646249
2022-01-01	0.028421	39.331395	4.072997	4.072997
2023-01-01	0.046313	40.144578	4.524766	4.524766