import pandas as pd


age = {'Anna': 6, 'Teresa': 9}
age

{'Anna': 6, 'Teresa': 9}


age['Anna']

6


df = pd.DataFrame({'item': ('milk', 'apples', 'bread'),
                   'price': (3.50, 0.40, 3.10),
                   'n': (2, 5, 1)})
df


df.to_csv(path_or_buf='groceries.csv', index=False)
groceries = pd.read_csv(filepath_or_buffer='groceries.csv')
groceries


df = pd.read_csv('http://www.stat.wisc.edu/~jgillett/451/data/DJIA.csv', index_col='Symbol')
df


df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, MMM to WMT
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Exchange   30 non-null     object 
 1   Industry   30 non-null     object 
 2   State      30 non-null     object 
 3   Price      30 non-null     float64
 4   MarketCap  30 non-null     float64
 5   AvgVol     30 non-null     float64
dtypes: float64(3), object(3)
memory usage: 1.6+ KB


df.shape

(30, 6)


df.head()


df.tail()


df.index

Index(['MMM', 'AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CVX', 'CSCO', 'KO', 'DIS',
       'DOW', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'MCD', 'MRK',
       'MSFT', 'NKE', 'PG', 'CRM', 'TRV', 'UNH', 'VZ', 'V', 'WBA', 'WMT'],
      dtype='object', name='Symbol')


df.columns

Index(['Exchange', 'Industry', 'State', 'Price', 'MarketCap', 'AvgVol'], dtype='object')


df.describe()


df.describe(include=[object])


groceries.describe()


groceries.describe(include=[object])


pd.crosstab(df['State'], df['Exchange'])


df.max()

Exchange                NYSE
Industry             Telecom
State             Washington
Price                  478.0
MarketCap    2820000000000.0
AvgVol           103560000.0
dtype: object


df['Price'].max() # peek ahead to "Select a subset" first bullet

478.0


df['MarketCap'].idxmax()

'AAPL'


df['Price'].mean()

176.93333333333334


df.Exchange.value_counts()

Exchange
NYSE      23
NASDAQ     7
Name: count, dtype: int64


df.groupby('Exchange').groups.keys()

dict_keys(['NASDAQ', 'NYSE'])


df.groupby('Exchange').mean(numeric_only=True)


df.Price

Symbol
MMM     157.0
AXP     198.0
AMGN    224.0
AAPL    173.0
BA      218.0
CAT     203.0
CVX     134.0
CSCO     54.0
KO       61.0
DIS     155.0
DOW      62.0
GS      364.0
HD      353.0
HON     190.0
IBM     130.0
INTC     48.0
JNJ     167.0
JPM     155.0
MCD     254.0
MRK      78.0
MSFT    300.0
NKE     146.0
PG      157.0
CRM     214.0
TRV     172.0
UNH     478.0
VZ       53.0
V       228.0
WBA      48.0
WMT     134.0
Name: Price, dtype: float64


df[['Price', 'MarketCap']]


df.loc['AAPL']

Exchange              NASDAQ
Industry                  IT
State             California
Price                  173.0
MarketCap    2820000000000.0
AvgVol           103560000.0
Name: AAPL, dtype: object


df[0:3]


df.iloc[0:3, [0, 2, 3]]


df.loc['AAPL', 'State']

'California'


df.loc[['AAPL', 'MMM'], ['State', 'Industry']]


df[df.Exchange == 'NASDAQ']


df.sort_index(axis=0, ascending=True) # also try 1, False


df.sort_values(by=['Industry', 'State'], axis=0, ascending=False) # try [False, True]


groceries['weight'] = (8, 0.3, 1.0)
groceries


new_row = pd.DataFrame({'item':'popcorn', 'price':3, 'n':1, 'weight':2}, index=[0])
new_row


pd.concat([groceries, new_row], ignore_index=True) # ignore to reset second 0


# average Price of NASDAQ stocks
df[df.Exchange == 'NASDAQ'].Price.mean() # try NYSE

148.14285714285714


# highest Price among the NASDAQ stocks with AvgVol under 10 million
df[(df.Exchange == 'NASDAQ') & (df.AvgVol < 1e7)].Price.max()

224.0


# average MarketCap of 3 highest-Price stocks
df.sort_values(by='Price', ascending=False)[0:3].MarketCap.mean()

313570333333.3333

	Price	MarketCap	AvgVol
count	30.000000	3.000000e+01	3.000000e+01
mean	176.933333	3.803452e+11	1.358633e+07
std	100.994344	6.047850e+11	1.927975e+07
min	48.000000	4.139000e+10	1.580000e+06
25%	131.000000	1.254535e+11	3.665000e+06
50%	162.000000	2.175160e+11	7.530000e+06
75%	217.000000	3.717665e+11	1.393750e+07
max	478.000000	2.820000e+12	1.035600e+08

	price	n
count	3.000000	3.000000
mean	2.333333	2.666667
std	1.686219	2.081666
min	0.400000	1.000000
25%	1.750000	1.500000
50%	3.100000	2.000000
75%	3.300000	3.500000
max	3.500000	5.000000

Exchange	NASDAQ	NYSE
State
Arkansas	0	1
California	4	4
Georgia	0	2
Illinois	1	3
Michigan	0	1
Minnesota	0	2
New Jersey	0	2
New York	0	6
North Carolina	1	0
Ohio	0	1
Oregon	0	1
Washington	1	0

Pandas¶

Background: dictionary¶

Create, write, and read a DataFrame¶

Inspect¶

Select a subset¶

Sort¶

Add to a DataFrame¶

Examples¶

	Exchange	Industry	State	Price	MarketCap	AvgVol
Symbol
MMM	NYSE	Conglomerate	Minnesota	157.0	8.985700e+10	2690000.0
AXP	NYSE	Financial	New York	198.0	1.503370e+11	4330000.0
AMGN	NASDAQ	Biopharmaceutical	California	224.0	1.249590e+11	3110000.0
AAPL	NASDAQ	IT	California	173.0	2.820000e+12	103560000.0
BA	NYSE	Aerospace	Illinois	218.0	1.269370e+11	10350000.0
CAT	NYSE	Construction	Illinois	203.0	1.100330e+11	3450000.0
CVX	NYSE	Petroleum	California	134.0	2.588120e+11	12440000.0
CSCO	NASDAQ	IT	California	54.0	2.288900e+11	23770000.0
KO	NYSE	Soft Drink	Georgia	61.0	2.634360e+11	19550000.0
DIS	NYSE	Broadcasting	California	155.0	2.816880e+11	13540000.0
DOW	NYSE	Chemical	Michigan	62.0	4.551300e+10	5700000.0
GS	NYSE	Financial	New York	364.0	1.214020e+11	3150000.0
HD	NYSE	Home Improvement	Georgia	353.0	3.688880e+11	4310000.0
HON	NASDAQ	Conglomerate	North Carolina	190.0	1.299830e+11	3450000.0
IBM	NYSE	IT	New York	130.0	1.164680e+11	5920000.0
INTC	NASDAQ	Semiconductor	California	48.0	1.972480e+11	36540000.0
JNJ	NYSE	Drug	New Jersey	167.0	4.404600e+11	7940000.0
JPM	NYSE	Financial	New York	155.0	4.555110e+11	14440000.0
MCD	NYSE	Food	Illinois	254.0	1.896580e+11	2810000.0
MRK	NYSE	Drug	New Jersey	78.0	1.965430e+11	14070000.0
MSFT	NASDAQ	IT	Washington	300.0	2.253000e+12	35580000.0
NKE	NYSE	Apparel	Oregon	146.0	2.302060e+11	5890000.0
PG	NYSE	Consumer	Ohio	157.0	3.759090e+11	8610000.0
CRM	NYSE	IT	California	214.0	2.110360e+11	7120000.0
TRV	NYSE	Insurance	New York	172.0	4.139000e+10	1580000.0
UNH	NYSE	Health	Minnesota	478.0	4.504210e+11	3370000.0
VZ	NYSE	Telecom	New York	53.0	2.239960e+11	23190000.0
V	NYSE	Financial	California	228.0	4.933530e+11	10880000.0
WBA	NASDAQ	Retailing	Illinois	48.0	4.169600e+10	6620000.0
WMT	NYSE	Retailing	Arkansas	134.0	3.727260e+11	9630000.0

	Price	MarketCap	AvgVol
Exchange
NASDAQ	148.142857	8.279680e+11	3.037571e+07
NYSE	185.695652	2.441122e+11	8.476522e+06

	item	price	n
0	milk	3.5	2
1	apples	0.4	5
2	bread	3.1	1

	item	price	n
0	milk	3.5	2
1	apples	0.4	5
2	bread	3.1	1

	item
count	3
unique	3
top	milk
freq	1

	item	price	n	weight
0	milk	3.5	2	8.0
1	apples	0.4	5	0.3
2	bread	3.1	1	1.0