import pandas as pd
age = {'Anna': 6, 'Teresa': 9}
age
{'Anna': 6, 'Teresa': 9}
age['Anna']
6
df = pd.DataFrame({'item': ('milk', 'apples', 'bread'),
'price': (3.50, 0.40, 3.10),
'n': (2, 5, 1)})
df
item | price | n | |
---|---|---|---|
0 | milk | 3.5 | 2 |
1 | apples | 0.4 | 5 |
2 | bread | 3.1 | 1 |
df.to_csv(path_or_buf='groceries.csv', index=False)
groceries = pd.read_csv(filepath_or_buffer='groceries.csv')
groceries
item | price | n | |
---|---|---|---|
0 | milk | 3.5 | 2 |
1 | apples | 0.4 | 5 |
2 | bread | 3.1 | 1 |
df = pd.read_csv('http://www.stat.wisc.edu/~jgillett/451/data/DJIA.csv', index_col='Symbol')
df
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
MMM | NYSE | Conglomerate | Minnesota | 157.0 | 8.985700e+10 | 2690000.0 |
AXP | NYSE | Financial | New York | 198.0 | 1.503370e+11 | 4330000.0 |
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
AAPL | NASDAQ | IT | California | 173.0 | 2.820000e+12 | 103560000.0 |
BA | NYSE | Aerospace | Illinois | 218.0 | 1.269370e+11 | 10350000.0 |
CAT | NYSE | Construction | Illinois | 203.0 | 1.100330e+11 | 3450000.0 |
CVX | NYSE | Petroleum | California | 134.0 | 2.588120e+11 | 12440000.0 |
CSCO | NASDAQ | IT | California | 54.0 | 2.288900e+11 | 23770000.0 |
KO | NYSE | Soft Drink | Georgia | 61.0 | 2.634360e+11 | 19550000.0 |
DIS | NYSE | Broadcasting | California | 155.0 | 2.816880e+11 | 13540000.0 |
DOW | NYSE | Chemical | Michigan | 62.0 | 4.551300e+10 | 5700000.0 |
GS | NYSE | Financial | New York | 364.0 | 1.214020e+11 | 3150000.0 |
HD | NYSE | Home Improvement | Georgia | 353.0 | 3.688880e+11 | 4310000.0 |
HON | NASDAQ | Conglomerate | North Carolina | 190.0 | 1.299830e+11 | 3450000.0 |
IBM | NYSE | IT | New York | 130.0 | 1.164680e+11 | 5920000.0 |
INTC | NASDAQ | Semiconductor | California | 48.0 | 1.972480e+11 | 36540000.0 |
JNJ | NYSE | Drug | New Jersey | 167.0 | 4.404600e+11 | 7940000.0 |
JPM | NYSE | Financial | New York | 155.0 | 4.555110e+11 | 14440000.0 |
MCD | NYSE | Food | Illinois | 254.0 | 1.896580e+11 | 2810000.0 |
MRK | NYSE | Drug | New Jersey | 78.0 | 1.965430e+11 | 14070000.0 |
MSFT | NASDAQ | IT | Washington | 300.0 | 2.253000e+12 | 35580000.0 |
NKE | NYSE | Apparel | Oregon | 146.0 | 2.302060e+11 | 5890000.0 |
PG | NYSE | Consumer | Ohio | 157.0 | 3.759090e+11 | 8610000.0 |
CRM | NYSE | IT | California | 214.0 | 2.110360e+11 | 7120000.0 |
TRV | NYSE | Insurance | New York | 172.0 | 4.139000e+10 | 1580000.0 |
UNH | NYSE | Health | Minnesota | 478.0 | 4.504210e+11 | 3370000.0 |
VZ | NYSE | Telecom | New York | 53.0 | 2.239960e+11 | 23190000.0 |
V | NYSE | Financial | California | 228.0 | 4.933530e+11 | 10880000.0 |
WBA | NASDAQ | Retailing | Illinois | 48.0 | 4.169600e+10 | 6620000.0 |
WMT | NYSE | Retailing | Arkansas | 134.0 | 3.727260e+11 | 9630000.0 |
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 30 entries, MMM to WMT Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Exchange 30 non-null object 1 Industry 30 non-null object 2 State 30 non-null object 3 Price 30 non-null float64 4 MarketCap 30 non-null float64 5 AvgVol 30 non-null float64 dtypes: float64(3), object(3) memory usage: 1.6+ KB
df.shape
(30, 6)
df.head()
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
MMM | NYSE | Conglomerate | Minnesota | 157.0 | 8.985700e+10 | 2690000.0 |
AXP | NYSE | Financial | New York | 198.0 | 1.503370e+11 | 4330000.0 |
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
AAPL | NASDAQ | IT | California | 173.0 | 2.820000e+12 | 103560000.0 |
BA | NYSE | Aerospace | Illinois | 218.0 | 1.269370e+11 | 10350000.0 |
df.tail()
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
UNH | NYSE | Health | Minnesota | 478.0 | 4.504210e+11 | 3370000.0 |
VZ | NYSE | Telecom | New York | 53.0 | 2.239960e+11 | 23190000.0 |
V | NYSE | Financial | California | 228.0 | 4.933530e+11 | 10880000.0 |
WBA | NASDAQ | Retailing | Illinois | 48.0 | 4.169600e+10 | 6620000.0 |
WMT | NYSE | Retailing | Arkansas | 134.0 | 3.727260e+11 | 9630000.0 |
df.index
Index(['MMM', 'AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CVX', 'CSCO', 'KO', 'DIS', 'DOW', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'MCD', 'MRK', 'MSFT', 'NKE', 'PG', 'CRM', 'TRV', 'UNH', 'VZ', 'V', 'WBA', 'WMT'], dtype='object', name='Symbol')
df.columns
Index(['Exchange', 'Industry', 'State', 'Price', 'MarketCap', 'AvgVol'], dtype='object')
df.describe()
Price | MarketCap | AvgVol | |
---|---|---|---|
count | 30.000000 | 3.000000e+01 | 3.000000e+01 |
mean | 176.933333 | 3.803452e+11 | 1.358633e+07 |
std | 100.994344 | 6.047850e+11 | 1.927975e+07 |
min | 48.000000 | 4.139000e+10 | 1.580000e+06 |
25% | 131.000000 | 1.254535e+11 | 3.665000e+06 |
50% | 162.000000 | 2.175160e+11 | 7.530000e+06 |
75% | 217.000000 | 3.717665e+11 | 1.393750e+07 |
max | 478.000000 | 2.820000e+12 | 1.035600e+08 |
df.describe(include=[object])
Exchange | Industry | State | |
---|---|---|---|
count | 30 | 30 | 30 |
unique | 2 | 20 | 12 |
top | NYSE | IT | California |
freq | 23 | 5 | 8 |
groceries.describe()
price | n | |
---|---|---|
count | 3.000000 | 3.000000 |
mean | 2.333333 | 2.666667 |
std | 1.686219 | 2.081666 |
min | 0.400000 | 1.000000 |
25% | 1.750000 | 1.500000 |
50% | 3.100000 | 2.000000 |
75% | 3.300000 | 3.500000 |
max | 3.500000 | 5.000000 |
groceries.describe(include=[object])
item | |
---|---|
count | 3 |
unique | 3 |
top | milk |
freq | 1 |
pd.crosstab(df['State'], df['Exchange'])
Exchange | NASDAQ | NYSE |
---|---|---|
State | ||
Arkansas | 0 | 1 |
California | 4 | 4 |
Georgia | 0 | 2 |
Illinois | 1 | 3 |
Michigan | 0 | 1 |
Minnesota | 0 | 2 |
New Jersey | 0 | 2 |
New York | 0 | 6 |
North Carolina | 1 | 0 |
Ohio | 0 | 1 |
Oregon | 0 | 1 |
Washington | 1 | 0 |
df.max()
Exchange NYSE Industry Telecom State Washington Price 478.0 MarketCap 2820000000000.0 AvgVol 103560000.0 dtype: object
df['Price'].max() # peek ahead to "Select a subset" first bullet
478.0
df['MarketCap'].idxmax()
'AAPL'
df['Price'].mean()
176.93333333333334
df.Exchange.value_counts()
Exchange NYSE 23 NASDAQ 7 Name: count, dtype: int64
df.groupby('Exchange').groups.keys()
dict_keys(['NASDAQ', 'NYSE'])
df.groupby('Exchange').mean(numeric_only=True)
Price | MarketCap | AvgVol | |
---|---|---|---|
Exchange | |||
NASDAQ | 148.142857 | 8.279680e+11 | 3.037571e+07 |
NYSE | 185.695652 | 2.441122e+11 | 8.476522e+06 |
df.Price
Symbol MMM 157.0 AXP 198.0 AMGN 224.0 AAPL 173.0 BA 218.0 CAT 203.0 CVX 134.0 CSCO 54.0 KO 61.0 DIS 155.0 DOW 62.0 GS 364.0 HD 353.0 HON 190.0 IBM 130.0 INTC 48.0 JNJ 167.0 JPM 155.0 MCD 254.0 MRK 78.0 MSFT 300.0 NKE 146.0 PG 157.0 CRM 214.0 TRV 172.0 UNH 478.0 VZ 53.0 V 228.0 WBA 48.0 WMT 134.0 Name: Price, dtype: float64
df[['Price', 'MarketCap']]
Price | MarketCap | |
---|---|---|
Symbol | ||
MMM | 157.0 | 8.985700e+10 |
AXP | 198.0 | 1.503370e+11 |
AMGN | 224.0 | 1.249590e+11 |
AAPL | 173.0 | 2.820000e+12 |
BA | 218.0 | 1.269370e+11 |
CAT | 203.0 | 1.100330e+11 |
CVX | 134.0 | 2.588120e+11 |
CSCO | 54.0 | 2.288900e+11 |
KO | 61.0 | 2.634360e+11 |
DIS | 155.0 | 2.816880e+11 |
DOW | 62.0 | 4.551300e+10 |
GS | 364.0 | 1.214020e+11 |
HD | 353.0 | 3.688880e+11 |
HON | 190.0 | 1.299830e+11 |
IBM | 130.0 | 1.164680e+11 |
INTC | 48.0 | 1.972480e+11 |
JNJ | 167.0 | 4.404600e+11 |
JPM | 155.0 | 4.555110e+11 |
MCD | 254.0 | 1.896580e+11 |
MRK | 78.0 | 1.965430e+11 |
MSFT | 300.0 | 2.253000e+12 |
NKE | 146.0 | 2.302060e+11 |
PG | 157.0 | 3.759090e+11 |
CRM | 214.0 | 2.110360e+11 |
TRV | 172.0 | 4.139000e+10 |
UNH | 478.0 | 4.504210e+11 |
VZ | 53.0 | 2.239960e+11 |
V | 228.0 | 4.933530e+11 |
WBA | 48.0 | 4.169600e+10 |
WMT | 134.0 | 3.727260e+11 |
df.loc['AAPL']
Exchange NASDAQ Industry IT State California Price 173.0 MarketCap 2820000000000.0 AvgVol 103560000.0 Name: AAPL, dtype: object
df[0:3]
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
MMM | NYSE | Conglomerate | Minnesota | 157.0 | 8.985700e+10 | 2690000.0 |
AXP | NYSE | Financial | New York | 198.0 | 1.503370e+11 | 4330000.0 |
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
df.iloc[0:3, [0, 2, 3]]
Exchange | State | Price | |
---|---|---|---|
Symbol | |||
MMM | NYSE | Minnesota | 157.0 |
AXP | NYSE | New York | 198.0 |
AMGN | NASDAQ | California | 224.0 |
df.loc['AAPL', 'State']
'California'
df.loc[['AAPL', 'MMM'], ['State', 'Industry']]
State | Industry | |
---|---|---|
Symbol | ||
AAPL | California | IT |
MMM | Minnesota | Conglomerate |
df[df.Exchange == 'NASDAQ']
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
AAPL | NASDAQ | IT | California | 173.0 | 2.820000e+12 | 103560000.0 |
CSCO | NASDAQ | IT | California | 54.0 | 2.288900e+11 | 23770000.0 |
HON | NASDAQ | Conglomerate | North Carolina | 190.0 | 1.299830e+11 | 3450000.0 |
INTC | NASDAQ | Semiconductor | California | 48.0 | 1.972480e+11 | 36540000.0 |
MSFT | NASDAQ | IT | Washington | 300.0 | 2.253000e+12 | 35580000.0 |
WBA | NASDAQ | Retailing | Illinois | 48.0 | 4.169600e+10 | 6620000.0 |
df.sort_index(axis=0, ascending=True) # also try 1, False
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
AAPL | NASDAQ | IT | California | 173.0 | 2.820000e+12 | 103560000.0 |
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
AXP | NYSE | Financial | New York | 198.0 | 1.503370e+11 | 4330000.0 |
BA | NYSE | Aerospace | Illinois | 218.0 | 1.269370e+11 | 10350000.0 |
CAT | NYSE | Construction | Illinois | 203.0 | 1.100330e+11 | 3450000.0 |
CRM | NYSE | IT | California | 214.0 | 2.110360e+11 | 7120000.0 |
CSCO | NASDAQ | IT | California | 54.0 | 2.288900e+11 | 23770000.0 |
CVX | NYSE | Petroleum | California | 134.0 | 2.588120e+11 | 12440000.0 |
DIS | NYSE | Broadcasting | California | 155.0 | 2.816880e+11 | 13540000.0 |
DOW | NYSE | Chemical | Michigan | 62.0 | 4.551300e+10 | 5700000.0 |
GS | NYSE | Financial | New York | 364.0 | 1.214020e+11 | 3150000.0 |
HD | NYSE | Home Improvement | Georgia | 353.0 | 3.688880e+11 | 4310000.0 |
HON | NASDAQ | Conglomerate | North Carolina | 190.0 | 1.299830e+11 | 3450000.0 |
IBM | NYSE | IT | New York | 130.0 | 1.164680e+11 | 5920000.0 |
INTC | NASDAQ | Semiconductor | California | 48.0 | 1.972480e+11 | 36540000.0 |
JNJ | NYSE | Drug | New Jersey | 167.0 | 4.404600e+11 | 7940000.0 |
JPM | NYSE | Financial | New York | 155.0 | 4.555110e+11 | 14440000.0 |
KO | NYSE | Soft Drink | Georgia | 61.0 | 2.634360e+11 | 19550000.0 |
MCD | NYSE | Food | Illinois | 254.0 | 1.896580e+11 | 2810000.0 |
MMM | NYSE | Conglomerate | Minnesota | 157.0 | 8.985700e+10 | 2690000.0 |
MRK | NYSE | Drug | New Jersey | 78.0 | 1.965430e+11 | 14070000.0 |
MSFT | NASDAQ | IT | Washington | 300.0 | 2.253000e+12 | 35580000.0 |
NKE | NYSE | Apparel | Oregon | 146.0 | 2.302060e+11 | 5890000.0 |
PG | NYSE | Consumer | Ohio | 157.0 | 3.759090e+11 | 8610000.0 |
TRV | NYSE | Insurance | New York | 172.0 | 4.139000e+10 | 1580000.0 |
UNH | NYSE | Health | Minnesota | 478.0 | 4.504210e+11 | 3370000.0 |
V | NYSE | Financial | California | 228.0 | 4.933530e+11 | 10880000.0 |
VZ | NYSE | Telecom | New York | 53.0 | 2.239960e+11 | 23190000.0 |
WBA | NASDAQ | Retailing | Illinois | 48.0 | 4.169600e+10 | 6620000.0 |
WMT | NYSE | Retailing | Arkansas | 134.0 | 3.727260e+11 | 9630000.0 |
df.sort_values(by=['Industry', 'State'], axis=0, ascending=False) # try [False, True]
Exchange | Industry | State | Price | MarketCap | AvgVol | |
---|---|---|---|---|---|---|
Symbol | ||||||
VZ | NYSE | Telecom | New York | 53.0 | 2.239960e+11 | 23190000.0 |
KO | NYSE | Soft Drink | Georgia | 61.0 | 2.634360e+11 | 19550000.0 |
INTC | NASDAQ | Semiconductor | California | 48.0 | 1.972480e+11 | 36540000.0 |
WBA | NASDAQ | Retailing | Illinois | 48.0 | 4.169600e+10 | 6620000.0 |
WMT | NYSE | Retailing | Arkansas | 134.0 | 3.727260e+11 | 9630000.0 |
CVX | NYSE | Petroleum | California | 134.0 | 2.588120e+11 | 12440000.0 |
TRV | NYSE | Insurance | New York | 172.0 | 4.139000e+10 | 1580000.0 |
MSFT | NASDAQ | IT | Washington | 300.0 | 2.253000e+12 | 35580000.0 |
IBM | NYSE | IT | New York | 130.0 | 1.164680e+11 | 5920000.0 |
AAPL | NASDAQ | IT | California | 173.0 | 2.820000e+12 | 103560000.0 |
CSCO | NASDAQ | IT | California | 54.0 | 2.288900e+11 | 23770000.0 |
CRM | NYSE | IT | California | 214.0 | 2.110360e+11 | 7120000.0 |
HD | NYSE | Home Improvement | Georgia | 353.0 | 3.688880e+11 | 4310000.0 |
UNH | NYSE | Health | Minnesota | 478.0 | 4.504210e+11 | 3370000.0 |
MCD | NYSE | Food | Illinois | 254.0 | 1.896580e+11 | 2810000.0 |
AXP | NYSE | Financial | New York | 198.0 | 1.503370e+11 | 4330000.0 |
GS | NYSE | Financial | New York | 364.0 | 1.214020e+11 | 3150000.0 |
JPM | NYSE | Financial | New York | 155.0 | 4.555110e+11 | 14440000.0 |
V | NYSE | Financial | California | 228.0 | 4.933530e+11 | 10880000.0 |
JNJ | NYSE | Drug | New Jersey | 167.0 | 4.404600e+11 | 7940000.0 |
MRK | NYSE | Drug | New Jersey | 78.0 | 1.965430e+11 | 14070000.0 |
PG | NYSE | Consumer | Ohio | 157.0 | 3.759090e+11 | 8610000.0 |
CAT | NYSE | Construction | Illinois | 203.0 | 1.100330e+11 | 3450000.0 |
HON | NASDAQ | Conglomerate | North Carolina | 190.0 | 1.299830e+11 | 3450000.0 |
MMM | NYSE | Conglomerate | Minnesota | 157.0 | 8.985700e+10 | 2690000.0 |
DOW | NYSE | Chemical | Michigan | 62.0 | 4.551300e+10 | 5700000.0 |
DIS | NYSE | Broadcasting | California | 155.0 | 2.816880e+11 | 13540000.0 |
AMGN | NASDAQ | Biopharmaceutical | California | 224.0 | 1.249590e+11 | 3110000.0 |
NKE | NYSE | Apparel | Oregon | 146.0 | 2.302060e+11 | 5890000.0 |
BA | NYSE | Aerospace | Illinois | 218.0 | 1.269370e+11 | 10350000.0 |
groceries['weight'] = (8, 0.3, 1.0)
groceries
item | price | n | weight | |
---|---|---|---|---|
0 | milk | 3.5 | 2 | 8.0 |
1 | apples | 0.4 | 5 | 0.3 |
2 | bread | 3.1 | 1 | 1.0 |
new_row = pd.DataFrame({'item':'popcorn', 'price':3, 'n':1, 'weight':2}, index=[0])
new_row
item | price | n | weight | |
---|---|---|---|---|
0 | popcorn | 3 | 1 | 2 |
pd.concat([groceries, new_row], ignore_index=True) # ignore to reset second 0
item | price | n | weight | |
---|---|---|---|---|
0 | milk | 3.5 | 2 | 8.0 |
1 | apples | 0.4 | 5 | 0.3 |
2 | bread | 3.1 | 1 | 1.0 |
3 | popcorn | 3.0 | 1 | 2.0 |
# average Price of NASDAQ stocks
df[df.Exchange == 'NASDAQ'].Price.mean() # try NYSE
148.14285714285714
# highest Price among the NASDAQ stocks with AvgVol under 10 million
df[(df.Exchange == 'NASDAQ') & (df.AvgVol < 1e7)].Price.max()
224.0
# average MarketCap of 3 highest-Price stocks
df.sort_values(by='Price', ascending=False)[0:3].MarketCap.mean()
313570333333.3333