成为华尔街金融巨鳄第三课: Pandas2:学会使用Pandas-DataFrame

import pandas as pd
import numpy as np
pd.DataFrame({"one":[1,2,3],'two':[4,5,6]})

   one  two
0    1    4
1    2    5
2    3    6

pd.DataFrame({"one":[1,2,3],'two':[4,5,6]},index=['a','b','c'])

   one  two
a    1    4
b    2    5
c    3    6

pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1

pd.read_csv('test.csv')

   a  b  c
0  1  2  3
1  2  4  6
2  3  6  9

df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})
df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1


df.to_csv('test2.csv')

1、index、columns和values属性

df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})
df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1


df.index
Index(['a', 'b', 'c', 'd'], dtype='object')

df.columns
Index(['one', 'two'], dtype='object')

df.values
array([[ 1.,  3.],
       [ 2.,  4.],
       [ 3.,  2.],
       [nan,  1.]])

2.T属性

df.T

       a    b    c    d
one  1.0  2.0  3.0  NaN
two  3.0  4.0  2.0  1.0

3.describe()方法

df.describe()

       one       two
count  3.0  4.000000
mean   2.0  2.500000
std    1.0  1.290994
min    1.0  1.000000
25%    1.5  1.750000
50%    2.0  2.500000
75%    2.5  3.250000
max    3.0  4.000000

count: 该列数据共有多少条
mean:该列数据平均值
std:该列数据标准差
min:该列数据最小值
25%:该列数据从小到大25%位置上的数
50%:该列数据中位数
75%:该列数据从小到大75%位置上的数
max:该列数据最大值
df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1

eg:取第one列第a行的1.0

df['one']['a']
1.0
df.loc['a','one']
1.0

df['one']
a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

df['a']

c:\users\lenovo\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2904             if self.columns.nlevels > 1:
   2905                 return self._getitem_multilevel(key)
-> 2906             indexer = self.columns.get_loc(key)
   2907             if is_integer(indexer):
   2908                 indexer = [indexer]

c:\users\lenovo\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2895                 return self._engine.get_loc(casted_key)
   2896             except KeyError as err:
-> 2897                 raise KeyError(key) from err
   2898
   2899         if tolerance is not None:

KeyError: 'a'
(以上 Traceback 出自 ipython-input-34-9637ce7feee6 单元格:df['a'] 是按列名查找,而 'a' 是行标签不是列名,因此抛出 KeyError;按行标签取值应使用 df.loc['a', :]。)

df.loc['a',:]
one    1.0
two    3.0
Name: a, dtype: float64
df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})
df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1

eg:


df.loc[['a','b'],'one':'two']

   one  two
a  1.0    3
b  2.0    4

df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([1,2,3,4],index=['d','c','a','b'])})
df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1

df2 = pd.DataFrame({'two':[1,2,3,4],'one':[5,6,7,8]},index = ['d','c','a','b'])
df2

   two  one
d    1    5
c    2    6
a    3    7
b    4    8

df + df2

    one  two
a   8.0    6
b  10.0    8
c   9.0    4
d   NaN    2

df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1


df.fillna(0)

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  0.0    1


df.dropna()

   one  two
a  1.0    3
b  2.0    4
c  3.0    2

df

   one  two
a  1.0    3
b  2.0    4
c  3.0    2
d  NaN    1

df.loc['c','two'] = np.nan
df.loc['d','two'] = np.nan
df

   one  two
a  1.0  3.0
b  2.0  4.0
c  3.0  NaN
d  NaN  NaN


df.dropna(how = 'all')

   one  two
a  1.0  3.0
b  2.0  4.0
c  3.0  NaN

df2

   two  one
d    1    5
c    2    6
a    3    7
b    4    8

df2.loc['a','two'] = np.nan
df2

   two  one
d  1.0    5
c  2.0    6
a  NaN    7
b  4.0    8


df2.dropna(axis=1)

   one
d    5
c    6
a    7
b    8


df2.isnull()

     two    one
d  False  False
c  False  False
a   True  False
b  False  False

1.求平均值

df = df2
df

   two  one
d  1.0    5
c  2.0    6
a  NaN    7
b  4.0    8

df.mean()
two    2.333333
one    6.500000
dtype: float64
df.mean(axis=1)
d    3.0
c    4.0
a    7.0
b    6.0
dtype: float64

2.按值排序 : sort_values()

df

   two  one
d  1.0    5
c  2.0    6
a  NaN    7
b  4.0    8


df.sort_values(by = 'two')

   two  one
d  1.0    5
c  2.0    6
b  4.0    8
a  NaN    7


df.sort_values(by = 'two',ascending=False)

   two  one
b  4.0    8
c  2.0    6
d  1.0    5
a  NaN    7


df.sort_values(by = 'd',ascending=False,axis=1)

   one  two
d    5  1.0
c    6  2.0
a    7  NaN
b    8  4.0

3.按索引(行/列标签)排序 : sort_index()

df

   two  one
d  1.0    5
c  2.0    6
a  NaN    7
b  4.0    8

df.sort_index()

   two  one
a  NaN    7
b  4.0    8
c  2.0    6
d  1.0    5


df.sort_index(ascending = False)

   two  one
d  1.0    5
c  2.0    6
b  4.0    8
a  NaN    7


df.sort_index(ascending = True , axis = 1)

   one  two
d    5  1.0
c    6  2.0
a    7  NaN
b    8  4.0

1.pandas时间对象处理


pd.to_datetime(["2021-01-10","2021/MAY/1"])
DatetimeIndex(['2021-01-10', '2021-05-01'], dtype='datetime64[ns]', freq=None)

2.pandas时间对象自动生成

start:开始时间
end:结束时间
periods:可以指定start不指定end改指定periods,指的是生成从start开始的periods天时间,同样的,可以指定end不指定start改指定periods

pd.date_range('2021-01-01','2021-03-01')
DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10', '2021-01-11', '2021-01-12',
               '2021-01-13', '2021-01-14', '2021-01-15', '2021-01-16',
               '2021-01-17', '2021-01-18', '2021-01-19', '2021-01-20',
               '2021-01-21', '2021-01-22', '2021-01-23', '2021-01-24',
               '2021-01-25', '2021-01-26', '2021-01-27', '2021-01-28',
               '2021-01-29', '2021-01-30', '2021-01-31', '2021-02-01',
               '2021-02-02', '2021-02-03', '2021-02-04', '2021-02-05',
               '2021-02-06', '2021-02-07', '2021-02-08', '2021-02-09',
               '2021-02-10', '2021-02-11', '2021-02-12', '2021-02-13',
               '2021-02-14', '2021-02-15', '2021-02-16', '2021-02-17',
               '2021-02-18', '2021-02-19', '2021-02-20', '2021-02-21',
               '2021-02-22', '2021-02-23', '2021-02-24', '2021-02-25',
               '2021-02-26', '2021-02-27', '2021-02-28', '2021-03-01'],
              dtype='datetime64[ns]', freq='D')

pd.date_range('2021-01-01',periods=60)
DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10', '2021-01-11', '2021-01-12',
               '2021-01-13', '2021-01-14', '2021-01-15', '2021-01-16',
               '2021-01-17', '2021-01-18', '2021-01-19', '2021-01-20',
               '2021-01-21', '2021-01-22', '2021-01-23', '2021-01-24',
               '2021-01-25', '2021-01-26', '2021-01-27', '2021-01-28',
               '2021-01-29', '2021-01-30', '2021-01-31', '2021-02-01',
               '2021-02-02', '2021-02-03', '2021-02-04', '2021-02-05',
               '2021-02-06', '2021-02-07', '2021-02-08', '2021-02-09',
               '2021-02-10', '2021-02-11', '2021-02-12', '2021-02-13',
               '2021-02-14', '2021-02-15', '2021-02-16', '2021-02-17',
               '2021-02-18', '2021-02-19', '2021-02-20', '2021-02-21',
               '2021-02-22', '2021-02-23', '2021-02-24', '2021-02-25',
               '2021-02-26', '2021-02-27', '2021-02-28', '2021-03-01'],
              dtype='datetime64[ns]', freq='D')
freq:指定生成的时间间隔单位,默认为D(天),此外,还有‘H’(小时),‘W’(周)等
W(周)分为,‘W-MON’表示从start开始输出每周周一,默认只输入W表示'W-SUN'
B:只输出工作日

pd.date_range('2021-01-01',periods=60,freq='H')
DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:00:00',
               '2021-01-01 02:00:00', '2021-01-01 03:00:00',
               '2021-01-01 04:00:00', '2021-01-01 05:00:00',
               '2021-01-01 06:00:00', '2021-01-01 07:00:00',
               '2021-01-01 08:00:00', '2021-01-01 09:00:00',
               '2021-01-01 10:00:00', '2021-01-01 11:00:00',
               '2021-01-01 12:00:00', '2021-01-01 13:00:00',
               '2021-01-01 14:00:00', '2021-01-01 15:00:00',
               '2021-01-01 16:00:00', '2021-01-01 17:00:00',
               '2021-01-01 18:00:00', '2021-01-01 19:00:00',
               '2021-01-01 20:00:00', '2021-01-01 21:00:00',
               '2021-01-01 22:00:00', '2021-01-01 23:00:00',
               '2021-01-02 00:00:00', '2021-01-02 01:00:00',
               '2021-01-02 02:00:00', '2021-01-02 03:00:00',
               '2021-01-02 04:00:00', '2021-01-02 05:00:00',
               '2021-01-02 06:00:00', '2021-01-02 07:00:00',
               '2021-01-02 08:00:00', '2021-01-02 09:00:00',
               '2021-01-02 10:00:00', '2021-01-02 11:00:00',
               '2021-01-02 12:00:00', '2021-01-02 13:00:00',
               '2021-01-02 14:00:00', '2021-01-02 15:00:00',
               '2021-01-02 16:00:00', '2021-01-02 17:00:00',
               '2021-01-02 18:00:00', '2021-01-02 19:00:00',
               '2021-01-02 20:00:00', '2021-01-02 21:00:00',
               '2021-01-02 22:00:00', '2021-01-02 23:00:00',
               '2021-01-03 00:00:00', '2021-01-03 01:00:00',
               '2021-01-03 02:00:00', '2021-01-03 03:00:00',
               '2021-01-03 04:00:00', '2021-01-03 05:00:00',
               '2021-01-03 06:00:00', '2021-01-03 07:00:00',
               '2021-01-03 08:00:00', '2021-01-03 09:00:00',
               '2021-01-03 10:00:00', '2021-01-03 11:00:00'],
              dtype='datetime64[ns]', freq='H')

pd.date_range('2021-01-01',periods=60,freq='W')
DatetimeIndex(['2021-01-03', '2021-01-10', '2021-01-17', '2021-01-24',
               '2021-01-31', '2021-02-07', '2021-02-14', '2021-02-21',
               '2021-02-28', '2021-03-07', '2021-03-14', '2021-03-21',
               '2021-03-28', '2021-04-04', '2021-04-11', '2021-04-18',
               '2021-04-25', '2021-05-02', '2021-05-09', '2021-05-16',
               '2021-05-23', '2021-05-30', '2021-06-06', '2021-06-13',
               '2021-06-20', '2021-06-27', '2021-07-04', '2021-07-11',
               '2021-07-18', '2021-07-25', '2021-08-01', '2021-08-08',
               '2021-08-15', '2021-08-22', '2021-08-29', '2021-09-05',
               '2021-09-12', '2021-09-19', '2021-09-26', '2021-10-03',
               '2021-10-10', '2021-10-17', '2021-10-24', '2021-10-31',
               '2021-11-07', '2021-11-14', '2021-11-21', '2021-11-28',
               '2021-12-05', '2021-12-12', '2021-12-19', '2021-12-26',
               '2022-01-02', '2022-01-09', '2022-01-16', '2022-01-23',
               '2022-01-30', '2022-02-06', '2022-02-13', '2022-02-20'],
              dtype='datetime64[ns]', freq='W-SUN')

pd.date_range('2021-01-01',periods=60,freq='W-Fri')
DatetimeIndex(['2021-01-01', '2021-01-08', '2021-01-15', '2021-01-22',
               '2021-01-29', '2021-02-05', '2021-02-12', '2021-02-19',
               '2021-02-26', '2021-03-05', '2021-03-12', '2021-03-19',
               '2021-03-26', '2021-04-02', '2021-04-09', '2021-04-16',
               '2021-04-23', '2021-04-30', '2021-05-07', '2021-05-14',
               '2021-05-21', '2021-05-28', '2021-06-04', '2021-06-11',
               '2021-06-18', '2021-06-25', '2021-07-02', '2021-07-09',
               '2021-07-16', '2021-07-23', '2021-07-30', '2021-08-06',
               '2021-08-13', '2021-08-20', '2021-08-27', '2021-09-03',
               '2021-09-10', '2021-09-17', '2021-09-24', '2021-10-01',
               '2021-10-08', '2021-10-15', '2021-10-22', '2021-10-29',
               '2021-11-05', '2021-11-12', '2021-11-19', '2021-11-26',
               '2021-12-03', '2021-12-10', '2021-12-17', '2021-12-24',
               '2021-12-31', '2022-01-07', '2022-01-14', '2022-01-21',
               '2022-01-28', '2022-02-04', '2022-02-11', '2022-02-18'],
              dtype='datetime64[ns]', freq='W-FRI')

pd.date_range('2021-01-01',periods=60,freq='B')
DatetimeIndex(['2021-01-01', '2021-01-04', '2021-01-05', '2021-01-06',
               '2021-01-07', '2021-01-08', '2021-01-11', '2021-01-12',
               '2021-01-13', '2021-01-14', '2021-01-15', '2021-01-18',
               '2021-01-19', '2021-01-20', '2021-01-21', '2021-01-22',
               '2021-01-25', '2021-01-26', '2021-01-27', '2021-01-28',
               '2021-01-29', '2021-02-01', '2021-02-02', '2021-02-03',
               '2021-02-04', '2021-02-05', '2021-02-08', '2021-02-09',
               '2021-02-10', '2021-02-11', '2021-02-12', '2021-02-15',
               '2021-02-16', '2021-02-17', '2021-02-18', '2021-02-19',
               '2021-02-22', '2021-02-23', '2021-02-24', '2021-02-25',
               '2021-02-26', '2021-03-01', '2021-03-02', '2021-03-03',
               '2021-03-04', '2021-03-05', '2021-03-08', '2021-03-09',
               '2021-03-10', '2021-03-11', '2021-03-12', '2021-03-15',
               '2021-03-16', '2021-03-17', '2021-03-18', '2021-03-19',
               '2021-03-22', '2021-03-23', '2021-03-24', '2021-03-25'],
              dtype='datetime64[ns]', freq='B')

pd.date_range('2021-01-01',periods=60,freq='1h20min')
DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:20:00',
               '2021-01-01 02:40:00', '2021-01-01 04:00:00',
               '2021-01-01 05:20:00', '2021-01-01 06:40:00',
               '2021-01-01 08:00:00', '2021-01-01 09:20:00',
               '2021-01-01 10:40:00', '2021-01-01 12:00:00',
               '2021-01-01 13:20:00', '2021-01-01 14:40:00',
               '2021-01-01 16:00:00', '2021-01-01 17:20:00',
               '2021-01-01 18:40:00', '2021-01-01 20:00:00',
               '2021-01-01 21:20:00', '2021-01-01 22:40:00',
               '2021-01-02 00:00:00', '2021-01-02 01:20:00',
               '2021-01-02 02:40:00', '2021-01-02 04:00:00',
               '2021-01-02 05:20:00', '2021-01-02 06:40:00',
               '2021-01-02 08:00:00', '2021-01-02 09:20:00',
               '2021-01-02 10:40:00', '2021-01-02 12:00:00',
               '2021-01-02 13:20:00', '2021-01-02 14:40:00',
               '2021-01-02 16:00:00', '2021-01-02 17:20:00',
               '2021-01-02 18:40:00', '2021-01-02 20:00:00',
               '2021-01-02 21:20:00', '2021-01-02 22:40:00',
               '2021-01-03 00:00:00', '2021-01-03 01:20:00',
               '2021-01-03 02:40:00', '2021-01-03 04:00:00',
               '2021-01-03 05:20:00', '2021-01-03 06:40:00',
               '2021-01-03 08:00:00', '2021-01-03 09:20:00',
               '2021-01-03 10:40:00', '2021-01-03 12:00:00',
               '2021-01-03 13:20:00', '2021-01-03 14:40:00',
               '2021-01-03 16:00:00', '2021-01-03 17:20:00',
               '2021-01-03 18:40:00', '2021-01-03 20:00:00',
               '2021-01-03 21:20:00', '2021-01-03 22:40:00',
               '2021-01-04 00:00:00', '2021-01-04 01:20:00',
               '2021-01-04 02:40:00', '2021-01-04 04:00:00',
               '2021-01-04 05:20:00', '2021-01-04 06:40:00'],
              dtype='datetime64[ns]', freq='80T')

3.时间序列


sr = pd.Series(np.arange(1000),index=pd.date_range('2020-01-01',periods=1000))
sr
2020-01-01      0
2020-01-02      1
2020-01-03      2
2020-01-04      3
2020-01-05      4
             ...

2022-09-22    995
2022-09-23    996
2022-09-24    997
2022-09-25    998
2022-09-26    999
Freq: D, Length: 1000, dtype: int32

sr.index
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10',
               ...

               '2022-09-17', '2022-09-18', '2022-09-19', '2022-09-20',
               '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24',
               '2022-09-25', '2022-09-26'],
              dtype='datetime64[ns]', length=1000, freq='D')

sr['2020']
2020-01-01      0
2020-01-02      1
2020-01-03      2
2020-01-04      3
2020-01-05      4
             ...

2020-12-27    361
2020-12-28    362
2020-12-29    363
2020-12-30    364
2020-12-31    365
Freq: D, Length: 366, dtype: int32

sr['2020-3']
2020-03-01    60
2020-03-02    61
2020-03-03    62
2020-03-04    63
2020-03-05    64
2020-03-06    65
2020-03-07    66
2020-03-08    67
2020-03-09    68
2020-03-10    69
2020-03-11    70
2020-03-12    71
2020-03-13    72
2020-03-14    73
2020-03-15    74
2020-03-16    75
2020-03-17    76
2020-03-18    77
2020-03-19    78
2020-03-20    79
2020-03-21    80
2020-03-22    81
2020-03-23    82
2020-03-24    83
2020-03-25    84
2020-03-26    85
2020-03-27    86
2020-03-28    87
2020-03-29    88
2020-03-30    89
2020-03-31    90
Freq: D, dtype: int32

sr['2020-3-19']
78

sr['2020-03':'2021-05-1']
2020-03-01     60
2020-03-02     61
2020-03-03     62
2020-03-04     63
2020-03-05     64
             ...

2021-04-27    482
2021-04-28    483
2021-04-29    484
2021-04-30    485
2021-05-01    486
Freq: D, Length: 427, dtype: int32
sr
2020-01-01      0
2020-01-02      1
2020-01-03      2
2020-01-04      3
2020-01-05      4
             ...

2022-09-22    995
2022-09-23    996
2022-09-24    997
2022-09-25    998
2022-09-26    999
Freq: D, Length: 1000, dtype: int32

sr.resample('W').sum()
2020-01-05      10
2020-01-12      56
2020-01-19     105
2020-01-26     154
2020-02-02     203
              ...

2022-09-04    6818
2022-09-11    6867
2022-09-18    6916
2022-09-25    6965
2022-10-02     999
Freq: W-SUN, Length: 144, dtype: int32

sr.resample('W').mean()
2020-01-05      2
2020-01-12      8
2020-01-19     15
2020-01-26     22
2020-02-02     29
             ...

2022-09-04    974
2022-09-11    981
2022-09-18    988
2022-09-25    995
2022-10-02    999
Freq: W-SUN, Length: 144, dtype: int32

pd.read_csv('maotai.csv')

     日期          收盘        开盘        高         低         交易量    涨跌幅
0    2021/11/12  1,773.78  1,778.00  1,785.05  1,767.00  1.76M   0.24%
1    2021/11/11  1,769.60  1,752.93  1,769.60  1,741.50  2.27M   0.89%
2    2021/11/10  1,753.99  1,790.01  1,795.00  1,735.00  3.53M  -2.01%
3    2021/11/9   1,790.01  1,819.98  1,827.87  1,782.00  2.74M  -1.65%
4    2021/11/8   1,820.10  1,820.00  1,830.80  1,802.05  1.77M   0.01%
…    …           …         …         …         …         …       …
239  2020/11/18  1,693.65  1,715.00  1,720.53  1,683.16  3.52M  -1.29%
240  2020/11/17  1,715.80  1,740.00  1,742.35  1,701.07  2.52M  -0.82%
241  2020/11/16  1,730.05  1,711.00  1,730.05  1,697.26  3.06M   1.47%
242  2020/11/13  1,705.00  1,724.00  1,728.88  1,691.00  2.82M  -1.72%
243  2020/11/12  1,734.79  1,730.01  1,750.00  1,722.27  2.35M   0.20%

244 rows × 7 columns


df = pd.read_csv('maotai.csv',index_col=0)
df

            收盘        开盘        高         低         交易量    涨跌幅
日期
2021/11/12  1,773.78  1,778.00  1,785.05  1,767.00  1.76M   0.24%
2021/11/11  1,769.60  1,752.93  1,769.60  1,741.50  2.27M   0.89%
2021/11/10  1,753.99  1,790.01  1,795.00  1,735.00  3.53M  -2.01%
2021/11/9   1,790.01  1,819.98  1,827.87  1,782.00  2.74M  -1.65%
2021/11/8   1,820.10  1,820.00  1,830.80  1,802.05  1.77M   0.01%
…           …         …         …         …         …       …
2020/11/18  1,693.65  1,715.00  1,720.53  1,683.16  3.52M  -1.29%
2020/11/17  1,715.80  1,740.00  1,742.35  1,701.07  2.52M  -0.82%
2020/11/16  1,730.05  1,711.00  1,730.05  1,697.26  3.06M   1.47%
2020/11/13  1,705.00  1,724.00  1,728.88  1,691.00  2.82M  -1.72%
2020/11/12  1,734.79  1,730.01  1,750.00  1,722.27  2.35M   0.20%

244 rows × 6 columns


df.index
Index(['2021/11/12', '2021/11/11', '2021/11/10', '2021/11/9', '2021/11/8',
       '2021/11/5', '2021/11/4', '2021/11/3', '2021/11/2', '2021/11/1',
       ...

       '2020/11/25', '2020/11/24', '2020/11/23', '2020/11/20', '2020/11/19',
       '2020/11/18', '2020/11/17', '2020/11/16', '2020/11/13', '2020/11/12'],
      dtype='object', name='日期', length=244)

df = pd.read_csv('maotai.csv',index_col=0,thousands=',',parse_dates=True)
df

            收盘       开盘       高        低        交易量    涨跌幅
日期
2021-11-12  1773.78  1778.00  1785.05  1767.00  1.76M   0.24%
2021-11-11  1769.60  1752.93  1769.60  1741.50  2.27M   0.89%
2021-11-10  1753.99  1790.01  1795.00  1735.00  3.53M  -2.01%
2021-11-09  1790.01  1819.98  1827.87  1782.00  2.74M  -1.65%
2021-11-08  1820.10  1820.00  1830.80  1802.05  1.77M   0.01%
…           …        …        …        …        …       …
2020-11-18  1693.65  1715.00  1720.53  1683.16  3.52M  -1.29%
2020-11-17  1715.80  1740.00  1742.35  1701.07  2.52M  -0.82%
2020-11-16  1730.05  1711.00  1730.05  1697.26  3.06M   1.47%
2020-11-13  1705.00  1724.00  1728.88  1691.00  2.82M  -1.72%
2020-11-12  1734.79  1730.01  1750.00  1722.27  2.35M   0.20%

244 rows × 6 columns

df.index
DatetimeIndex(['2021-11-12', '2021-11-11', '2021-11-10', '2021-11-09',
               '2021-11-08', '2021-11-05', '2021-11-04', '2021-11-03',
               '2021-11-02', '2021-11-01',
               ...

               '2020-11-25', '2020-11-24', '2020-11-23', '2020-11-20',
               '2020-11-19', '2020-11-18', '2020-11-17', '2020-11-16',
               '2020-11-13', '2020-11-12'],
              dtype='datetime64[ns]', name='日期', length=244, freq=None)

df = pd.read_csv('maotai.csv',index_col=0,parse_dates=[0])
df.index
DatetimeIndex(['2021-11-12', '2021-11-11', '2021-11-10', '2021-11-09',
               '2021-11-08', '2021-11-05', '2021-11-04', '2021-11-03',
               '2021-11-02', '2021-11-01',
               ...

               '2020-11-25', '2020-11-24', '2020-11-23', '2020-11-20',
               '2020-11-19', '2020-11-18', '2020-11-17', '2020-11-16',
               '2020-11-13', '2020-11-12'],
              dtype='datetime64[ns]', name='日期', length=244, freq=None)

pd.read_csv('maotai.csv',index_col=0,header=None)

            1         2         3         4         5       6
0
日期          收盘        开盘        高         低         交易量     涨跌幅
2021/11/12  1,773.78  1,778.00  1,785.05  1,767.00  1.76M   0.24%
2021/11/11  1,769.60  1,752.93  1,769.60  1,741.50  2.27M   0.89%
2021/11/10  1,753.99  1,790.01  1,795.00  1,735.00  3.53M   -2.01%
2021/11/9   1,790.01  1,819.98  1,827.87  1,782.00  2.74M   -1.65%
…           …         …         …         …         …       …
2020/11/18  1,693.65  1,715.00  1,720.53  1,683.16  3.52M   -1.29%
2020/11/17  1,715.80  1,740.00  1,742.35  1,701.07  2.52M   -0.82%
2020/11/16  1,730.05  1,711.00  1,730.05  1,697.26  3.06M   1.47%
2020/11/13  1,705.00  1,724.00  1,728.88  1,691.00  2.82M   -1.72%
2020/11/12  1,734.79  1,730.01  1,750.00  1,722.27  2.35M   0.20%

245 rows × 6 columns


pd.read_csv('maotai.csv',index_col=0,header=None,names=['a','b','c','d','e','f','g'])

            b         c         d         e         f       g
a
日期          收盘        开盘        高         低         交易量     涨跌幅
2021/11/12  1,773.78  1,778.00  1,785.05  1,767.00  1.76M   0.24%
2021/11/11  1,769.60  1,752.93  1,769.60  1,741.50  2.27M   0.89%
2021/11/10  1,753.99  1,790.01  1,795.00  1,735.00  3.53M   -2.01%
2021/11/9   1,790.01  1,819.98  1,827.87  1,782.00  2.74M   -1.65%
…           …         …         …         …         …       …
2020/11/18  1,693.65  1,715.00  1,720.53  1,683.16  3.52M   -1.29%
2020/11/17  1,715.80  1,740.00  1,742.35  1,701.07  2.52M   -0.82%
2020/11/16  1,730.05  1,711.00  1,730.05  1,697.26  3.06M   1.47%
2020/11/13  1,705.00  1,724.00  1,728.88  1,691.00  2.82M   -1.72%
2020/11/12  1,734.79  1,730.01  1,750.00  1,722.27  2.35M   0.20%

245 rows × 6 columns


pd.read_csv('maotai.csv',index_col=0,header=None,names=['a','b','c','d','e','f','g'],na_values=['收盘','开盘'])

            b         c         d         e         f       g
a
日期          NaN       NaN       高         低         交易量     涨跌幅
2021/11/12  1,773.78  1,778.00  1,785.05  1,767.00  1.76M   0.24%
2021/11/11  1,769.60  1,752.93  1,769.60  1,741.50  2.27M   0.89%
2021/11/10  1,753.99  1,790.01  1,795.00  1,735.00  3.53M   -2.01%
2021/11/9   1,790.01  1,819.98  1,827.87  1,782.00  2.74M   -1.65%
…           …         …         …         …         …       …
2020/11/18  1,693.65  1,715.00  1,720.53  1,683.16  3.52M   -1.29%
2020/11/17  1,715.80  1,740.00  1,742.35  1,701.07  2.52M   -0.82%
2020/11/16  1,730.05  1,711.00  1,730.05  1,697.26  3.06M   1.47%
2020/11/13  1,705.00  1,724.00  1,728.88  1,691.00  2.82M   -1.72%
2020/11/12  1,734.79  1,730.01  1,750.00  1,722.27  2.35M   0.20%

245 rows × 6 columns

Original: https://blog.csdn.net/qq_49259434/article/details/121310751
Author: 这个人不主动
Title: 成为华尔街金融巨鳄第三课: Pandas2:学会使用Pandas-DataFrame

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/738664/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球