改进的SIR差分模型及三个模型的应用

2021-07-09 约 4324 字预计阅读 9 分钟

前言

感谢组员们的通力协作；作为组长，有些地方没能帮上忙，实在抱歉。

改进的SIR差分模型

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114


%% SIR difference model
% Loads the US COVID-19 history table, iterates a two-compartment difference
% model for 90 days, and overlays the model on the reported counts taken from
% numeric column 12 (first on the fitting window, then on a longer window).
frame=importdata('美国covid19疫情数据l-history改.csv');
data=frame.data;
date=frame.textdata(:,1);
date_row=size(data,1);
real_date=date(2:date_row+1,:);   % date strings with the header row skipped

POP=300000000;   % population used to turn counts into fractions
nStep=90;        % number of simulated days
l=0.000125;      % daily contact rate
m=0.01;          % daily recovery rate (0.0005 was also tried)

% Row 1 holds the infected fraction, row 2 the susceptible fraction.
% nStep updates produce nStep+1 states, so allocate all of them up front.
E=zeros(2,nStep+1);
E(1,1)=data(345,12)/POP;
E(2,1)=1-E(1,1);
for i=1:nStep
    % NOTE(review): the infected row gains l*s while the susceptible row loses
    % l*i*s, so the two updates do not balance. Left as written because l and m
    % appear to have been tuned against this form -- confirm it is intended.
    E(1,i+1)=l*E(2,i)-m*E(1,i)+E(1,i);
    E(2,i+1)=E(2,i)-l*E(1,i)*E(2,i);
end
a=E(1,:);
b=E(2,:);
% E(:,1) is seeded from row 345, which is also the first sample of the flipped
% data windows below, so the model must share the data's day-0 origin.
tModel=0:nStep;

% Fitting window: rows 311..345, flipped so that row 345 comes first.
X=flip(data(311:345,12)');
rate=X;
rate(isnan(rate))=0;
n=length(rate);
rt=0:1:n-1;
%a0=[100,10]; % initial guess (unused)
figure(1)
h1=plot(rt,X/POP,'*');      % reported data points
hold on;
h2=plot(tModel,a,'r');      % model curve, aligned with the data at day 0
xlabel('日期');
ylabel('感染人数');
legend([h1 h2],'3~5月感染人数','SIR模型迭代曲线','Location','NorthWest');
hold on
%% Prediction
% Longer window: rows 255..345 (rows from 311 on belong to the first question).
X=flip(data(255:345,12)');
rate=X;
rate(isnan(rate))=0;
n=length(rate);
rt=0:1:n-1;
figure(2)
h3=plot(rt,X/POP,'*');      % reported data points
hold on;
h4=plot(tModel,a,'r');
xlabel('日期');
ylabel('感染人数');
legend([h3 h4],'3~6月感染人数','SIR模型迭代曲线','Location','NorthWest');
hold on
%% Before / after vaccination
% Runs the three-row difference model (infected, susceptible, removed) twice,
% once per initial removed count, and draws both runs together with the
% reported series from column 1 of usc.csv in figure 6.
frame=importdata('usc.csv');
data=frame.data;
date=frame.textdata(:,1);
date_row=size(data,1);
real_date=date(2:date_row+1,:);   % date strings with the header row skipped

POP=300000000;   % population used to turn counts into fractions
nStep=150;       % number of simulated days
l=0.0018;        % daily contact rate
m=0.01;          % daily recovery rate
% Initial removed counts: first entry = before vaccination, second = after.
% Other values that were tried: 43714928, 33714928.
removed0=[28664448 53714928];
lineColor={'r','b'};

X=data(320:486,1)';   % reported series; original note: "80-482  3.1-7.1"
rate=X;
rate(isnan(rate))=0;
n=length(rate);
rt=0:1:n-1;
% The model is seeded from row 320, the first sample of X, so both share day 0.
t=0:nStep;

figure(6)
h11=plot(rt,X/POP,'*');   % reported data points
hold on;
hModel=gobjects(1,2);
for s=1:2
    % nStep updates produce nStep+1 states, so allocate all of them up front.
    E=zeros(3,nStep+1);
    E(1,1)=data(320,1)/POP;
    E(3,1)=removed0(s)/POP;
    E(2,1)=1-E(1,1)-E(3,1);
    for i=1:nStep
        % NOTE(review): infected gain l*s while susceptibles lose l*i*s; kept
        % as written because the parameters appear tuned to it -- confirm.
        E(1,i+1)=E(1,i)+l*E(2,i)-m*E(1,i);
        E(2,i+1)=E(2,i)-l*E(1,i)*E(2,i);
        E(3,i+1)=1-E(1,i+1)-E(2,i+1);   % removed = remainder of the population
    end
    a=E(1,:);
    b=E(2,:);
    c=E(3,:);
    hModel(s)=plot(t,a,lineColor{s});   % infected, solid
    %plot(t,b,'b')                      % susceptible (not drawn)
    plot(t,c,[lineColor{s} '--']);      % removed, dashed to tell it from infected
end
h1=hModel(1);
h2=hModel(2);
%legend('i(t)','s(t)','r(t)')
hold on
xlabel('日期');
ylabel('r(t)与i（t）');
legend([h1 h2 h11],'接种疫苗前','接种疫苗后','感染人数原始数据','Location','NorthWest');

改进的指数增长模型（增长率随时间变化）预测美国人口

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


% Improved exponential growth model: x'(t) = r(t)*x(t), where r(t) is a
% quadratic fitted to finite-difference growth rates of the population series.
% The literals are listed in descending order and flipped into ascending order.
x=flip([324985536 322941312 320635168 318300992 315993728 313830976 311556864 309321664 306771520 304093952 301231200 298379904 295516608 292805312 290107936 287625184 284968960 282162400 279040000 275854016 272656992]);
y=flip([0.006330017,0.007192424,0.007333235,0.007301613,0.006891455,0.007299188,0.007226135,0.008312845,0.008805068,0.009503504,0.009555925,0.00968912,0.009259723,0.009297836,0.008631901,0.009321099,0.009946612,0.011189794,0.011549529,0.011725443,0.012112401]);
n=length(x);
t=0:1:n-1;

% Relative growth rate from three-point differences with unit spacing:
% one-sided formulas at both ends, central differences in between.
rk=zeros(1,n);
rk(1)=(-3*x(1)+4*x(2)-x(3))/2;
rk(n)=(x(n-2)-4*x(n-1)+3*x(n))/2;
rk(2:n-1)=(x(3:n)-x(1:n-2))/2;
rk=rk./x;

% Quadratic fit r(t) = r0*t^2 + r1*t + r2 (polyfit returns highest power first).
p=polyfit(t,rk,2);
r0=p(1);
r1=p(2);
r2=p(3);
x0=x(1);
rateFit=@(tt) r0*tt.^2+r1*tt+r2;
% Solution of x' = r(t)*x with x(0) = x0: x0*exp(integral of r from 0 to t).
popFit=@(tt) x0*exp((r0*tt.^3)/3+(r1*tt.^2)/2+r2*tt);
R=rateFit(t);
X=popFit(t);

figure(1)
hold on;
xlabel('year');
ylabel('rate');
grid on
plot(t,y,'r*')            % reference growth rates only
figure(2)
hold on;
xlabel('year');
ylabel('population');
grid on
plot(t,x,'r*',t,X)        % population data against the fitted curve
figure(3)
hold on;
xlabel('year');
ylabel('rate');
grid on
plot(t,y,'r*',t,R')       % reference growth rates against the fitted r(t)

% Extrapolate one and two steps past the last sample; the original labels these
% steps 2018 and 2019. t(end) replaces the hard-coded index 21 so the forecast
% follows the data length. Results are echoed on purpose.
%2018
t1=t(end)+1;
x2018=popFit(t1)
y2018=rateFit(t1)
%2019
t1=t(end)+2;
x2019=popFit(t1)
y2019=rateFit(t1)

药物中毒急救建模

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22


% Plots the drug-amount curves of the poisoning model over t in [0,25]:
% figure 1 shows the two untreated curves, figure 2 adds the curve after rescue.
% Plain function handles are used, so no Symbolic Math Toolbox is required.
clc;
clear;   % workspace variables only; 'clear all' would also drop cached functions
f=@(t) 6*(exp(-0.1155*t)-exp(-0.1386*t));   % legend: amount in the blood
h=@(t) exp(-0.1386*t);                      % legend: amount in the GI tract
figure(1);
fplot(f,[0 25]);
hold on;
fplot(h,[0,25]);
grid on;
xlabel('时间t/h');
ylabel('药量');
legend('血液中的药量y/mg','胃肠道中的药量x/mg');
figure(2)
fplot(f,[0,25]);
hold on;
fplot(h,[0,25]);
% From here on f is the blood curve after rescue (third legend entry).
f=@(t) -(exp(-0.693*t)-exp(-0.1386*t))/4;
fplot(f,[0,25]);
xlabel('时间t/h');
ylabel('药量');
legend('血液中的药量y/mg','胃肠道中的药量x/mg','施救后血液中的药量y/mg');

热传导差分偏微分模型应用

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129


% Setup for the four-layer 1-D heat-conduction finite-difference model:
% material data, grid, tridiagonal coefficients, initial field, measured data
% and the range of heat-transfer coefficients to sweep.
clear;
close all;
clc;

% One row per layer. a = lamda./(pho.*c) has the form of a thermal diffusivity,
% so pho, c and lamda are presumably density, specific heat and conductivity.
pho=[300;862;74.2;1.18];
c=[1377;2100;1726;1005];
lamda=[0.082;0.37;0.045;0.028];
a=lamda./(pho.*c);

d=[0.6;6;3.6;5]*10^-3;   % layer thicknesses

TT=273.15;     % offset added to the temperatures below before they are used
T_in=37;
T_out=75;
T_s=48.08;

xmin=0;
xmax=sum(d);

N=5400;            % number of time steps
h=0.05*10^-3;      % grid spacing
k=1;               % time step
r=k/h^2;
I=round((xmax-xmin)/h);   % number of grid intervals

% Grid intervals per layer.
N1=round(d(1)/h);
N2=round(d(2)/h);
N3=round(d(3)/h);
N4=round(d(4)/h);

% Super-, main- and sub-diagonal entries, filled layer by layer.
A=zeros(1,I);
B=zeros(1,I+1);
C=zeros(1,I);
layerCount=[N1 N2 N3 N4];
layerStop=cumsum(layerCount);
layerStart=layerStop-layerCount+1;
for layer=1:4
    idx=layerStart(layer):layerStop(layer);
    A(idx)=-a(layer)*r;
    B(idx)=2+2*r*a(layer);
    C(idx)=-r*a(layer);
end

% Temperature field: one row per grid node, one column per time level;
% the first column starts uniformly at T_in+TT.
T=zeros(I+1,N+1);
T(:,1)=T_in+TT;

T_xt=xlsread('CUMCM-2018-Problem-A-Chinese-Appendix.xlsx');

% Candidate heat-transfer coefficients and the misfit stored for each of them.
h_min=110;
h_max=120;
delta_h=0.1;
H1=h_min:delta_h:h_max;
delta=zeros(size(H1));
 
% Sweep the coefficient h1 over H1. For each value: derive the matching
% coefficient h5 for the other boundary, assemble the implicit system matrix,
% march N time steps, and store the RMS misfit against the measured series.
% NOTE(review): delta is filled but never used to choose an h1 here, and T
% keeps the field of the last h1 tried -- confirm that is what the plots need.

% Quantities that do not depend on h1, hoisted out of the sweep.
k1=lamda(1);k2=lamda(2);k3=lamda(3);k4=lamda(4);
% Cumulative interface positions. x4 previously added d(2) twice and omitted
% d(3); it must be the sum of all four thicknesses.
x1=d(1);x2=d(1)+d(2);x3=d(1)+d(2)+d(3);x4=d(1)+d(2)+d(3)+d(4);
t1=T_out+TT;t2=T_in+TT;t3=T_s+TT;

for j=1:length(H1)
    h1=h_min+(j-1)*delta_h;

    % Denominator shared by both terms of the h5 expression.
    den=k1*k2*k3*k4-h1*k1*k2*k3*x3-h1*k1*k2*k4*x2 ...
        -h1*k1*k3*k4*x1+h1*k1*k2*k3*x4+h1*k1*k2*k4*x3+h1*k1*k3*k4*x2+h1*k2*k3*k4*x1;
    h5=-((h1*k2*k3*k4*t1)/den-(h1*k2*k3*k4*t3)/den)/(t2/k1-t3/k1);

    AA=diag(B)+diag(A,1)+diag(C,-1);
    % First and last rows: boundary conditions with coefficients h1 and h5.
    AA(1,1)=lamda(1)/h+h1;
    AA(1,2)=-lamda(1)/h;
    AA(I+1,I)=-lamda(4)/h;
    AA(I+1,I+1)=lamda(4)/h+h5;

    % Interface rows between adjacent layers; their right-hand side is zero.
    AA(N1+1,N1)=-lamda(1);
    AA(N1+1,N1+1)=lamda(1)+lamda(2);
    AA(N1+1,N1+2)=-lamda(2);

    AA(N1+N2+1,N1+N2)=-lamda(2);
    AA(N1+N2+1,N1+N2+1)=lamda(2)+lamda(3);
    AA(N1+N2+1,N1+N2+2)=-lamda(3);

    AA(N1+N2+N3+1,N1+N2+N3)=-lamda(3);
    AA(N1+N2+N3+1,N1+N2+N3+1)=lamda(3)+lamda(4);
    AA(N1+N2+N3+1,N1+N2+N3+2)=-lamda(4);

    for n=1:k:N
        % Right-hand side built from time level n.
        D=zeros(I+1,1);
        D(1)=h1*(T_out+TT);
        D(I+1)=h5*(T_in+TT);
        for i=2:1:N1
            D(i)=r*a(1)*T(i-1,n)+(2-2*r*a(1))*T(i,n)+r*a(1)*T(i+1,n);
        end
        for i=N1+1:1:N1+N2
            D(i)=r*a(2)*T(i-1,n)+(2-2*r*a(2))*T(i,n)+r*a(2)*T(i+1,n);
        end
        for i=N1+N2+1:1:N1+N2+N3
            D(i)=r*a(3)*T(i-1,n)+(2-2*r*a(3))*T(i,n)+r*a(3)*T(i+1,n);
        end
        for i=N1+N2+N3+1:1:N1+N2+N3+N4
            D(i)=r*a(4)*T(i-1,n)+(2-2*r*a(4))*T(i,n)+r*a(4)*T(i+1,n);
        end
        % Interface rows were filled by the loops above; reset them to zero.
        D(N1+1)=0;
        D(N1+N2+1)=0;
        D(N1+N2+N3+1)=0;
        T(:,n+1)=AA\D;
    end
    % RMS difference between column 2 of the measured table and the last grid
    % row of T with the TT offset removed.
    delta(j)=sqrt(sum((T_xt(:,2)-T(end,:)'+TT).^2)/length(T_xt(:,1)));
end
% Figure 2 of the write-up: the whole temperature field (TT offset removed)
% over time and position, with the position axis scaled by 1000.
timeAxis=0:k:N;
figure(1);
mesh(timeAxis,1000*(0:h:sum(d)),(T-TT));
% Figure 3 of the write-up: temperature histories at the first node of each
% layer, plus column 2 of the measured table.
nodeRows=[1, N1+1, N2+N1+1, N3+N2+N1+1];
T_problem1=T(nodeRows,:)';
T_problem1=T_problem1-TT;
figure(2);
plot(timeAxis,T_problem1(:,1)',timeAxis,T_problem1(:,2)',timeAxis,T_problem1(:,3)',timeAxis,T_problem1(:,4)',timeAxis,T_xt(:,2)');

Python 数据处理（写得很乱）

为了给第一个模型准备数据，这里整合了多个数据源……代码有点乱，将就着看吧。

1
2
3
4
5
6
7
8


#导入需要的数据库和文件
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
odata=pd.read_csv(r'us_state_vaccinations.csv')
plt.rcParams['font.sans-serif'] = ['SimHei'] 
plt.rcParams['axes.unicode_minus'] = False 

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


r_hex = '#dc2624'     # red,       RGB = 220,38,36
dt_hex = '#2b4750'    # dark teal, RGB = 43,71,80
tl_hex = '#45a0a2'    # teal,      RGB = 69,160,162
r1_hex = '#e87a59'    # red,       RGB = 232,122,89
tl1_hex = '#7dcaa9'   # teal,      RGB = 125,202,169
g_hex = '#649E7D'     # green,     RGB = 100,158,125
o_hex = '#dc8018'     # orange,    RGB = 220,128,24
tn_hex = '#C89F91'    # tan,       RGB = 200,159,145
g50_hex = '#6c6d6c'   # grey-50,   RGB = 108,109,108
bg_hex = '#4f6268'    # blue grey, RGB = 79,98,104
g25_hex = '#c7cccf'   # grey-25,   RGB = 199,204,207

1

odata

	date	location	total_vaccinations	total_distributed	people_vaccinated	people_fully_vaccinated_per_hundred	total_vaccinations_per_hundred	people_fully_vaccinated	people_vaccinated_per_hundred	distributed_per_hundred	daily_vaccinations_raw	daily_vaccinations	daily_vaccinations_per_million	share_doses_used
0	2021-01-12	Alabama	78134.0	377025.0	70861.0	0.15	1.59	7270.0	1.45	7.69	NaN	NaN	NaN	0.207
1	2021-01-13	Alabama	84040.0	378975.0	74792.0	0.19	1.71	9245.0	1.53	7.73	5906.0	5906.0	1205.0	0.222
2	2021-01-14	Alabama	92300.0	435350.0	80480.0	NaN	1.88	NaN	1.64	8.88	8260.0	7083.0	1445.0	0.212
3	2021-01-15	Alabama	100567.0	444650.0	86956.0	0.28	2.05	13488.0	1.77	9.07	8267.0	7478.0	1525.0	0.226
4	2021-01-16	Alabama	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	7557.0	7498.0	1529.0	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
11328	2021-06-30	Wyoming	421749.0	516025.0	226911.0	34.38	72.87	198958.0	39.21	89.16	55.0	913.0	1578.0	0.817
11329	2021-07-01	Wyoming	423238.0	516325.0	227741.0	34.51	73.13	199743.0	39.35	89.21	1489.0	1113.0	1923.0	0.820
11330	2021-07-02	Wyoming	424025.0	516865.0	228162.0	34.59	73.26	200184.0	39.42	89.31	787.0	847.0	1463.0	0.820
11331	2021-07-03	Wyoming	431008.0	517365.0	230914.0	35.34	74.47	204522.0	39.90	89.39	6983.0	1680.0	2903.0	0.833
11332	2021-07-04	Wyoming	431101.0	517365.0	230993.0	35.35	74.49	204598.0	39.91	89.39	93.0	1682.0	2906.0	0.833

11333 rows × 14 columns

1
2


# Distinct values of the `location` column (set order is arbitrary), then their count.
unique_locations = set(odata['location'])
states = list(unique_locations)
len(states)

1
2
3


# Distinct date strings in ascending order; the values are 'YYYY-MM-DD' text,
# so the plain string sort is also chronological.
date = sorted(set(odata['date']))
date

1

date[0]

'2020-12-20'

1
2


# All rows (one per location) recorded on 2021-01-12.
on_first_day = odata['date'] == '2021-01-12'
US = odata.loc[on_first_day]
US

	date	location	total_vaccinations	total_distributed	people_vaccinated	people_fully_vaccinated_per_hundred	total_vaccinations_per_hundred	people_fully_vaccinated	people_vaccinated_per_hundred	distributed_per_hundred	daily_vaccinations_raw	daily_vaccinations	daily_vaccinations_per_million	share_doses_used
0	2021-01-12	Alabama	78134.0	377025.0	70861.0	0.15	1.59	7270.0	1.45	7.69	NaN	NaN	NaN	0.207
174	2021-01-12	Alaska	35838.0	141600.0	22486.0	0.74	4.90	5400.0	3.07	19.36	NaN	NaN	NaN	0.253
348	2021-01-12	American Samoa	2124.0	10650.0	842.0	0.47	3.81	260.0	1.51	19.12	NaN	NaN	NaN	0.199
522	2021-01-12	Arizona	141355.0	563025.0	95141.0	0.11	1.94	8343.0	1.31	7.74	NaN	NaN	NaN	0.251
696	2021-01-12	Arkansas	40879.0	274400.0	39357.0	0.00	1.35	8.0	1.30	9.09	NaN	NaN	NaN	0.149
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
10463	2021-01-12	Virginia	190607.0	797150.0	NaN	NaN	2.23	NaN	NaN	9.34	NaN	NaN	NaN	0.239
10637	2021-01-12	Washington	195567.0	567725.0	162105.0	0.23	2.57	17689.0	2.13	7.46	NaN	NaN	NaN	0.344
10811	2021-01-12	West Virginia	103330.0	160975.0	NaN	NaN	5.77	NaN	NaN	8.98	NaN	NaN	NaN	0.642
10985	2021-01-12	Wisconsin	137253.0	429500.0	125895.0	0.19	2.36	11343.0	2.16	7.38	NaN	NaN	NaN	0.320
11159	2021-01-12	Wyoming	16467.0	47800.0	13577.0	0.37	2.85	2116.0	2.35	8.26	NaN	NaN	NaN	0.344

65 rows × 14 columns

1

US['people_vaccinated'].sum()

15746663.0

1

data=[]

1
2
3
4


# Total `people_vaccinated` per date, ordered like `date`. A single groupby pass
# replaces re-filtering the whole table once per date; NaN entries are skipped
# and an all-NaN date sums to 0.0, exactly as Series.sum() did.
# NOTE(review): this adds up every `location` row for a date. The late totals
# (about 3.7e8) exceed the 3e8 population used by the model, so the table
# presumably contains aggregate rows that get double counted -- confirm.
data = odata.groupby('date')['people_vaccinated'].sum().reindex(date).tolist()
data

1
2


dataframe=pd.DataFrame(data=data, index=date, columns=['vaccinations'], dtype=None, copy=False)
dataframe

	vaccinations
2020-12-20	0.0
2020-12-21	0.0
2020-12-22	0.0
2020-12-23	0.0
2020-12-24	0.0
...	...
2021-06-30	366017545.0
2021-07-01	367355455.0
2021-07-02	367983174.0
2021-07-03	368905977.0
2021-07-04	369524458.0

197 rows × 1 columns

1
2
3
4
5


# Remove dates without a usable total. DataFrame.drop returns a new frame, so
# its result has to be assigned; the original call discarded it and the zero
# rows stayed in the table.
dataframe = dataframe.dropna(subset=['vaccinations'])
dataframe.columns = ['vaccinations']
dataframe = dataframe.drop(dataframe[dataframe['vaccinations'] < 0.01].index)
vus = dataframe
vus

	vaccinations
2020-12-20	0.0
2020-12-21	0.0
2020-12-22	0.0
2020-12-23	0.0
2020-12-24	0.0
...	...
2021-06-30	366017545.0
2021-07-01	367355455.0
2021-07-02	367983174.0
2021-07-03	368905977.0
2021-07-04	369524458.0

197 rows × 1 columns

 1
 2
 3
 4
 5
 6
 7
 8
 9
10


# Line chart of the per-date vaccination totals. The title announces units of
# ten million (千万), so the raw counts are scaled by 1e7 before plotting.
fig = plt.figure( figsize=(16,6), dpi=100)
ax = fig.add_subplot(1,1,1)
x = dataframe.index
y = dataframe['vaccinations'] / 1e7
ax.plot( x, y, color=dt_hex, linewidth=2, linestyle='-' )
ax.set_title('接种疫苗人数(/千万)',fontdict={
      'color':'black',
      'size':24
})
# One tick every 30 entries keeps the date labels readable.
ax.set_xticks( range(0,len(x),30))

[<matplotlib.axis.XTick at 0x208ab850c18>,
 <matplotlib.axis.XTick at 0x208ab850be0>,
 <matplotlib.axis.XTick at 0x208ab8426a0>,
 <matplotlib.axis.XTick at 0x208ad89a588>,
 <matplotlib.axis.XTick at 0x208ad89aa20>,
 <matplotlib.axis.XTick at 0x208ad89aeb8>,
 <matplotlib.axis.XTick at 0x208ad89a978>]

1

cdata=pd.read_csv(r'DXYArea.csv')

1

cdata

	continentName	continentEnglishName	countryName	countryEnglishName	provinceName	provinceEnglishName	province_zipCode	province_confirmedCount	province_suspectedCount	province_curedCount	province_deadCount	updateTime	cityName	cityEnglishName	city_zipCode	city_confirmedCount	city_suspectedCount	city_curedCount	city_deadCount
0	北美洲	North America	美国	United States of America	美国	United States of America	971002	33718538	0.0	29096816	605526	2021-07-05 19:13:09	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	欧洲	Europe	法国	France	法国	France	961002	5921696	0.0	353370	111190	2021-07-05 19:12:06	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	南美洲	South America	巴西	Brazil	巴西	Brazil	973003	18769808	0.0	17082876	524475	2021-07-05 19:12:06	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	欧洲	Europe	瑞典	Sweden	瑞典	Sweden	962005	1090880	0.0	4971	14631	2021-07-05 19:12:06	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	欧洲	Europe	俄罗斯	Russia	俄罗斯	Russia	964006	5635294	0.0	5083441	138579	2021-07-05 19:12:06	NaN	NaN	NaN	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
554367	亚洲	Asia	中国	China	辽宁省	Liaoning	210000	0	1.0	0	0	2020-01-22 03:28:10	NaN	NaN	NaN	NaN	NaN	NaN	NaN
554368	亚洲	Asia	中国	China	台湾	Taiwan	710000	1	0.0	0	0	2020-01-22 03:28:10	NaN	NaN	NaN	NaN	NaN	NaN	NaN
554369	亚洲	Asia	中国	Hongkong	香港	Hongkong	810000	0	117.0	0	0	2020-01-22 03:28:10	NaN	NaN	NaN	NaN	NaN	NaN	NaN
554370	亚洲	Asia	中国	China	黑龙江省	Heilongjiang	230000	0	1.0	0	0	2020-01-22 03:28:10	NaN	NaN	NaN	NaN	NaN	NaN	NaN
554371	亚洲	Asia	中国	China	湖南省	Hunan	430000	1	0.0	0	0	2020-01-22 03:28:10	NaN	NaN	NaN	NaN	NaN	NaN	NaN

554372 rows × 19 columns

1
2
3


# Rows for the United States (countryName == '美国'). `cus` is modified by later
# cells, so take an explicit copy instead of keeping a slice of `cdata`.
# `comfirm` is a second independent copy used later for the confirmed counts.
cus = cdata.loc[cdata['countryName'] == '美国'].copy()
comfirm=cus.copy()
cus

	continentName	continentEnglishName	countryName	countryEnglishName	provinceName	provinceEnglishName	province_zipCode	province_confirmedCount	province_suspectedCount	province_curedCount	province_deadCount	updateTime	cityName	cityEnglishName	city_zipCode	city_confirmedCount	city_suspectedCount	city_curedCount	city_deadCount
0	北美洲	North America	美国	United States of America	美国	United States of America	971002	33718538	0.0	29096816	605526	2021-07-05 19:13:09	NaN	NaN	NaN	NaN	NaN	NaN	NaN
106	北美洲	North America	美国	United States of America	美国	United States of America	971002	33716933	0.0	29096816	605526	2021-07-05 11:05:16	NaN	NaN	NaN	NaN	NaN	NaN	NaN
308	北美洲	North America	美国	United States of America	美国	United States of America	971002	33716933	0.0	29096816	605526	2021-07-05 10:01:19	NaN	NaN	NaN	NaN	NaN	NaN	NaN
437	北美洲	North America	美国	United States of America	美国	United States of America	971002	33716933	0.0	29096816	605526	2021-07-05 09:39:02	NaN	NaN	NaN	NaN	NaN	NaN	NaN
728	北美洲	North America	美国	United States of America	美国	United States of America	971002	33714928	0.0	29087421	605524	2021-07-04 21:41:13	NaN	NaN	NaN	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
541769	北美洲	North America	美国	United States of America	美国	United States of America	971002	9	0.0	0	0	2020-02-03 09:28:34	NaN	NaN	NaN	NaN	NaN	NaN	NaN
543505	北美洲	North America	美国	United States of America	美国	United States of America	971002	8	0.0	0	0	2020-02-02 07:41:43	NaN	NaN	NaN	NaN	NaN	NaN	NaN
545557	北美洲	North America	美国	United States of America	美国	United States of America	971002	6	0.0	0	0	2020-02-01 02:48:13	NaN	NaN	NaN	NaN	NaN	NaN	NaN
547090	NaN	NaN	美国	United States of America	美国	United States of America	971002	6	0.0	0	0	2020-01-31 07:17:36	NaN	NaN	NaN	NaN	NaN	NaN	NaN
550409	NaN	NaN	美国	United States of America	美国	United States of America	971002	5	0.0	0	0	2020-01-27 17:20:43	NaN	NaN	NaN	NaN	NaN	NaN	NaN

3241 rows × 19 columns

1
2
3
4
5
6
7
8


# Reduce the US records to one cured count per calendar day, oldest first.
# `updateTime` holds 'YYYY-MM-DD HH:MM:SS' text; the old format="%Y-%m-%d" does
# not match it, and strict format matching would coerce every value to NaT, so
# the format is inferred instead.
cus = cus.assign(updateTime=pd.to_datetime(cus['updateTime'], errors='coerce').dt.date)
# keep='first' retains the first row listed for each (provinceName, day) pair.
cus = cus.drop_duplicates(subset=['provinceName', 'updateTime'], keep='first')
# Only these two of the 19 columns were left by the original drop list.
cus = cus[['province_curedCount', 'updateTime']]
cus = cus.dropna(subset=['province_curedCount'])
cus = cus[cus['province_curedCount'] >= 0.01]
#cus = cus.drop(cus[cus['updateTime']<0.01].index)
cus = cus.sort_values(by='updateTime', ascending=True)
cus

	province_curedCount	updateTime
520054	3	2020-02-12
517924	3	2020-02-13
515650	3	2020-02-14
514566	3	2020-02-15
511738	3	2020-02-16
...	...	...
3425	29026688	2021-07-01
2131	29052087	2021-07-02
1499	29072881	2021-07-03
728	29087421	2021-07-04
0	29096816	2021-07-05

477 rows × 2 columns

1

cus['province_curedCount']

520054           3
517924           3
515650           3
514566           3
511738           3
            ...   
3425      29026688
2131      29052087
1499      29072881
728       29087421
0         29096816
Name: province_curedCount, Length: 477, dtype: int64

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


# Use the day, rendered as text, as the index. astype(str) applies str() to each
# value, which is what the manual loop (and its unused counter) did.
cus["updateTime"] = cus["updateTime"].astype(str)
cus = cus.set_index('updateTime')
cus

	province_curedCount
updateTime
2020-02-12	3
2020-02-13	3
2020-02-14	3
2020-02-15	3
2020-02-16	3
...	...
2021-07-01	29026688
2021-07-02	29052087
2021-07-03	29072881
2021-07-04	29087421
2021-07-05	29096816

477 rows × 1 columns

1
2
3
4
5
6
7
8
9


# Line chart of the cured counts against the date index.
fig, axes = plt.subplots(1, 1, figsize=(16, 4))
x = cus.index
y = cus.values
plot = axes.plot(x, y, color=dt_hex, linewidth=2, linestyle='-', label='治愈')
# One tick every 40 entries keeps the date labels readable.
axes.set_xticks(range(0, len(x), 40))
axes.set_xlabel('日期', fontsize=10)
axes.set_ylabel('人数', fontsize=10)
axes.legend(loc=0, frameon=True)
plt.show()

1
2
3


# Cured counts strictly after 2021-03-01 (the index is 'YYYY-MM-DD' text, so a
# string comparison orders dates correctly), with the column renamed.
after_march_first = cus.index > '2021-03-01'
v_cus = cus.loc[after_march_first]
v_cus.columns = ['curedCount']
v_cus

	curedCount
updateTime
2021-03-02	19817532
2021-03-03	19905322
2021-03-04	19997983
2021-03-05	20093442
2021-03-06	20183329
...	...
2021-07-01	29026688
2021-07-02	29052087
2021-07-03	29072881
2021-07-04	29087421
2021-07-05	29096816

106 rows × 1 columns

1
2
3


# Vaccination totals strictly after 2021-03-01; the index is named `updateTime`
# so it can serve as the merge key later.
v_vus = vus.loc[vus.index > '2021-03-01'].rename_axis("updateTime")
v_vus

	vaccinations
updateTime
2021-03-02	105774893.0
2021-03-03	108010927.0
2021-03-04	110417023.0
2021-03-05	113492161.0
2021-03-06	117171344.0
...	...
2021-06-30	366017545.0
2021-07-01	367355455.0
2021-07-02	367983174.0
2021-07-03	368905977.0
2021-07-04	369524458.0

125 rows × 1 columns

1
2
3
4


# Attach the vaccination totals to the cured counts by date, then keep only the
# dates where both values are present.
merged = pd.merge(v_cus, v_vus, how='left', on='updateTime')
result = merged.dropna(subset=['curedCount', 'vaccinations'])
result

	curedCount	vaccinations
updateTime
2021-03-02	19817532	105774893.0
2021-03-03	19905322	108010927.0
2021-03-04	19997983	110417023.0
2021-03-05	20093442	113492161.0
2021-03-06	20183329	117171344.0
...	...	...
2021-06-30	29007495	366017545.0
2021-07-01	29026688	367355455.0
2021-07-02	29052087	367983174.0
2021-07-03	29072881	368905977.0
2021-07-04	29087421	369524458.0

105 rows × 2 columns

1

#result.to_csv("cvus.csv")

1
2
3
4


# Load the US history table, turn `date` into date objects (values that do not
# parse become NaT) and order the rows oldest first.
pusdata = pd.read_csv(r'美国covid19疫情数据l-history改.csv')
parsed_dates = pd.to_datetime(pusdata['date'], format="%Y-%m-%d", errors='coerce')
pusdata['date'] = parsed_dates.dt.date
pusdata = pusdata.sort_values('date')
pusdata

	date	death	deathIncrease	inIcuCumulative	inIcuCurrently	hospitalizedIncrease	hospitalizedCurrently	hospitalizedCumulative	negative	negativeIncrease	onVentilatorCumulative	onVentilatorCurrently	positive	positiveIncrease	Unnamed: 14	states	totalTestResults	totalTestResultsIncrease
419	2020-01-13	NaN	0	NaN	NaN	0	NaN	NaN	NaN	0	NaN	NaN	NaN	0	NaN	1	0	0
418	2020-01-14	NaN	0	NaN	NaN	0	NaN	NaN	NaN	0	NaN	NaN	0.0	0	NaN	1	0	0
417	2020-01-15	NaN	0	NaN	NaN	0	NaN	NaN	NaN	0	NaN	NaN	0.0	0	NaN	1	0	0
416	2020-01-16	NaN	0	NaN	NaN	0	NaN	NaN	NaN	0	NaN	NaN	0.0	0	NaN	1	0	0
415	2020-01-17	NaN	0	NaN	NaN	0	NaN	NaN	NaN	0	NaN	NaN	0.0	0	NaN	1	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4	2021-03-03	508665.0	2449	45214.0	9359.0	2172	45462.0	873073.0	73857281.0	267001	4260.0	3094.0	28520365.0	66836	0.002349	56	357888671	1406795
3	2021-03-04	510408.0	1743	45293.0	8970.0	1530	44172.0	874603.0	74035238.0	177957	4267.0	2973.0	28585852.0	65487	0.002296	56	359479655	1590984
2	2021-03-05	512629.0	2221	45373.0	8634.0	2781	42541.0	877384.0	74307155.0	271917	4275.0	2889.0	28654639.0	68787	0.002406	56	361224072	1744417
1	2021-03-06	514309.0	1680	45453.0	8409.0	503	41401.0	877887.0	74450990.0	143835	4280.0	2811.0	28714654.0	60015	0.002094	56	362655064	1430992
0	2021-03-07	515151.0	842	45475.0	8134.0	726	40199.0	878613.0	74582825.0	131835	4281.0	2802.0	28756489.0	41835	0.001457	56	363825123	1170059

420 rows × 18 columns

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


# Index the table by the date rendered as text and keep only `positive`.
# astype(str) replaces the manual str() loop and its unused counter.
pusdata["date"] = pusdata["date"].astype(str)
pusdata = pusdata.set_index('date')[['positive']]
pusdata = pusdata.rename_axis("updateTime")
pusdata

	positive
updateTime
2020-01-13	NaN
2020-01-14	0.0
2020-01-15	0.0
2020-01-16	0.0
2020-01-17	0.0
...	...
2021-03-03	28520365.0
2021-03-04	28585852.0
2021-03-05	28654639.0
2021-03-06	28714654.0
2021-03-07	28756489.0

420 rows × 1 columns

1

#pd.merge( result,pusdata, how='left',on='updateTime').dropna(subset=['positive'],inplace=True)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17


#confirm
# Reduce the US records to one confirmed count per calendar day, oldest first,
# indexed by the day as text.
# `updateTime` holds 'YYYY-MM-DD HH:MM:SS' text; the old format="%Y-%m-%d" does
# not match it, and strict format matching would coerce every value to NaT, so
# the format is inferred instead.
comfirm = comfirm.assign(updateTime=pd.to_datetime(comfirm['updateTime'], errors='coerce').dt.date)
# keep='first' retains the first row listed for each (provinceName, day) pair.
comfirm = comfirm.drop_duplicates(subset=['provinceName', 'updateTime'], keep='first')
# Only these two of the 19 columns were left by the original drop list.
comfirm = comfirm[['province_confirmedCount', 'updateTime']]
comfirm = comfirm.dropna(subset=['province_confirmedCount'])
comfirm = comfirm[comfirm['province_confirmedCount'] >= 0.01]
comfirm = comfirm.sort_values(by='updateTime', ascending=True)
# astype(str) replaces the manual str() loop and its unused counter.
comfirm["updateTime"] = comfirm["updateTime"].astype(str)
comfirm = comfirm.set_index('updateTime')
comfirm.columns = ['confirmedCount']
comfirm

	confirmedCount
updateTime
2020-01-27	5
2020-01-31	6
2020-02-01	6
2020-02-02	8
2020-02-03	9
...	...
2021-07-01	33665047
2021-07-02	33679489
2021-07-03	33709176
2021-07-04	33714928
2021-07-05	33718538

486 rows × 1 columns

1
2
3


# Add the confirmed counts by date and keep only the dates that have one.
with_confirmed = pd.merge(result, comfirm, how='left', on='updateTime')
result = with_confirmed.dropna(subset=['confirmedCount'])
result

	curedCount	vaccinations	confirmedCount
updateTime
2021-03-02	19817532	105774893.0	28664448
2021-03-03	19905322	108010927.0	28719624
2021-03-04	19997983	110417023.0	28771556
2021-03-05	20093442	113492161.0	28827140
2021-03-06	20183329	117171344.0	28894787
...	...	...	...
2021-06-30	29007495	366017545.0	33653426
2021-07-01	29026688	367355455.0	33665047
2021-07-02	29052087	367983174.0	33679489
2021-07-03	29072881	368905977.0	33709176
2021-07-04	29087421	369524458.0	33714928

105 rows × 3 columns

1

#result.to_csv("cvus.csv")

完整文档详见：博客相关资源-常微分偏微分建模练习

目录