<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2019-01-01 to 2019-01-06
Freq: D
Data columns (total 4 columns):
A 6 non-null float64
B 6 non-null float64
C 6 non-null float64
D 5 non-null float64
dtypes: float64(4)
memory usage: 240.0 bytes
2、Numpy通用函数同样适用于Pandas
(1)向量化运算
x = pd.DataFrame(np.arange(4).reshape(1, 4))
x
0
1
2
3
0
0
1
2
3
x+5
0
1
2
3
0
5
6
7
8
np.exp(x)
0
1
2
3
0
1.0
2.718282
7.389056
20.085537
y = pd.DataFrame(np.arange(4,8).reshape(1, 4))
y
0
1
2
3
0
4
5
6
7
x*y
0
1
2
3
0
0
5
12
21
(2)矩阵化运算
np.random.seed(42)
x = pd.DataFrame(np.random.randint(10, size=(30, 30)))
x
0
1
2
3
4
5
6
7
8
9
...
20
21
22
23
24
25
26
27
28
29
0
6
3
7
4
6
9
2
6
7
4
...
4
0
9
5
8
0
9
2
6
3
1
8
2
4
2
6
4
8
6
1
3
...
2
0
3
1
7
3
1
5
5
9
2
3
5
1
9
1
9
3
7
6
8
...
6
8
7
0
7
7
2
0
7
2
3
2
0
4
9
6
9
8
6
8
7
...
0
2
4
2
0
4
9
6
6
8
4
9
9
2
6
0
3
3
4
6
6
...
9
6
8
6
0
0
8
8
3
8
5
2
6
5
7
8
4
0
2
9
7
...
2
0
4
0
7
0
0
1
1
5
6
6
4
0
0
2
1
4
9
5
6
...
5
0
8
5
2
3
3
2
9
2
7
2
3
6
3
8
0
7
6
1
7
...
3
0
1
0
4
4
6
8
8
2
8
2
2
3
7
5
7
0
7
3
0
...
1
1
5
2
8
3
0
3
0
4
9
3
7
7
6
2
0
0
2
5
6
...
4
2
3
2
0
0
4
5
2
8
10
4
7
0
4
2
0
3
4
6
0
...
5
6
1
9
1
9
0
7
0
8
11
5
6
9
6
9
2
1
8
7
9
...
6
5
2
8
9
5
9
9
5
0
12
3
9
5
5
4
0
7
4
4
6
...
0
7
2
9
6
9
4
9
4
6
13
8
4
0
9
9
0
1
5
8
7
...
5
8
4
0
3
4
9
9
4
6
14
3
0
4
6
9
9
5
4
3
1
...
6
1
0
3
7
1
2
0
0
2
15
4
2
0
0
7
9
1
2
1
2
...
6
3
9
4
1
7
3
8
4
8
16
3
9
4
8
7
2
0
2
3
1
...
8
0
0
3
8
5
2
0
3
8
17
2
8
6
3
2
9
4
4
2
8
...
6
9
4
2
6
1
8
9
9
0
18
5
6
7
9
8
1
9
1
4
4
...
3
5
2
5
6
9
9
2
6
2
19
1
9
3
7
8
6
0
2
8
0
...
4
3
2
2
3
8
1
8
0
0
20
4
5
5
2
6
8
9
7
5
7
...
3
5
0
8
0
4
3
2
5
1
21
2
4
8
1
9
7
1
4
6
7
...
0
1
8
2
0
4
6
5
0
4
22
4
5
2
4
6
4
4
4
9
9
...
1
7
6
9
9
1
5
5
2
1
23
0
5
4
8
0
6
4
4
1
2
...
8
5
0
7
6
9
2
0
4
3
24
9
7
0
9
0
3
7
4
1
5
...
3
7
8
2
2
1
9
2
2
4
25
4
1
9
5
4
5
0
4
8
9
...
9
3
0
7
0
2
3
7
5
9
26
6
7
1
9
7
2
6
2
6
1
...
0
6
5
9
8
0
3
8
3
9
27
2
8
1
3
5
1
7
7
0
2
...
8
0
4
5
4
5
5
6
3
7
28
6
8
6
2
2
7
4
3
7
5
...
1
7
9
2
4
5
9
5
3
2
29
3
0
3
0
0
9
5
4
3
2
...
1
3
0
4
8
0
8
7
5
6
30 rows × 30 columns
转置
z = x.T
z
0
1
2
3
4
5
6
7
8
9
...
20
21
22
23
24
25
26
27
28
29
0
6
8
3
2
9
2
6
2
2
3
...
4
2
4
0
9
4
6
2
6
3
1
3
2
5
0
9
6
4
3
2
7
...
5
4
5
5
7
1
7
8
8
0
2
7
4
1
4
2
5
0
6
3
7
...
5
8
2
4
0
9
1
1
6
3
3
4
2
9
9
6
7
0
3
7
6
...
2
1
4
8
9
5
9
3
2
0
4
6
6
1
6
0
8
2
8
5
2
...
6
9
6
0
0
4
7
5
2
0
5
9
4
9
9
3
4
1
0
7
0
...
8
7
4
6
3
5
2
1
7
9
6
2
8
3
8
3
0
4
7
0
0
...
9
1
4
4
7
0
6
7
4
5
7
6
6
7
6
4
2
9
6
7
2
...
7
4
4
4
4
4
2
7
3
4
8
7
1
6
8
6
9
5
1
3
5
...
5
6
9
1
1
8
6
0
7
3
9
4
3
8
7
6
7
6
7
0
6
...
7
7
9
2
5
9
1
2
5
2
10
3
8
7
1
3
5
3
0
7
5
...
4
0
2
6
4
1
9
9
1
0
11
7
1
4
0
6
7
6
8
3
5
...
7
5
0
5
1
0
5
8
3
5
12
7
9
1
6
2
8
7
8
5
5
...
9
0
4
1
2
9
2
4
3
1
13
2
8
4
6
5
3
0
1
7
2
...
3
1
8
5
8
8
2
5
5
7
14
5
9
7
7
1
0
5
6
3
5
...
9
0
0
1
6
9
8
3
5
9
15
4
4
9
4
9
0
7
9
2
7
...
7
4
2
1
6
8
6
9
0
4
16
1
1
8
2
8
9
4
2
8
1
...
9
9
3
1
5
8
4
1
7
6
17
7
3
8
7
4
3
3
6
2
4
...
1
8
0
2
7
5
9
7
5
9
18
5
6
0
5
5
6
1
9
8
0
...
4
5
0
1
3
7
6
5
2
1
19
1
7
8
2
3
1
5
8
1
0
...
8
0
7
3
7
0
8
4
8
7
20
4
2
6
0
9
2
5
3
1
4
...
3
0
1
8
3
9
0
8
1
1
21
0
0
8
2
6
0
0
0
1
2
...
5
1
7
5
7
3
6
0
7
3
22
9
3
7
4
8
4
8
1
5
3
...
0
8
6
0
8
0
5
4
9
0
23
5
1
0
2
6
0
5
0
2
2
...
8
2
9
7
2
7
9
5
2
4
24
8
7
7
0
0
7
2
4
8
0
...
0
0
9
6
2
0
8
4
4
8
25
0
3
7
4
0
0
3
4
3
0
...
4
4
1
9
1
2
0
5
5
0
26
9
1
2
9
8
0
3
6
0
4
...
3
6
5
2
9
3
3
5
9
8
27
2
5
0
6
8
1
2
8
3
5
...
2
5
5
0
2
7
8
6
5
7
28
6
5
7
6
3
1
9
8
0
2
...
5
0
2
4
2
5
3
3
3
5
29
3
9
2
8
8
5
2
2
4
8
...
1
4
1
3
4
9
9
7
2
6
30 rows × 30 columns
np.random.seed(1)
y = pd.DataFrame(np.random.randint(10, size=(30, 30)))
y
0
1
2
3
4
5
6
7
8
9
...
20
21
22
23
24
25
26
27
28
29
0
5
8
9
5
0
0
1
7
6
9
...
1
7
0
6
9
9
7
6
9
1
1
0
1
8
8
3
9
8
7
3
6
...
9
2
0
4
9
2
7
7
9
8
2
6
9
3
7
7
4
5
9
3
6
...
7
7
1
1
3
0
8
6
4
5
3
6
2
5
7
8
4
4
7
7
4
...
0
1
9
8
2
3
1
2
7
2
4
6
0
9
2
6
6
2
7
7
0
...
1
5
4
0
7
8
9
5
7
0
5
9
3
9
1
4
4
6
8
8
9
...
1
8
7
0
3
4
2
0
3
5
6
1
2
4
3
0
6
0
7
2
8
...
4
3
3
6
7
3
5
3
2
4
7
4
0
3
3
8
3
5
6
7
5
...
1
7
3
1
6
6
9
6
9
6
8
0
0
2
9
6
0
6
7
0
3
...
6
7
9
5
4
9
5
2
5
6
9
6
8
7
7
7
2
6
0
5
2
...
7
0
6
2
4
3
6
7
6
3
10
0
6
4
7
6
2
9
5
9
9
...
4
9
3
9
1
2
5
4
0
8
11
2
3
9
9
4
4
8
2
1
6
...
0
5
9
8
6
6
0
4
7
3
12
0
1
6
0
6
1
6
4
2
5
...
8
8
0
7
2
0
7
1
1
9
13
5
1
5
9
6
4
9
8
7
5
...
2
4
3
2
0
0
4
2
5
0
14
0
3
8
5
3
1
4
7
3
2
...
8
5
5
7
5
9
1
3
9
3
15
3
3
6
1
3
0
5
0
5
2
...
7
1
7
7
3
8
3
0
6
3
16
0
6
5
9
6
4
6
6
2
2
...
3
6
8
6
5
1
3
2
6
3
17
6
7
2
8
0
1
8
6
0
0
...
5
6
2
5
4
3
0
6
2
1
18
9
4
4
0
9
8
7
7
6
1
...
7
9
9
7
1
1
4
6
5
6
19
4
1
1
5
1
2
6
2
3
3
...
0
0
0
9
8
5
9
3
4
0
20
9
8
6
3
9
9
0
8
1
6
...
2
9
0
1
3
9
4
8
8
8
21
2
8
6
4
9
0
5
5
6
1
...
6
7
5
6
8
7
4
2
4
0
22
0
3
5
9
0
3
6
5
1
1
...
6
2
5
3
9
3
9
5
1
9
23
7
7
0
8
6
1
2
0
4
4
...
1
9
6
0
2
8
3
7
2
5
24
6
0
4
2
3
1
0
5
7
0
...
1
1
2
7
5
2
9
4
7
3
25
5
0
2
1
4
9
4
6
9
3
...
5
5
3
5
9
2
7
4
1
6
26
9
8
1
8
1
6
2
6
1
8
...
2
5
1
2
5
3
3
6
1
8
27
1
8
6
4
6
9
5
4
7
2
...
9
3
1
5
1
1
7
1
2
6
28
0
7
7
4
3
2
7
8
5
2
...
0
2
8
3
7
3
9
2
3
8
29
8
0
2
6
8
3
6
4
9
7
...
6
7
8
5
7
2
5
3
4
5
30 rows × 30 columns
x.dot(y)
0
1
2
3
4
5
6
7
8
9
...
20
21
22
23
24
25
26
27
28
29
0
616
560
723
739
612
457
681
799
575
590
...
523
739
613
580
668
602
733
585
657
700
1
520
438
691
600
612
455
666
764
707
592
...
555
681
503
679
641
506
779
494
633
590
2
557
570
786
807
690
469
804
828
704
573
...
563
675
712
758
793
672
754
550
756
638
3
605
507
664
701
660
496
698
806
651
575
...
582
685
668
586
629
534
678
484
591
626
4
599
681
753
873
721
563
754
770
620
654
...
633
747
661
677
726
649
716
610
735
706
5
422
354
602
627
613
396
617
627
489
423
...
456
572
559
537
499
384
589
436
574
507
6
359
446
599
599
481
357
577
572
451
464
...
449
550
495
532
633
554
663
476
565
602
7
531
520
698
590
607
537
665
696
571
472
...
576
588
551
665
652
527
742
528
650
599
8
449
322
547
533
593
399
584
638
587
424
...
402
596
523
523
447
362
561
386
529
484
9
373
433
525
601
522
345
551
521
434
447
...
508
498
438
478
459
418
488
407
503
496
10
500
427
574
607
667
477
652
656
615
477
...
622
702
531
610
558
532
598
471
582
561
11
664
694
772
841
779
574
730
810
711
608
...
591
760
616
638
721
676
846
678
754
708
12
545
547
687
701
721
576
689
724
710
532
...
674
684
648
694
710
564
757
571
671
656
13
574
586
723
750
691
494
696
787
667
523
...
618
681
568
682
715
644
756
557
690
604
14
502
382
645
557
570
403
538
677
500
501
...
369
650
507
576
546
531
554
437
616
463
15
510
505
736
651
649
510
719
733
694
557
...
605
717
574
642
678
576
755
455
598
654
16
567
376
614
612
643
514
598
724
547
464
...
456
639
520
560
569
442
596
517
659
532
17
626
716
828
765
740
603
809
852
692
591
...
664
716
655
721
742
612
819
593
744
712
18
600
559
667
664
641
556
624
815
638
564
...
581
701
559
677
710
554
748
597
614
657
19
445
431
661
681
641
552
690
719
602
474
...
515
637
576
620
572
512
599
455
622
538
20
523
569
784
725
713
501
740
772
638
640
...
589
775
664
686
726
672
747
548
723
645
21
487
465
553
639
517
449
592
609
454
398
...
492
567
534
404
554
417
561
466
498
492
22
479
449
574
686
583
377
566
614
563
455
...
453
539
491
501
596
520
722
478
565
501
23
483
386
476
526
550
426
492
585
536
482
...
322
541
438
456
487
408
502
426
474
481
24
523
551
658
767
537
444
663
731
576
577
...
522
590
525
664
691
548
635
526
641
538
25
652
656
738
753
853
508
752
815
669
576
...
694
833
693
606
575
616
704
559
728
672
26
578
577
744
856
699
497
779
800
733
587
...
630
754
704
834
760
680
765
592
731
629
27
554
494
665
689
630
574
695
703
636
599
...
554
685
532
658
649
554
693
577
634
668
28
498
552
659
784
552
492
690
775
544
551
...
567
636
518
599
742
521
733
533
605
604
29
513
491
563
642
477
367
589
647
516
484
...
428
574
504
548
553
483
540
407
547
455
30 rows × 30 columns
%timeit x.dot(y)
218 µs ± 18.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit np.dot(x, y)
81.1 µs ± 2.85 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
执行相同运算,Numpy与Pandas的对比
x1 = np.array(x)
x1
y1 = np.array(y)
y1
%timeit x1.dot(y1)
22.1 µs ± 992 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit np.dot(x1, y1)
22.6 µs ± 766 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit np.dot(x.values, y.values)
42.9 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Convert the NumPy arrays into nested lists of built-in ints, so the
# pure-Python matrix product f(x3, y3) works on plain Python objects only.
x2 = list(x1)
y2 = list(y1)
x3 = [[int(value) for value in row] for row in x2]
y3 = [[int(value) for value in row] for row in y2]
def f(x, y):
    """Multiply two matrices stored as nested lists, using plain Python loops.

    This is the naive triple-loop product; it is kept loop-based on purpose
    so it can serve as the pure-Python baseline in a timing comparison.

    Args:
        x: Left operand, a list of rows (each row a list of numbers).
        y: Right operand, a list of rows; it is indexed as ``y[k][j]`` where
            ``k`` runs over the positions of a row of ``x``.

    Returns:
        A new list of rows holding the product. An empty ``x`` yields ``[]``.
    """
    res = []
    for x_row in x:
        row = []
        # The width of the result is the width of the right operand.
        for j in range(len(y[0])):
            sum_row = 0
            # Dot product of the current row of x with column j of y.
            for k, x_val in enumerate(x_row):
                sum_row += x_val * y[k][j]
            row.append(sum_row)
        res.append(row)
    return res
%timeit f(x3, y3)
4.29 ms ± 207 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
一般来说,纯粹的计算在Numpy里执行得更快
Numpy更侧重于计算,Pandas更侧重于数据处理
(3)广播运算
np.random.seed(42)
x = pd.DataFrame(np.random.randint(10, size=(3, 3)), columns=list("ABC"))
x
A
B
C
0
6
3
7
1
4
6
9
2
2
6
7
按行广播
x.iloc[0]
A 6
B 3
C 7
Name: 0, dtype: int32
x/x.iloc[0]
A
B
C
0
1.000000
1.0
1.000000
1
0.666667
2.0
1.285714
2
0.333333
2.0
1.000000
按列广播
x.A
0 6
1 4
2 2
Name: A, dtype: int32
x.div(x.A, axis=0) # add sub div mul
A
B
C
0
1.0
0.5
1.166667
1
1.0
1.5
2.250000
2
1.0
3.0
3.500000
x.div(x.iloc[0], axis=1)
A
B
C
0
1.000000
1.0
1.000000
1
0.666667
2.0
1.285714
2
0.333333
2.0
1.000000
3、新的用法
(1)索引对齐
A = pd.DataFrame(np.random.randint(0, 20, size=(2, 2)), columns=list("AB"))
A
A
B
0
3
7
1
2
1
B = pd.DataFrame(np.random.randint(0, 10, size=(3, 3)), columns=list("ABC"))
B
A 6.000000
B 7.666667
C 6.333333
D 7.750000
dtype: float64
data.fillna(value=fill)
A
B
C
D
0
1.0
7.666667
2.000000
3.0
1
6.0
4.000000
5.000000
6.0
2
7.0
8.000000
6.333333
9.0
3
10.0
11.000000
12.000000
13.0
fill = data.stack().mean()
fill
7.0
data.fillna(value=fill)
A
B
C
D
0
1.0
7.0
2.0
3.0
1
7.0
4.0
5.0
6.0
2
7.0
8.0
7.0
9.0
3
10.0
11.0
12.0
13.0
12.5 合并数据
构造一个生成DataFrame的函数
import pandas as pd
import numpy as np
def make_df(cols, ind):
    """Build a small demo DataFrame whose cells spell out their own position.

    Each cell holds the column label followed by the row label, e.g. the
    cell in column ``"A"`` and row ``0`` is the string ``"A0"``.

    Args:
        cols: Iterable of column labels (a string yields one column per
            character).
        ind: Iterable of row labels; also used as the index of the result.

    Returns:
        A ``pandas.DataFrame`` of strings with one row per label in ``ind``.
    """
    # ``ind`` is walked once per column and once more for the index. A
    # one-shot iterator would be exhausted after the first column, so
    # snapshot it before use.
    if iter(ind) is ind:
        ind = list(ind)
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data, index=ind)
make_df("ABC", range(3))
暂无评论内容