import argparse
import os.path
import numpy as np
import cv2 as cv

backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD, cv.dnn.DNN_TARGET_HDDL)


def preprocess(image):
    """
    Create a 4-dimensional blob from the image and its horizontally flipped copy
    :param image: input image
    """
    image_rev = np.flip(image, axis=1)
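    # The mean triple is subtracted channel-wise (BGR order, no scaling or
    # channel swap), matching the preprocessing the model was trained with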
    input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
    return input


def run_net(input, model_path, backend, target):
    """
    Read the network and run inference
    :param input: 4-dimensional input blob
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation device
    """
    net = cv.dnn.readNet(model_path)
    net.setPreferableBackend(backend)
    net.setPreferableTarget(target)
    net.setInput(input)
    out = net.forward()
    return out


def postprocess(out, input_shape):
    """
    Create a grayscale human segmentation map
    :param out: network output
    :param input_shape: input image width and height
    """
    # LIP classes
    # 0 Background
    # 1 Hat
    # 2 Hair
    # 3 Glove
    # 4 Sunglasses
    # 5 UpperClothes
    # 6 Dress
    # 7 Coat
    # 8 Socks
    # 9 Pants
    # 10 Jumpsuits
    # 11 Scarf
    # 12 Skirt
    # 13 Face
    # 14 LeftArm
    # 15 RightArm
    # 16 LeftLeg
    # 17 RightLeg
    # 18 LeftShoe
    # 19 RightShoe
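    # The batch holds two predictions: index 0 for the original image,
    # index 1 for the horizontally flipped copy (see preprocess)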
    head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
    head_output = head_output.squeeze(0)
    tail_output = tail_output.squeeze(0)

    # Resize each of the 20 class score maps back to the input image size
    head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output[:, ...]])
    tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output[:, ...]])

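    # Predictions for the flipped image have left/right classes mirrored:
    # swap the paired channels (arms, legs, shoes), then flip the maps back
    # horizontally so they align with the original image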
    tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
    tail_list = [arr.squeeze(0) for arr in tail_list]
    tail_list_rev = [tail_list[i] for i in range(14)]
    tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
    tail_output_rev = np.stack(tail_list_rev, axis=0)
    tail_output_rev = np.flip(tail_output_rev, axis=2)
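    # Average the two predictions and take the most probable class per pixel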
    raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
    raw_output_all = np.argmax(raw_output_all, axis=1)
    raw_output_all = raw_output_all.transpose(1, 2, 0)
    return raw_output_all


def decode_labels(gray_image):
    """
    Colorize image according to labels
    :param gray_image: grayscale human segmentation result
    """
    height, width, _ = gray_image.shape
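    # One color triple per LIP class, in the class order listed in postprocess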
    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255), (85, 255, 170),
              (170, 255, 85), (255, 255, 0), (255, 170, 0)]

    segm = np.stack([colors[idx] for idx in gray_image.flatten()])
    segm = segm.reshape(height, width, 3).astype(np.uint8)
    segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
    return segm


def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
    """
    Prepare input for execution, run net and postprocess output to parse human.
    :param image: input image
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation target device
    """
    input = preprocess(image)
    # The blob is laid out as NCHW, so indices 2 and 3 are height and width
    input_h, input_w = input.shape[2:]
    output = run_net(input, model_path, backend, target)
    grayscale_out = postprocess(output, (input_w, input_h))
    segmentation = decode_labels(grayscale_out)
    return segmentation

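# A minimal usage sketch of the functions above ('person.jpg' and the output
# name are hypothetical; the model default matches the --model argument below):
#   img = cv.imread('person.jpg')
#   segm = parse_human(img, 'lip_jppnet_384.pb')
#   cv.imwrite('segmentation.png', segm)
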

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
    parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help="Choose one of computation backends: "
                             "%d: automatically (by default), "
                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                             "%d: OpenCV implementation" % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: NCS2 VPU, '
                             '%d: HDDL VPU' % targets)
    args, _ = parser.parse_known_args()

    if not os.path.isfile(args.model):
        raise OSError("Model does not exist")

    image = cv.imread(args.input)
    if image is None:
        raise OSError("Input image does not exist")
    output = parse_human(image, args.model, args.backend, args.target)
    winName = 'Deep learning human parsing in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
    cv.imshow(winName, output)
    cv.waitKey()
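
# Example invocation (the script file name is assumed; adjust to your copy):
#   python human_parsing.py -i image.jpg -m lip_jppnet_384.pb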