Raspberry Pi : Object Tracking : YOLO : tracking.py

tracking.py の各変数の中身を print で出力して確認する。以下にスクリプトと、その実行結果を示す。

作成: 2021-05-18
更新: 2021-05-18

#!/usr/bin/env python
"""Track one object class in a live camera feed with tiny YOLOv3.

The script loads the class names and the tiny YOLOv3 checkpoint, opens
camera 0, runs detection on every captured frame, draws the detections on
the frame, and prints the intermediate variables (frame size, resized
frame, resize factor, detected boxes) so their contents can be inspected.
Press 'q' while the image window is active to quit.
"""

import sys

# Project-local and third-party imports are kept in their original relative
# order (core.*, tensorflow, cv2); nothing is removed because other parts of
# the project may rely on these names being importable from here.
from core.utils import load_class_names, load_image, draw_boxes, draw_boxes_frame
from core.yolo_tiny import YOLOv3_tiny
from core.yolo import YOLOv3
import tensorflow as tf
import cv2

##### tiny YOLO model settings #################

# Object classes the model can detect.
class_names, n_classes = load_class_names()
print( 'len(class_names) : ' )
print( len(class_names) )
print( 'class_names : ' )
print( class_names )

iou_threshold = 0.1
confidence_threshold = 0.25

model = YOLOv3_tiny(n_classes=n_classes,
                    iou_threshold=iou_threshold,
                    confidence_threshold=confidence_threshold)

# Graph input: a batch of exactly one image of the model's input size with
# three channels.
inputs = tf.placeholder(tf.float32, [1, *model.input_size, 3])
detections = model(inputs)
saver = tf.train.Saver(tf.global_variables(scope=model.scope))

##### Tracking target #################

target_class = 'sports ball'

# Index of the target class inside class_names. Resolve it up front and stop
# with a clear message when the name is unknown, instead of hitting a
# NameError on target_n later inside the capture loop.
target_n = next(
    (index for index, name in enumerate(class_names) if name == target_class),
    None,
)
if target_n is None:
    sys.exit("target class %r was not found in class_names" % (target_class,))


##### Exit process #################

def destroy():
    """Release the camera, close every OpenCV window and exit the program.

    Reads the module-level ``cap``, so it must only be called after the
    camera has been opened.

    Raises:
        SystemExit: always, via ``sys.exit()``.
    """
    # Stop the camera capture.
    cap.release()
    # Close the streaming window.
    cv2.destroyAllWindows()
    # Terminate the program.
    sys.exit()


##### START ##########################

with tf.Session() as sess:
    # Load the tiny YOLO weights.
    saver.restore(sess, './weights/model-tiny.ckpt')

    # Open the camera stream (camera number 0).
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        sys.exit("could not open camera 0")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed grab leaves no usable frame; stop cleanly rather
                # than crash on frame.shape.
                print("failed to read a frame from the camera", file=sys.stderr)
                break

            # (width, height)
            frame_size = (frame.shape[1], frame.shape[0])
            print( 'frame_size : ')
            print( frame_size )

            # model.input_size is reversed to build the dsize argument of
            # cv2.resize.
            # NOTE(review): the frame is fed to the model exactly as captured
            # (no colour-channel conversion) - confirm this matches what the
            # model expects.
            resized_frame = cv2.resize(frame,
                                       dsize=tuple(model.input_size[::-1]),
                                       interpolation=cv2.INTER_NEAREST)
            result = sess.run(detections, feed_dict={inputs: [resized_frame]})
            print( 'resized_frame : ' )
            print( resized_frame )

            # Draw the detections on the camera frame and display it.
            draw_boxes_frame(frame, frame_size, result, class_names, model.input_size)
            cv2.imshow('frame', frame)

            #### Detection of target_class ####
            boxes_dict = result[0]
            # Per-axis factors that map model-input coordinates back to
            # camera-frame coordinates.
            resize_factor = (frame_size[0] / model.input_size[1],
                             frame_size[1] / model.input_size[0])
            print( 'resize_factor : ' )
            print( resize_factor )

            boxes = boxes_dict[target_n]
            # len() is used deliberately: boxes looks like a NumPy array in
            # the printed output (TODO confirm), and array truthiness is
            # ambiguous.
            if len(boxes) > 0:
                print("\nターゲット発見")
                print( 'boxes : ' )
                print( boxes )

                # Only the first box is considered; the rest are ignored.
                box = boxes[0]
                print( 'box : ' )
                print( box )
                # The first four values of the box, scaled to frame pixels.
                # Even positions use resize_factor[0], odd positions use
                # resize_factor[1]. Not consumed yet; kept for the tracking
                # logic that builds on this script.
                coordinates = [int(value * resize_factor[i % 2])
                               for i, value in enumerate(box[:4])]

            # Quit when 'q' is pressed while the image window is active.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, including when the
        # loop is left by an exception.
        cap.release()
        cv2.destroyAllWindows()

len(class_names) : 80 class_names : ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] frame_size : (640, 480) resized_frame : [[[ 47 49 35] [ 44 46 32] [ 33 36 25] ... [ 31 36 54] [ 30 33 52] [ 27 30 50]] [[ 41 43 29] [ 38 40 26] [ 21 24 13] ... [ 28 33 51] [ 28 31 50] [ 28 31 51]] [[ 33 36 25] [ 27 30 19] [ 15 18 7] ... [ 25 30 48] [ 29 32 51] [ 30 33 53]] ... [[143 131 135] [142 130 134] [143 131 135] ... [166 154 157] [166 154 157] [166 154 158]] [[142 130 134] [142 130 134] [143 131 135] ... [165 153 156] [165 153 156] [164 152 156]] [[140 128 132] [141 129 133] [142 130 134] ... [165 153 156] [165 153 156] [167 153 156]]] resize_factor : (1.5384615384615385, 1.1538461538461537) ターゲット発見 boxes : [[209.06332 220.62724 233.29825 246.30476 0.5513591]] box : [209.06332 220.62724 233.29825 246.30476 0.5513591]