#!/usr/bin/python
# -*- coding: utf-8 -*-

import io
import os
import subprocess
import sys

def parse_argv(argv):
    """Return the input file path taken from the command-line arguments.

    argv is expected to hold exactly two items: the program name followed
    by the input file. For any other length a usage message is written to
    stderr and the process exits with status 1 (SystemExit is raised).
    """
    if len(argv) != 2:
        # sys.stderr.write works on both Python 2 and Python 3, and keeps
        # the diagnostic out of the normal output stream.
        sys.stderr.write("Usage: detect_sponsor.py <input_file>\n")
        sys.exit(1)

    return argv[1]

def _run_tool(args):
    """Run an external command and return its exit status.

    The command is given as an argument list and executed without a shell,
    so file names containing spaces, quotes or shell metacharacters are
    passed to the tool verbatim. A tool that cannot be started at all is
    reported on stderr and treated as a failure (status 127) rather than
    raising.
    """
    try:
        return subprocess.call(args)
    except OSError as err:
        sys.stderr.write('cannot run %s: %s\n' % (args[0], err))
        return 127


def detect_sponsor(file_name):
    """Look for the sponsor caption in frames sampled from a video.

    Frames are extracted into ./img with ffmpeg, each one is binarized
    with ImageMagick's convert into ./mono.png, and Tesseract (Japanese)
    writes its OCR output to result.txt. The search stops at the first
    frame whose first OCR line is the sponsor caption; a message naming
    that frame is printed.

    Every file in ./img is removed before returning, even if an error
    occurs. ./mono.png and result.txt are left in the working directory.

    file_name -- path of the video file to inspect.

    Returns the path of the matching frame, or None if none matched.
    """
    img_dir = './img'
    mono_path = './mono.png'
    sponsor_text = u'提 供'

    # ffmpeg does not create the output directory itself.
    if not os.path.isdir(img_dir):
        os.makedirs(img_dir)

    found = None
    try:
        # Sample the video from 00:01:20 for 3m40s (i.e. up to 00:05:00).
        # "-r 0.33" yields roughly one screenshot every three seconds.
        # As before, extraction failure is not fatal: whatever frames are
        # present in ./img are still examined.
        _run_tool(['ffmpeg', '-ss', '00:01:20', '-t', '00:03:40',
                   '-i', file_name, '-f', 'image2', '-r', '0.33',
                   '-vcodec', 'png', 'img/image_%03d.png'])

        # Sorted so frames are checked in numbering order and the
        # reported match is deterministic.
        for frame_name in sorted(os.listdir(img_dir)):
            frame_path = img_dir + '/' + frame_name

            # Binarize the frame with a threshold of about 95%.
            if _run_tool(['convert', '-threshold', '95%',
                          frame_path, mono_path]) != 0:
                # Skip: mono.png would still hold an earlier frame.
                continue

            # Run Tesseract with the Japanese model on the binarized image.
            if _run_tool(['tesseract', mono_path, 'result',
                          '-l', 'jpn']) != 0:
                # Skip: result.txt would be stale and could yield a
                # false positive.
                continue

            # Decode explicitly as UTF-8 so the comparison does not
            # depend on the locale.
            with io.open('result.txt', 'r', encoding='utf-8',
                         errors='replace') as result_file:
                first_line = result_file.readline()

            if first_line.strip() == sponsor_text:
                print('%s has sponsor text!' % frame_path)
                found = frame_path
                break
    finally:
        # Always discard the extracted frames.
        for frame_name in os.listdir(img_dir):
            os.remove(img_dir + '/' + frame_name)

    return found
    
def main(argv=None):
    """Script entry point: parse the arguments and run the detection.

    argv -- argument list including the program name; when omitted
            (None), sys.argv is used.
    """
    # Without this fallback the None default would reach parse_argv and
    # fail on len(None).
    if argv is None:
        argv = sys.argv

    file_name = parse_argv(argv)
    detect_sponsor(file_name)

# Run only when executed as a script; main's return value becomes the
# process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)