# -*- coding: cp949 -*-
from parse_base_v3 import parse_base_v3


class parse_album_v3(parse_base_v3):
    """Parser for an album detail response.

    Input: the XML document text.
    Output: the parsed content, stored in ``self.result`` and expressed
    with plain Python dictionaries and lists.
    """

    def __init__(self, parseText):
        """Hand *parseText* to the base parser, then extract the album data.

        NOTE(review): ``self.document`` and ``self.result`` are not created
        here; presumably ``parse_base_v3.__init__`` sets them up -- confirm.
        """
        super(parse_album_v3, self).__init__(parseText)
        self.__parse_response(self.document)

    def __parse_response(self, document):
        """Fill ``self.result`` from the album *document*.

        Keys written: ``id``, ``seq``, ``title``, ``shorttitle``,
        ``longtitle``, ``link``, ``releasedate``, ``thumbnail``, ``image``,
        ``description``, ``artist`` (dict with ``id`` and ``name``),
        ``disc`` (list of dicts with ``no``, ``title`` and ``song``) and
        ``product`` (list of dicts with ``seqno``, ``releasedate`` and
        ``release``).

        Args:
            document: DOM document; only ``getElementsByTagName`` on it and
                ``getAttribute`` / ``getElementsByTagName`` on its elements
                are used.

        Raises:
            IndexError: if the document has no ``item`` or
                ``maniadb:artist`` element, or the item has no
                ``maniadb:products`` element.
            ValueError: if a song's ``track`` attribute is not an integer.
        """
        item = document.getElementsByTagName(u'item')[0]

        # The item's identifiers are attributes, not child elements.
        self.result[u'id'] = item.getAttribute(u'id')
        self.result[u'seq'] = item.getAttribute(u'seq')

        # (result key, source tag) pairs read as text from the item.
        # NOTICE: the source tag for the thumbnail is spelled 'thumnail'
        # (the tag name itself has a typo); the result key is spelled
        # correctly.
        item_text_fields = (
            (u'title', u'title'),
            (u'shorttitle', u'maniadb:shorttitle'),
            (u'longtitle', u'maniadb:longtitle'),
            (u'link', u'link'),
            (u'releasedate', u'releasedate'),
            (u'thumbnail', u'thumnail'),
            (u'image', u'image'),
            (u'description', u'description'),
        )
        for key, tag in item_text_fields:
            self.result[key] = self._get_text_data(item, tag)

        # Artist info: id, name.
        artist = document.getElementsByTagName(u'maniadb:artist')[0]
        self.result[u'artist'] = {}
        self.result[u'artist'][u'id'] = self._get_text_data(artist, u'id')
        self.result[u'artist'][u'name'] = self._get_text_data(artist, u'name')

        # Disc info: one entry per <disc>, each with its list of songs.
        self.result[u'disc'] = []
        for disc in document.getElementsByTagName(u'disc'):
            disc_buf = {
                u'no': disc.getAttribute(u'no'),
                u'title': self._get_text_data(disc, u'title'),
                u'song': [],
            }
            # The feed sends duplicated track data. ``trackno`` is one more
            # than the number of songs accepted so far on this disc; a song
            # whose track number is lower than that is treated as a repeat
            # and skipped.
            trackno = 1
            for song in disc.getElementsByTagName(u'song'):
                if trackno > int(song.getAttribute(u'track')):
                    continue
                disc_buf[u'song'].append({
                    u'id': song.getAttribute(u'id'),
                    u'track': song.getAttribute(u'track'),
                    u'title': self._get_text_data(song, u'title'),
                    u'runningtime': self._get_text_data(song, u'runningtime'),
                    u'performer': self._get_text_data(song, u'performer'),
                })
                trackno += 1
            self.result[u'disc'].append(disc_buf)

        # Product info: seqno, releasedate, release.
        product_infos = item.getElementsByTagName(u'maniadb:products')[0]
        self.result[u'product'] = []
        for product in product_infos.getElementsByTagName(u'product'):
            self.result[u'product'].append({
                u'seqno': self._get_text_data(product, u'seqno'),
                u'releasedate': self._get_text_data(product, u'releasedate'),
                u'release': self._get_text_data(product, u'release'),
            })


if __name__ == '__main__':
    # Manual smoke test: parse the sample response and dump the result.
    with open('album_info_sample.xml', 'r') as f:
        xmldoc = f.read()
    pa = parse_album_v3(xmldoc)
    # Parenthesized single-argument form prints the same on Python 2 and 3.
    print(pa.result)