{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### 摩尔斯编解码字典" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "morse_dict = {\n", " 'A': '·-',\n", " 'B': '-···',\n", " 'C': '-·-·',\n", " 'D': '-··',\n", " 'E': '·',\n", " 'F': '··-·',\n", " 'G': '--·',\n", " 'H': '····',\n", " 'I': '··',\n", " 'J': '·---',\n", " 'K': '-·-',\n", " 'L': '·-··',\n", " 'M': '--',\n", " 'N': '-·',\n", " 'O': '---',\n", " 'P': '·--·',\n", " 'Q': '--·-',\n", " 'R': '·-·',\n", " 'S': '···',\n", " 'T': '-',\n", " 'U': '··-',\n", " 'V': '···-',\n", " 'W': '·--',\n", " 'X': '-··-',\n", " 'Y': '-·--',\n", " 'Z': '--··',\n", " '1': '·----',\n", " '2': '··---',\n", " '3': '···--',\n", " '4': '····-',\n", " '5': '·····',\n", " '6': '-····',\n", " '7': '--···',\n", " '8': '---··',\n", " '9': '----·',\n", " '0': '-----',\n", " '.': '·-·-·-',\n", " ':': '---···',\n", " ',': '--··--',\n", " ';': '-·-·-·',\n", " '?': '··--··',\n", " '=': '-···-',\n", " \"'\": '·----·',\n", " '/': '-··-·',\n", " '!': '-·-·--',\n", " '-': '-····-',\n", " '_': '··--·-',\n", " '\"': '·-··-·',\n", " '(': '-·--·',\n", " ')': '-·--·-',\n", " '$': '···-··-',\n", " '&': '·-···',\n", " '@': '·--·-·',\n", " '+': '·-·-·'\n", "}\n", "morse_dict_r = {\n", " '·-': 'A',\n", " '-···': 'B',\n", " '-·-·': 'C',\n", " '-··': 'D',\n", " '·': 'E',\n", " '··-·': 'F',\n", " '--·': 'G',\n", " '····': 'H',\n", " '··': 'I',\n", " '·---': 'J',\n", " '-·-': 'K',\n", " '·-··': 'L',\n", " '--': 'M',\n", " '-·': 'N',\n", " '---': 'O',\n", " '·--·': 'P',\n", " '--·-': 'Q',\n", " '·-·': 'R',\n", " '···': 'S',\n", " '-': 'T',\n", " '··-': 'U',\n", " '···-': 'V',\n", " '·--': 'W',\n", " '-··-': 'X',\n", " '-·--': 'Y',\n", " '--··': 'Z',\n", " '·----': '1',\n", " '··---': '2',\n", " '···--': '3',\n", " '····-': '4',\n", " '·····': '5',\n", " '-····': '6',\n", " '--···': '7',\n", " '---··': '8',\n", " '----·': '9',\n", " '-----': '0',\n", " '·-·-·-': '.',\n", " '---···': ':',\n", " '--··--': ',',\n", " '-·-·-·': ';',\n", " '··--··': '?',\n", " '-···-': '=',\n", " '·----·': \"'\",\n", " '-··-·': '/',\n", " '-·-·--': '!',\n", " '-····-': '-',\n", " '··--·-': '_',\n", " '·-··-·': '\"',\n", " '-·--·': '(',\n", " '-·--·-': ')',\n", " '···-··-': '$',\n", " '·-···': '&',\n", " '·--·-·': '@',\n", " '·-·-·': '+'\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 莫尔斯编解码类实现" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "class morseCodec:\n", " \"\"\"Morse Encoding and Decoding\n", " \"\"\"\n", " def __init__(self):\n", " self.morse_dict = {\n", " 'A': '·-',\n", " 'B': '-···',\n", " 'C': '-·-·',\n", " 'D': '-··',\n", " 'E': '·',\n", " 'F': '··-·',\n", " 'G': '--·',\n", " 'H': '····',\n", " 'I': '··',\n", " 'J': '·---',\n", " 'K': '-·-',\n", " 'L': '·-··',\n", " 'M': '--',\n", " 'N': '-·',\n", " 'O': '---',\n", " 'P': '·--·',\n", " 'Q': '--·-',\n", " 'R': '·-·',\n", " 'S': '···',\n", " 'T': '-',\n", " 'U': '··-',\n", " 'V': '···-',\n", " 'W': '·--',\n", " 'X': '-··-',\n", " 'Y': '-·--',\n", " 'Z': '--··',\n", " '1': '·----',\n", " '2': '··---',\n", " '3': '···--',\n", " '4': '····-',\n", " '5': '·····',\n", " '6': '-····',\n", " '7': '--···',\n", " '8': '---··',\n", " '9': '----·',\n", " '0': '-----',\n", " '.': '·-·-·-',\n", " ':': '---···',\n", " ',': '--··--',\n", " ';': '-·-·-·',\n", " '?': '··--··',\n", " '=': '-···-',\n", " \"'\": '·----·',\n", " '/': '-··-·',\n", " '!': '-·-·--',\n", " '-': '-····-',\n", " '_': '··--·-',\n", " '\"': '·-··-·',\n", " '(': '-·--·',\n", " ')': '-·--·-',\n", " '$': '···-··-',\n", " '&': '·-···',\n", " '@': '·--·-·',\n", " '+': '·-·-·'\n", " }\n", " self.morse_dict_r = {\n", " '·-': 'A',\n", " '-···': 'B',\n", " '-·-·': 'C',\n", " '-··': 'D',\n", " '·': 'E',\n", " '··-·': 'F',\n", " '--·': 'G',\n", " '····': 'H',\n", " '··': 'I',\n", " '·---': 'J',\n", " '-·-': 'K',\n", " '·-··': 'L',\n", " '--': 'M',\n", " '-·': 'N',\n", " '---': 'O',\n", " '·--·': 'P',\n", " '--·-': 'Q',\n", " '·-·': 'R',\n", " '···': 'S',\n", " '-': 'T',\n", " '··-': 'U',\n", " '···-': 'V',\n", " '·--': 'W',\n", " '-··-': 'X',\n", " '-·--': 'Y',\n", " '--··': 'Z',\n", " '·----': '1',\n", " '··---': '2',\n", " '···--': '3',\n", " '····-': '4',\n", " '·····': '5',\n", " '-····': '6',\n", " '--···': '7',\n", " '---··': '8',\n", " '----·': '9',\n", " '-----': '0',\n", " '·-·-·-': '.',\n", " '---···': ':',\n", " '--··--': ',',\n", " '-·-·-·': ';',\n", " '··--··': '?',\n", " '-···-': '=',\n", " '·----·': \"'\",\n", " '-··-·': '/',\n", " '-·-·--': '!',\n", " '-····-': '-',\n", " '··--·-': '_',\n", " '·-··-·': '\"',\n", " '-·--·': '(',\n", " '-·--·-': ')',\n", " '···-··-': '$',\n", " '·-···': '&',\n", " '·--·-·': '@',\n", " '·-·-·': '+'\n", " }\n", " \n", " def encode(self, src_text):\n", " result = ''\n", " src_text = src_text.upper()\n", " for single_char in src_text:\n", " result += self.morse_dict.get(single_char, '\\\\') + ' '\n", " return result\n", "\n", " def decode(self, morse_txt):\n", " result = ''\n", " for seg in morse_txt.split():\n", " result += self.morse_dict_r.get(seg, ' ')\n", " return result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 平滑莫尔斯编码编解码类(未完成)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "class smoothMorseCodec(morseCodec):\n", " def __init__(self):\n", " morseCodec.__init__(self)\n", " pass\n", " \n", " def encode(self, src_text):\n", " result = ''\n", " src_text = src_text.upper()\n", " for single_char in src_text:\n", " result += self.morse_dict.get(single_char, '\\\\')\n", " return result\n", " \n", " def decode(self, morse_txt):\n", " result = ''\n", " for seg in morse_txt.split('\\\\'):\n", " result += self.morse_dict_r.get(seg, ' ')\n", " return result" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'···-··---···-·-·-----··-'" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "my_smooth_morse = smoothMorseCodec()\n", "my_smooth_morse.encode('I love you').replace('\\\\', '')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 递归方式实现平滑莫尔斯电码片段的解码" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "max_moorse_len = max([len(k) for k in morse_dict_r])\n", "\n", "def guess_morse(morse_seg, cur_morse, cur_start, result):\n", " for end in range(cur_start + 1, min(cur_start + max_moorse_len + 1, len(morse_seg) + 1)):\n", " single_char = morse_dict_r.get(morse_seg[cur_start:end], None) #对切片尝试进行解码\n", " if single_char:\n", " if end == len(morse_seg): #解码至串末尾递归结束\n", " result.append(cur_morse + single_char)\n", " break\n", " else:\n", " guess_morse(morse_seg, cur_morse + single_char, end, result) #递归解码剩余部分\n", " if cur_start == 0:\n", " return result" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "result = guess_morse('·--··-······-·-·--··----·', '', 0, [])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8023840" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 用词典进行筛选" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "word_set = {}\n", "num = 0\n", "with open('google-10000-english.txt') as f:\n", " for word in f.read().split():\n", " num += 1\n", " word_set[word.upper()] = num" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{1, 2, 3}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set([1,2,2,3])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "466551" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(word_set)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 四种方式实现筛选" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PRESENTATION 2161\n" ] } ], "source": [ "for word in result: #循环遍历方式\n", " if word in word_set:\n", " print(word, word_set[word])" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "result = [(3213,'love'), (2420,'hello')]" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(2420, 'hello'), (3213, 'love')]" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(result)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[word for word in result if word in word_set] #列表推导" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(filter(lambda x: x in word_set, result)) #利用filter()函数" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AUGER', 'AUGITE', 'LOIR', 'LOVE', 'LTZEN', 'RAGER'}" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(result) & word_set #利用集合运算" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ETEETTTEEETE',\n", " 'ETEETTTEEEN',\n", " 'ETEETTTEEAE',\n", " 'ETEETTTEER',\n", " 'ETEETTTEITE',\n", " 'ETEETTTEIN',\n", " 'ETEETTTEUE',\n", " 'ETEETTTEF',\n", " 'ETEETTTIETE',\n", " 'ETEETTTIEN']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result[:10]" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "all_word_dict = {}\n", "my_smooth_morse = smoothMorseCodec()\n", "\n", "with open('words.txt') as f:\n", " for word in f.read().split():\n", " all_word_dict[my_smooth_morse.encode(word)] = word.upper()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "def guess_morse_new(morse_seg):\n", " if morse_seg in all_word_dict:\n", " print(all_word_dict[morse_seg])\n", " else:\n", " print('N/A')" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PRESENTATION\n" ] } ], "source": [ "guess_morse_new('·--··-······-·-·--··----·')" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [], "source": [ "word_dict = {}\n", "with open('google-10000-english.txt') as f:\n", " for word in f.read().split()[:3000]:\n", " word_dict[my_smooth_morse.encode(word)] = word" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 添加缺失单词" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "word_dict[my_smooth_morse.encode('i')] = 'i'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 单词级递归解码" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [], "source": [ "max_moorse_len = max([len(k) for k in word_dict])\n", "min_moorse_len = min([len(k) for k in word_dict])\n", "\n", "def guess_morse(morse_seg, cur_morse, cur_start, result, depth):\n", " for end in range(cur_start + min_moorse_len, min(cur_start + max_moorse_len + 1, len(morse_seg) + 1)):\n", " single_word = word_dict.get(morse_seg[cur_start:end], None) #对切片尝试进行解码\n", " if single_word:\n", " if end == len(morse_seg): #解码至串末尾递归结束\n", " result.append(cur_morse + ' ' + single_word)\n", " break\n", " elif depth <= 7:\n", " guess_morse(morse_seg, cur_morse + ' ' + single_word, end, result, depth + 1) #递归解码剩余部分\n", " if cur_start == 0:\n", " return result" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [], "source": [ "result = guess_morse('···-··---···-·-·-----··-', '', 0, [], 0)" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "272337" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(result)" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "result_ = []\n", "for item in result:\n", " result_.append((len(item.split()), item))" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "result_ = sorted(result_)" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'I' in word_dict.values()" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(3, ' i love you'),\n", " (4, ' e e love you'),\n", " (4, ' e fat dr you'),\n", " (4, ' eu eat dr you'),\n", " (4, ' eu u msn you'),\n", " (4, ' i aim dr you'),\n", " (4, ' i au msn you'),\n", " (4, ' i aug f you'),\n", " (4, ' i los co ga'),\n", " (4, ' i los com u'),\n", " (4, ' i los come et'),\n", " (4, ' i los n you'),\n", " (4, ' i lost e you'),\n", " (4, ' i lost rom u'),\n", " (4, ' i love km ga'),\n", " (4, ' i love not tea'),\n", " (4, ' i love two u'),\n", " (4, ' ie name f you'),\n", " (4, ' ie nj scott u'),\n", " (4, ' ie tea msn you'),\n", " (4, ' ie teams co ga'),\n", " (4, ' ie teams com u'),\n", " (4, ' ie teams come et'),\n", " (4, ' ie teams n you'),\n", " (4, ' ie tim dr you'),\n", " (4, ' sd m dr you'),\n", " (4, ' sd md en you'),\n", " (4, ' sd mt scott u'),\n", " (4, ' sd os co ga'),\n", " (4, ' sd os com u')]" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result_[:30]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }