{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 莫尔斯编解码字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward table: character -> Morse code, written with '·' (dot) and '-' (dash).\n",
    "morse_dict = {\n",
    "    # letters\n",
    "    'A': '·-', 'B': '-···', 'C': '-·-·', 'D': '-··', 'E': '·', 'F': '··-·', 'G': '--·',\n",
    "    'H': '····', 'I': '··', 'J': '·---', 'K': '-·-', 'L': '·-··', 'M': '--', 'N': '-·',\n",
    "    'O': '---', 'P': '·--·', 'Q': '--·-', 'R': '·-·', 'S': '···', 'T': '-', 'U': '··-',\n",
    "    'V': '···-', 'W': '·--', 'X': '-··-', 'Y': '-·--', 'Z': '--··',\n",
    "    # digits\n",
    "    '1': '·----', '2': '··---', '3': '···--', '4': '····-', '5': '·····',\n",
    "    '6': '-····', '7': '--···', '8': '---··', '9': '----·', '0': '-----',\n",
    "    # punctuation and symbols\n",
    "    '.': '·-·-·-', ':': '---···', ',': '--··--', ';': '-·-·-·', '?': '··--··',\n",
    "    '=': '-···-', \"'\": '·----·', '/': '-··-·', '!': '-·-·--', '-': '-····-',\n",
    "    '_': '··--·-', '\"': '·-··-·', '(': '-·--·', ')': '-·--·-', '$': '···-··-',\n",
    "    '&': '·-···', '@': '·--·-·', '+': '·-·-·',\n",
    "}\n",
    "\n",
    "# Reverse table: Morse code -> character, obtained by inverting morse_dict.\n",
    "morse_dict_r = {code: char for char, code in morse_dict.items()}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 莫尔斯编解码类实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "class morseCodec:\n",
    "    \"\"\"Character-level Morse encoder/decoder.\n",
    "\n",
    "    ``morse_dict`` maps a character to its code, written with '·' (dot) and\n",
    "    '-' (dash); ``morse_dict_r`` maps a code back to its character.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.morse_dict = {\n",
    "            # letters\n",
    "            'A': '·-', 'B': '-···', 'C': '-·-·', 'D': '-··', 'E': '·', 'F': '··-·', 'G': '--·',\n",
    "            'H': '····', 'I': '··', 'J': '·---', 'K': '-·-', 'L': '·-··', 'M': '--', 'N': '-·',\n",
    "            'O': '---', 'P': '·--·', 'Q': '--·-', 'R': '·-·', 'S': '···', 'T': '-', 'U': '··-',\n",
    "            'V': '···-', 'W': '·--', 'X': '-··-', 'Y': '-·--', 'Z': '--··',\n",
    "            # digits\n",
    "            '1': '·----', '2': '··---', '3': '···--', '4': '····-', '5': '·····',\n",
    "            '6': '-····', '7': '--···', '8': '---··', '9': '----·', '0': '-----',\n",
    "            # punctuation and symbols\n",
    "            '.': '·-·-·-', ':': '---···', ',': '--··--', ';': '-·-·-·', '?': '··--··',\n",
    "            '=': '-···-', \"'\": '·----·', '/': '-··-·', '!': '-·-·--', '-': '-····-',\n",
    "            '_': '··--·-', '\"': '·-··-·', '(': '-·--·', ')': '-·--·-', '$': '···-··-',\n",
    "            '&': '·-···', '@': '·--·-·', '+': '·-·-·',\n",
    "        }\n",
    "        # Inverse lookup (code -> character), derived from the forward table.\n",
    "        self.morse_dict_r = {code: char for char, code in self.morse_dict.items()}\n",
    "\n",
    "    def encode(self, src_text):\n",
    "        \"\"\"Return the Morse form of ``src_text``.\n",
    "\n",
    "        The text is upper-cased first. Each character contributes its code\n",
    "        followed by one blank; a character that has no table entry (a blank\n",
    "        included) contributes a single backslash instead of a code. A\n",
    "        non-empty input therefore yields a string ending in a blank.\n",
    "        \"\"\"\n",
    "        placeholder = '\\\\'\n",
    "        return ''.join(\n",
    "            self.morse_dict.get(char, placeholder) + ' '\n",
    "            for char in src_text.upper()\n",
    "        )\n",
    "\n",
    "    def decode(self, morse_txt):\n",
    "        \"\"\"Return the text for a whitespace-separated sequence of codes.\n",
    "\n",
    "        Any token that is not a known code (such as the backslash emitted by\n",
    "        ``encode``) is turned into a blank.\n",
    "        \"\"\"\n",
    "        return ''.join(self.morse_dict_r.get(token, ' ') for token in morse_txt.split())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 平滑莫尔斯电码编解码类(未完成)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "class smoothMorseCodec(morseCodec):\n",
    "    \"\"\"Morse codec variant that writes codes with no separator between characters.\n",
    "\n",
    "    The lookup tables are the ones set up by ``morseCodec.__init__``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "\n",
    "    def encode(self, src_text):\n",
    "        \"\"\"Concatenate the codes of the upper-cased characters of ``src_text``.\n",
    "\n",
    "        A character without a table entry contributes a single backslash\n",
    "        instead of a code.\n",
    "        \"\"\"\n",
    "        return ''.join(self.morse_dict.get(char, '\\\\') for char in src_text.upper())\n",
    "\n",
    "    def decode(self, morse_txt):\n",
    "        \"\"\"Split ``morse_txt`` on backslashes and look each piece up as one code.\n",
    "\n",
    "        A piece is decoded only if it is itself a key of ``morse_dict_r``;\n",
    "        any other piece becomes a blank.\n",
    "        \"\"\"\n",
    "        return ''.join(self.morse_dict_r.get(piece, ' ') for piece in morse_txt.split('\\\\'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'···-··---···-·-·-----··-'"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Encode a phrase with the separator-free codec, then strip the backslashes that\n",
    "# encode() emits for characters missing from the table (here: the blanks).\n",
    "my_smooth_morse = smoothMorseCodec()\n",
    "my_smooth_morse.encode('I love you').replace('\\\\', '')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 递归方式实现平滑莫尔斯电码片段的解码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Length of the longest code in the reverse table; caps how far a slice may extend.\n",
    "max_moorse_len = max(len(code) for code in morse_dict_r)\n",
    "\n",
    "def guess_morse(morse_seg, cur_morse, cur_start, result):\n",
    "    \"\"\"Collect every character-level reading of an unseparated Morse string.\n",
    "\n",
    "    Args:\n",
    "        morse_seg: the dot/dash string to split into codes.\n",
    "        cur_morse: characters decoded so far on the current path.\n",
    "        cur_start: index in ``morse_seg`` where the next code has to begin.\n",
    "        result: list that every complete decoding is appended to.\n",
    "\n",
    "    Returns:\n",
    "        ``result`` when called with ``cur_start == 0``, otherwise ``None``.\n",
    "    \"\"\"\n",
    "    seg_len = len(morse_seg)\n",
    "    furthest_end = min(cur_start + max_moorse_len, seg_len)\n",
    "    for end in range(cur_start + 1, furthest_end + 1):\n",
    "        single_char = morse_dict_r.get(morse_seg[cur_start:end])  # try to decode this slice\n",
    "        if not single_char:\n",
    "            continue\n",
    "        if end != seg_len:\n",
    "            # more input left: decode the remainder recursively\n",
    "            guess_morse(morse_seg, cur_morse + single_char, end, result)\n",
    "        else:\n",
    "            # slice reaches the end of the string: this path is complete\n",
    "            result.append(cur_morse + single_char)\n",
    "            break\n",
    "    return result if cur_start == 0 else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = guess_morse('·--··-······-·-·--··----·', '', 0, [])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8023840"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 用词典进行筛选"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Despite its name, word_set is a dict: upper-cased word -> 1-based position of the\n",
    "# word among the whitespace-separated tokens of the file (a repeated word keeps its last position).\n",
    "word_set = {}\n",
    "num = 0\n",
    "with open('google-10000-english.txt') as f:\n",
    "    for num, word in enumerate(f.read().split(), start=1):\n",
    "        word_set[word.upper()] = num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1, 2, 3}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set([1,2,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "466551"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(word_set)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 四种方式实现筛选"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRESENTATION 2161\n"
     ]
    }
   ],
   "source": [
    "for word in result: # plain-loop approach\n",
    "    if word in word_set:\n",
    "        print(word, word_set[word])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Small list of (number, word) pairs used to try out sorted().\n",
    "# NOTE(review): this rebinds `result`, which earlier held the list returned by guess_morse;\n",
    "# on a top-to-bottom run the filtering cells further down operate on these tuples instead.\n",
    "result = [(3213,'love'), (2420,'hello')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2420, 'hello'), (3213, 'love')]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[word for word in result if word in word_set]  # list-comprehension approach"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(filter(lambda x: x in word_set, result))  # filter() approach"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'AUGER', 'AUGITE', 'LOIR', 'LOVE', 'LTZEN', 'RAGER'}"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Set-operation approach. word_set is built as a dict, and `set & dict` raises TypeError,\n",
    "# so use intersection(), which accepts any iterable and matches against the dict's keys.\n",
    "set(result).intersection(word_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ETEETTTEEETE',\n",
       " 'ETEETTTEEEN',\n",
       " 'ETEETTTEEAE',\n",
       " 'ETEETTTEER',\n",
       " 'ETEETTTEITE',\n",
       " 'ETEETTTEIN',\n",
       " 'ETEETTTEUE',\n",
       " 'ETEETTTEF',\n",
       " 'ETEETTTIETE',\n",
       " 'ETEETTTIEN']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lookup table: separator-free Morse encoding of a word -> the upper-cased word.\n",
    "# Words that share an encoding overwrite one another, so only the last one read is kept.\n",
    "all_word_dict = {}\n",
    "my_smooth_morse = smoothMorseCodec()\n",
    "\n",
    "with open('words.txt') as f:\n",
    "    for word in f.read().split():\n",
    "        all_word_dict[my_smooth_morse.encode(word)] = word.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "def guess_morse_new(morse_seg):\n",
    "    \"\"\"Print the word stored under ``morse_seg`` in ``all_word_dict``, or 'N/A' if there is none.\"\"\"\n",
    "    print(all_word_dict.get(morse_seg, 'N/A'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRESENTATION\n"
     ]
    }
   ],
   "source": [
    "guess_morse_new('·--··-······-·-·--··----·')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lookup table: separator-free Morse encoding -> word, built from only the first 3000\n",
    "# whitespace-separated entries of the file. Values keep the spelling found in the file;\n",
    "# encode() upper-cases the word internally when forming the key.\n",
    "word_dict = {}\n",
    "with open('google-10000-english.txt') as f:\n",
    "    for word in f.read().split()[:3000]:\n",
    "        word_dict[my_smooth_morse.encode(word)] = word"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 添加缺失单词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_dict[my_smooth_morse.encode('i')] = 'i'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 单词级递归解码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Longest and shortest encoded-word lengths in word_dict; they bound the slice widths tried below.\n",
    "max_moorse_len = max(len(k) for k in word_dict)\n",
    "min_moorse_len = min(len(k) for k in word_dict)\n",
    "\n",
    "def guess_morse(morse_seg, cur_morse, cur_start, result, depth, max_depth=7):\n",
    "    \"\"\"Collect word-level readings of an unseparated Morse string.\n",
    "\n",
    "    Slices of ``morse_seg`` starting at ``cur_start`` are looked up in\n",
    "    ``word_dict``; each hit either completes a sentence or is extended\n",
    "    recursively with the rest of the string.\n",
    "\n",
    "    Args:\n",
    "        morse_seg: the dot/dash string to split into words.\n",
    "        cur_morse: words decoded so far on the current path.\n",
    "        cur_start: index in ``morse_seg`` where the next word has to begin.\n",
    "        result: list that every complete sentence is appended to.\n",
    "        depth: recursion level of this call (the initial call passes 0).\n",
    "        max_depth: a call recurses further only while ``depth <= max_depth``;\n",
    "            the default 7 is the limit that used to be hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        ``result`` when called with ``cur_start == 0``, otherwise ``None``.\n",
    "        Every word is appended as ``' ' + word``, so a sentence built from an\n",
    "        empty ``cur_morse`` starts with a blank.\n",
    "    \"\"\"\n",
    "    stop = min(cur_start + max_moorse_len + 1, len(morse_seg) + 1)\n",
    "    for end in range(cur_start + min_moorse_len, stop):\n",
    "        single_word = word_dict.get(morse_seg[cur_start:end], None)  # try to decode this slice\n",
    "        if single_word:\n",
    "            if end == len(morse_seg):  # slice reaches the end of the string: sentence complete\n",
    "                result.append(cur_morse + ' ' + single_word)\n",
    "                break\n",
    "            elif depth <= max_depth:\n",
    "                # decode the remainder recursively, one level deeper\n",
    "                guess_morse(morse_seg, cur_morse + ' ' + single_word, end, result, depth + 1, max_depth)\n",
    "    if cur_start == 0:\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = guess_morse('···-··---···-·-·-----··-', '', 0, [], 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "272337"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each candidate sentence with its word count so the pairs can be ordered by length.\n",
    "result_ = [(len(item.split()), item) for item in result]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_ = sorted(result_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'I' in word_dict.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(3, ' i love you'),\n",
       " (4, ' e e love you'),\n",
       " (4, ' e fat dr you'),\n",
       " (4, ' eu eat dr you'),\n",
       " (4, ' eu u msn you'),\n",
       " (4, ' i aim dr you'),\n",
       " (4, ' i au msn you'),\n",
       " (4, ' i aug f you'),\n",
       " (4, ' i los co ga'),\n",
       " (4, ' i los com u'),\n",
       " (4, ' i los come et'),\n",
       " (4, ' i los n you'),\n",
       " (4, ' i lost e you'),\n",
       " (4, ' i lost rom u'),\n",
       " (4, ' i love km ga'),\n",
       " (4, ' i love not tea'),\n",
       " (4, ' i love two u'),\n",
       " (4, ' ie name f you'),\n",
       " (4, ' ie nj scott u'),\n",
       " (4, ' ie tea msn you'),\n",
       " (4, ' ie teams co ga'),\n",
       " (4, ' ie teams com u'),\n",
       " (4, ' ie teams come et'),\n",
       " (4, ' ie teams n you'),\n",
       " (4, ' ie tim dr you'),\n",
       " (4, ' sd m dr you'),\n",
       " (4, ' sd md en you'),\n",
       " (4, ' sd mt scott u'),\n",
       " (4, ' sd os co ga'),\n",
       " (4, ' sd os com u')]"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_[:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}