diff --git a/README.md b/README.md index 37fd6df..7acfb6e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,14 @@ -# WeixinBot [![star this repo](http://github-svg-buttons.herokuapp.com/star.svg?user=Urinx&repo=WeixinBot&style=flat&background=1081C1)](http://github.com/Urinx/WeixinBot) [![fork this repo](http://github-svg-buttons.herokuapp.com/fork.svg?user=Urinx&repo=WeixinBot&style=flat&background=1081C1)](http://github.com/Urinx/WeixinBot/fork) ![python](https://img.shields.io/badge/python-2.7-ff69b4.svg) - +# WeixinBot 网页版微信API,包含终端版微信及微信机器人 +##与master版本的区别## +* 默认自动回复 +* 默认设置小黄鸡Simsimi为聊天机器人 +* 添加谷歌语音识别功能,用户可以自动回复语音消息 +* 自动更换小黄鸡api——key +* 修复无法稳定在线的bug(无限红包) +* 运行:python weixin2.py + ## Demo 为了运行 `weixin.py` 示例脚本,你需要有安装 `qrcode` 包,你可以通过 `pip install qrcode` 来安装。 @@ -21,741 +28,4 @@ ![5](screenshot/5.png) -**目前支持的命令**: - -`->[昵称或ID]:[内容]` 给好友发送消息 - -`m->[昵称或ID]:[文件路径]` 给好友发送文件中的内容 - -![6](screenshot/6.png) - -`f->[昵称或ID]:[文件路径]` 给好友发送文件 - -`i->[昵称或ID]:[图片路径]` 给好友发送图片 - -`e->[昵称或ID]:[文件路径]` 给好友发送表情(jpg/gif) - -`quit` 退出程序 - -![7](screenshot/7.png) - -注意,以上命令均不包含方括号。 - -## Web Weixin Pipeline - -``` - +--------------+ +---------------+ +---------------+ - | | | | | | - | Get UUID | | Get Contact | | Status Notify | - | | | | | | - +-------+------+ +-------^-------+ +-------^-------+ - | | | - | +-------+ +--------+ - | | | - +-------v------+ +-----+--+------+ +--------------+ - | | | | | | - | Get QRCode | | Weixin Init +------> Sync Check <----+ - | | | | | | | - +-------+------+ +-------^-------+ +-------+------+ | - | | | | - | | +-----------+ - | | | - +-------v------+ +-------+--------+ +-------v-------+ - | | Confirm Login | | | | -+------> Login +---------------> New Login Page | | Weixin Sync | -| | | | | | | -| +------+-------+ +----------------+ +---------------+ -| | -|QRCode Scaned| -+-------------+ -``` - -## Web Weixin API - -### 登录 - -| API | 获取 UUID | -| --- | --------- | -| url | https://login.weixin.qq.com/jslogin | -| method | POST | -| data | URL Encode | -| params | **appid**: `应用ID`
**fun**: new `应用类型`
**lang**: zh\_CN `语言`
**_**: `时间戳` | - -返回数据(String): -``` -window.QRLogin.code = 200; window.QRLogin.uuid = "xxx" -``` -> 注:这里的appid就是在微信开放平台注册的应用的AppID。网页版微信有两个AppID,早期的是`wx782c26e4c19acffb`,在微信客户端上显示为应用名称为`Web微信`;现在用的是`wxeb7ec651dd0aefa9`,显示名称为`微信网页版`。 - -![6](screenshot/8.jpg) -
- -| API | 生成二维码 | -| --- | --------- | -| url | https://login.weixin.qq.com/l/ `uuid` | -
- -| API | 二维码扫描登录 | -| --- | --------- | -| url | https://login.weixin.qq.com/cgi-bin/mmwebwx-bin/login | -| method | GET | -| params | **tip**: 1 `未扫描` 0 `已扫描`
**uuid**: xxx
**_**: `时间戳` | - -返回数据(String): -``` -window.code=xxx; - -xxx: - 408 登陆超时 - 201 扫描成功 - 200 确认登录 - -当返回200时,还会有 -window.redirect_uri="https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage?ticket=xxx&uuid=xxx&lang=xxx&scan=xxx"; -``` -
- -| API | webwxnewloginpage | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage | -| method | GET | -| params | **ticket**: xxx
**uuid**: xxx
**lang**: zh_CN `语言`
**scan**: xxx
**fun**: new | - -返回数据(XML): -``` - - 0 - OK - xxx - xxx - xxx - xxx - 1 - -``` -
- -### 微信初始化 - -| API | webwxinit | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?pass_ticket=xxx&skey=xxx&r=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: {
         Uin: xxx,
         Sid: xxx,
         Skey: xxx,
         DeviceID: xxx,
     }
} | - -返回数据(JSON): -``` -{ - "BaseResponse": { - "Ret": 0, - "ErrMsg": "" - }, - "Count": 11, - "ContactList": [...], - "SyncKey": { - "Count": 4, - "List": [ - { - "Key": 1, - "Val": 635705559 - }, - ... - ] - }, - "User": { - "Uin": xxx, - "UserName": xxx, - "NickName": xxx, - "HeadImgUrl": xxx, - "RemarkName": "", - "PYInitial": "", - "PYQuanPin": "", - "RemarkPYInitial": "", - "RemarkPYQuanPin": "", - "HideInputBarFlag": 0, - "StarFriend": 0, - "Sex": 1, - "Signature": "Apt-get install B", - "AppAccountFlag": 0, - "VerifyFlag": 0, - "ContactFlag": 0, - "WebWxPluginSwitch": 0, - "HeadImgFlag": 1, - "SnsFlag": 17 - }, - "ChatSet": xxx, - "SKey": xxx, - "ClientVersion": 369297683, - "SystemTime": 1453124908, - "GrayScale": 1, - "InviteStartCount": 40, - "MPSubscribeMsgCount": 2, - "MPSubscribeMsgList": [...], - "ClickReportInterval": 600000 -} -``` -
- -| API | webwxstatusnotify | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxstatusnotify?lang=zh_CN&pass_ticket=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: { Uin: xxx, Sid: xxx, Skey: xxx, DeviceID: xxx },
     Code: 3,
     FromUserName: `自己ID`,
     ToUserName: `自己ID`,
     ClientMsgId: `时间戳`
} | - -返回数据(JSON): -``` -{ - "BaseResponse": { - "Ret": 0, - "ErrMsg": "" - }, - ... -} -``` -
- -### 获取联系人信息 - -| API | webwxgetcontact | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin//webwxgetcontact?pass_ticket=xxx&skey=xxx&r=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | - -返回数据(JSON): -``` -{ - "BaseResponse": { - "Ret": 0, - "ErrMsg": "" - }, - "MemberCount": 334, - "MemberList": [ - { - "Uin": 0, - "UserName": xxx, - "NickName": "Urinx", - "HeadImgUrl": xxx, - "ContactFlag": 3, - "MemberCount": 0, - "MemberList": [], - "RemarkName": "", - "HideInputBarFlag": 0, - "Sex": 0, - "Signature": "你好,我们是地球三体组织。在这里,你将感受到不一样的思维模式,以及颠覆常规的世界观。而我们的目标,就是以三体人的智慧,引领人类未来科学技术500年。", - "VerifyFlag": 8, - "OwnerUin": 0, - "PYInitial": "URINX", - "PYQuanPin": "Urinx", - "RemarkPYInitial": "", - "RemarkPYQuanPin": "", - "StarFriend": 0, - "AppAccountFlag": 0, - "Statues": 0, - "AttrStatus": 0, - "Province": "", - "City": "", - "Alias": "Urinxs", - "SnsFlag": 0, - "UniFriend": 0, - "DisplayName": "", - "ChatRoomId": 0, - "KeyWord": "gh_", - "EncryChatRoomId": "" - }, - ... - ], - "Seq": 0 -} -``` -
- -| API | webwxbatchgetcontact | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxbatchgetcontact?type=ex&r=xxx&pass_ticket=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: { Uin: xxx, Sid: xxx, Skey: xxx, DeviceID: xxx },
     Count: `群数量`,
     List: [
         { UserName: `群ID`, EncryChatRoomId: "" },
         ...
     ],
} | - -返回数据(JSON)同上 -

- -### 同步刷新 - -| API | synccheck | -| --- | --------- | -| protocol | https | -| host | webpush.weixin.qq.com
webpush2.weixin.qq.com
webpush.wechat.com
webpush1.wechat.com
webpush2.wechat.com
webpush.wechatapp.com
webpush1.wechatapp.com | -| path | /cgi-bin/mmwebwx-bin/synccheck | -| method | GET | -| data | URL Encode | -| params | **r**: `时间戳`
**sid**: xxx
**uin**: xxx
**skey**: xxx
**deviceid**: xxx
**synckey**: xxx
**_**: `时间戳` | - -返回数据(String): -``` -window.synccheck={retcode:"xxx",selector:"xxx"} - -retcode: - 0 正常 - 1100 失败/登出微信 -selector: - 0 正常 - 2 新的消息 - 7 进入/离开聊天界面 -``` -
- -| API | webwxsync | -| --- | --------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid=xxx&skey=xxx&pass_ticket=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: { Uin: xxx, Sid: xxx, Skey: xxx, DeviceID: xxx },
     SyncKey: xxx,
     rr: `时间戳取反`
} | - -返回数据(JSON): -``` -{ - 'BaseResponse': {'ErrMsg': '', 'Ret': 0}, - 'SyncKey': { - 'Count': 7, - 'List': [ - {'Val': 636214192, 'Key': 1}, - ... - ] - }, - 'ContinueFlag': 0, - 'AddMsgCount': 1, - 'AddMsgList': [ - { - 'FromUserName': '', - 'PlayLength': 0, - 'RecommendInfo': {...}, - 'Content': "", - 'StatusNotifyUserName': '', - 'StatusNotifyCode': 5, - 'Status': 3, - 'VoiceLength': 0, - 'ToUserName': '', - 'ForwardFlag': 0, - 'AppMsgType': 0, - 'AppInfo': {'Type': 0, 'AppID': ''}, - 'Url': '', - 'ImgStatus': 1, - 'MsgType': 51, - 'ImgHeight': 0, - 'MediaId': '', - 'FileName': '', - 'FileSize': '', - ... - }, - ... - ], - 'ModChatRoomMemberCount': 0, - 'ModContactList': [], - 'DelContactList': [], - 'ModChatRoomMemberList': [], - 'DelContactCount': 0, - ... -} -``` -
- -### 消息接口 - -| API | webwxsendmsg | -| --- | ------------ | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?pass_ticket=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: { Uin: xxx, Sid: xxx, Skey: xxx, DeviceID: xxx },
     Msg: {
         Type: 1 `文字消息`,
         Content: `要发送的消息`,
         FromUserName: `自己ID`,
         ToUserName: `好友ID`,
         LocalID: `与clientMsgId相同`,
         ClientMsgId: `时间戳左移4位随后补上4位随机数`
     }
} | - -返回数据(JSON): -``` -{ - "BaseResponse": { - "Ret": 0, - "ErrMsg": "" - }, - ... -} -``` - -#### 发送表情 - -| API | webwxsendmsgemotion | -| --- | ------------ | -| url | https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxsendemoticon?fun=sys&f=json&pass_ticket=xxx | -| method | POST | -| data | JSON | -| header | ContentType: application/json; charset=UTF-8 | -| params | {
     BaseRequest: { Uin: xxx, Sid: xxx, Skey: xxx, DeviceID: xxx },
     Msg: {
         Type: 47 `emoji消息`,
         EmojiFlag: 2,
         MediaId: `表情上传后的媒体ID`,
         FromUserName: `自己ID`,
         ToUserName: `好友ID`,
         LocalID: `与clientMsgId相同`,
         ClientMsgId: `时间戳左移4位随后补上4位随机数`
     }
} | - -
- -### 图片接口 - -| API | webwxgeticon | -| --- | ------------ | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgeticon | -| method | GET | -| params | **seq**: `数字,可为空`
**username**: `ID`
**skey**: xxx | -
- -| API | webwxgetheadimg | -| --- | --------------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetheadimg | -| method | GET | -| params | **seq**: `数字,可为空`
**username**: `群ID`
**skey**: xxx | -
- -| API | webwxgetmsgimg | -| --- | --------------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetmsgimg | -| method | GET | -| params | **MsgID**: `消息ID`
**type**: slave `略缩图` or `为空时加载原图`
**skey**: xxx | -
- -### 多媒体接口 - -| API | webwxgetvideo | -| --- | --------------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetvideo | -| method | GET | -| params | **msgid**: `消息ID`
**skey**: xxx | -
- -| API | webwxgetvoice | -| --- | --------------- | -| url | https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetvoice | -| method | GET | -| params | **msgid**: `消息ID`
**skey**: xxx | -
- -### 账号类型 - -| 类型 | 说明 | -| :--: | --- | -| 个人账号 | 以`@`开头,例如:`@xxx` | -| 群聊 | 以`@@`开头,例如:`@@xxx` | -| 公众号/服务号 | 以`@`开头,但其`VerifyFlag` & 8 != 0

`VerifyFlag`:
         一般公众号/服务号:8
         微信自家的服务号:24
         微信官方账号`微信团队`:56 | -| 特殊账号 | 像文件传输助手之类的账号,有特殊的ID,目前已知的有:
`filehelper`, `newsapp`, `fmessage`, `weibo`, `qqmail`, `fmessage`, `tmessage`, `qmessage`, `qqsync`, `floatbottle`, `lbsapp`, `shakeapp`, `medianote`, `qqfriend`, `readerapp`, `blogapp`, `facebookapp`, `masssendapp`, `meishiapp`, `feedsapp`, `voip`, `blogappweixin`, `weixin`, `brandsessionholder`, `weixinreminder`, `officialaccounts`, `notification_messages`, `wxitil`, `userexperience_alarm`, `notification_messages` | -
- -### 消息类型 - -消息一般格式: -``` -{ - "FromUserName": "", - "ToUserName": "", - "Content": "", - "StatusNotifyUserName": "", - "ImgWidth": 0, - "PlayLength": 0, - "RecommendInfo": {...}, - "StatusNotifyCode": 4, - "NewMsgId": "", - "Status": 3, - "VoiceLength": 0, - "ForwardFlag": 0, - "AppMsgType": 0, - "Ticket": "", - "AppInfo": {...}, - "Url": "", - "ImgStatus": 1, - "MsgType": 1, - "ImgHeight": 0, - "MediaId": "", - "MsgId": "", - "FileName": "", - "HasProductId": 0, - "FileSize": "", - "CreateTime": 1454602196, - "SubMsgType": 0 -} -``` -
- -| MsgType | 说明 | -| ------- | --- | -| 1 | 文本消息 | -| 3 | 图片消息 | -| 34 | 语音消息 | -| 37 | VERIFYMSG | -| 40 | POSSIBLEFRIEND_MSG | -| 42 | 共享名片 | -| 43 | 视频通话消息 | -| 47 | 动画表情 | -| 48 | 位置消息 | -| 49 | 分享链接 | -| 50 | VOIPMSG | -| 51 | 微信初始化消息 | -| 52 | VOIPNOTIFY | -| 53 | VOIPINVITE | -| 62 | 小视频 | -| 9999 | SYSNOTICE | -| 10000 | 系统消息 | -| 10002 | 撤回消息 | -
- -**微信初始化消息** -```html -MsgType: 51 -FromUserName: 自己ID -ToUserName: 自己ID -StatusNotifyUserName: 最近联系的联系人ID -Content: - - - - // 最近联系的联系人 - filehelper,xxx@chatroom,wxid_xxx,xxx,... - - - - - // 朋友圈 - MomentsUnreadMsgStatus - - - 1454502365 - - - - - // 未读的功能账号消息,群发助手,漂流瓶等 - - - -``` - -**文本消息** -``` -MsgType: 1 -FromUserName: 发送方ID -ToUserName: 接收方ID -Content: 消息内容 -``` - -**图片消息** -```html -MsgType: 3 -FromUserName: 发送方ID -ToUserName: 接收方ID -MsgId: 用于获取图片 -Content: - - - - -``` - -**小视频消息** -```html -MsgType: 62 -FromUserName: 发送方ID -ToUserName: 接收方ID -MsgId: 用于获取小视频 -Content: - - - - -``` - -**地理位置消息** -``` -MsgType: 1 -FromUserName: 发送方ID -ToUserName: 接收方ID -Content: http://weixin.qq.com/cgi-bin/redirectforward?args=xxx -// 属于文本消息,只不过内容是一个跳转到地图的链接 -``` - -**名片消息** -```js -MsgType: 42 -FromUserName: 发送方ID -ToUserName: 接收方ID -Content: - - - -RecommendInfo: - { - "UserName": "xxx", // ID - "Province": "xxx", - "City": "xxx", - "Scene": 17, - "QQNum": 0, - "Content": "", - "Alias": "xxx", // 微信号 - "OpCode": 0, - "Signature": "", - "Ticket": "", - "Sex": 0, // 1:男, 2:女 - "NickName": "xxx", // 昵称 - "AttrStatus": 4293221, - "VerifyFlag": 0 - } -``` - -**语音消息** -```html -MsgType: 34 -FromUserName: 发送方ID -ToUserName: 接收方ID -MsgId: 用于获取语音 -Content: - - - -``` - -**动画表情** -```html -MsgType: 47 -FromUserName: 发送方ID -ToUserName: 接收方ID -Content: - - - - -``` - -**普通链接或应用分享消息** -```html -MsgType: 49 -AppMsgType: 5 -FromUserName: 发送方ID -ToUserName: 接收方ID -Url: 链接地址 -FileName: 链接标题 -Content: - - - - - 5 - - - - ... - - - - - - -``` - -**音乐链接消息** -```html -MsgType: 49 -AppMsgType: 3 -FromUserName: 发送方ID -ToUserName: 接收方ID -Url: 链接地址 -FileName: 音乐名 - -AppInfo: // 分享链接的应用 - { - Type: 0, - AppID: wx485a97c844086dc9 - } - -Content: - - - - - - 3 - 0 - - - - - 0 - - - - http://ws.stream.qqmusic.qq.com/C100003i9hMt1bgui0.m4a?vkey=6867EF99F3684&guid=ffffffffc104ea2964a111cf3ff3edaf&fromtag=46 - - - http://ws.stream.qqmusic.qq.com/C100003i9hMt1bgui0.m4a?vkey=6867EF99F3684&guid=ffffffffc104ea2964a111cf3ff3edaf&fromtag=46 - - - 0 - - - - - - - - - - http://imgcache.qq.com/music/photo/album/63/180_albumpic_143163_0.jpg - - - - - 0 - - 29 - 摇一摇搜歌 - - - -``` - -**群消息** -``` -MsgType: 1 -FromUserName: @@xxx -ToUserName: @xxx -Content: - @xxx:
xxx -``` - -**红包消息** -``` -MsgType: 49 -AppMsgType: 2001 -FromUserName: 发送方ID -ToUserName: 接收方ID -Content: 未知 -``` -注:根据网页版的代码可以看到未来可能支持查看红包消息,但目前走的是系统消息,见下。 - -**系统消息** -``` -MsgType: 10000 -FromUserName: 发送方ID -ToUserName: 自己ID -Content: - "你已添加了 xxx ,现在可以开始聊天了。" - "如果陌生人主动添加你为朋友,请谨慎核实对方身份。" - "收到红包,请在手机上查看" -``` - -持续更新中 ... - -## Todo -- [x] 发送图片或者文件功能 -- [ ] 主动给群聊发送消息 -- [ ] 建立群聊 -- [x] 群发消息 -- [ ] 补充更多的接口及完善文档 - -P.S. 还有啥要补充的也可以在[issue #8](https://github.com/Urinx/WeixinBot/issues/8)下留言 +# wechatbot diff --git a/WeixinBot.zip b/WeixinBot.zip new file mode 100644 index 0000000..c7bf3ac Binary files /dev/null and b/WeixinBot.zip differ diff --git a/autosub.py b/autosub.py new file mode 100644 index 0000000..e66f464 --- /dev/null +++ b/autosub.py @@ -0,0 +1,544 @@ +#!/usr/bin/env python +import argparse +import audioop +from googleapiclient.discovery import build +import json +import math +import multiprocessing +import os +import requests +import subprocess +import sys +import tempfile +import wave +import base64 + +# Import Google Speech API +from googleapiclient import discovery +import httplib2 +from oauth2client.client import GoogleCredentials + +from progressbar import ProgressBar, Percentage, Bar, ETA + +# -*- coding: utf-8 -*- +import sys +import json + +import pysrt + +text_type = unicode if sys.version_info < (3,) else str + +DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?' + 'version={apiVersion}') + +def get_speech_service(): + credentials = GoogleCredentials.get_application_default().create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + credentials.authorize(http) + + return discovery.build( + 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) + +def force_unicode(s, encoding="utf-8"): + if isinstance(s, text_type): + return s + return s.decode(encoding) + + +def srt_formatter(subtitles, show_before=0, show_after=0): + f = pysrt.SubRipFile() + for i, (rng, text) in enumerate(subtitles, 1): + item = pysrt.SubRipItem() + item.index = i + item.text = force_unicode(text) + start, end = rng + item.start.seconds = max(0, start - show_before) + item.end.seconds = end + show_after + f.append(item) + return '\n'.join(map(unicode, f)) + +def vtt_formatter(subtitles, show_before=0, show_after=0): + text = srt_formatter(subtitles, show_before, show_after) + text = 'WEBVTT\n\n' + text.replace(',', '.') + return text + +def json_formatter(subtitles): + subtitle_dicts = map(lambda (r, t): {'start': r[0], 'end': r[1], 'content': t}, subtitles) + return json.dumps(subtitle_dicts) + +def raw_formatter(subtitles): + return ' '.join(map(lambda (rng, text): text, subtitles)) + +FORMATTERS = { + 'srt': srt_formatter, + 'vtt': vtt_formatter, + 'json': json_formatter, + 'raw': raw_formatter, +} + +GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" +# GOOGLE_SPEECH_API_KEY = "AIzaSyBdrYmI_ZiZ7dey_ymBd-BlLZkw4IvoLZ0" +GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" + +LANGUAGE_CODES = { + 'af': 'Afrikaans', + 'ar': 'Arabic', + 'az': 'Azerbaijani', + 'be': 'Belarusian', + 'bg': 'Bulgarian', + 'bn': 'Bengali', + 'bs': 'Bosnian', + 'ca': 'Catalan', + 'ceb': 'Cebuano', + 'cs': 'Czech', + 'cy': 'Welsh', + 'da': 'Danish', + 'de': 'German', + 'el': 'Greek', + 'en': 'English', + 'eo': 'Esperanto', + 'es': 'Spanish', + 'et': 'Estonian', + 'eu': 'Basque', + 'fa': 'Persian', + 'fi': 'Finnish', + 'fr': 'French', + 'ga': 'Irish', + 'gl': 'Galician', + 'gu': 'Gujarati', + 'ha': 'Hausa', + 'hi': 'Hindi', + 'hmn': 'Hmong', + 'hr': 'Croatian', + 'ht': 'Haitian Creole', + 'hu': 'Hungarian', + 'hy': 'Armenian', + 'id': 'Indonesian', + 'ig': 'Igbo', + 'is': 'Icelandic', + 'it': 'Italian', + 'iw': 'Hebrew', + 'ja': 'Japanese', + 'jw': 'Javanese', + 'ka': 'Georgian', + 'kk': 'Kazakh', + 'km': 'Khmer', + 'kn': 'Kannada', + 'ko': 'Korean', + 'la': 'Latin', + 'lo': 'Lao', + 'lt': 'Lithuanian', + 'lv': 'Latvian', + 'mg': 'Malagasy', + 'mi': 'Maori', + 'mk': 'Macedonian', + 'ml': 'Malayalam', + 'mn': 'Mongolian', + 'mr': 'Marathi', + 'ms': 'Malay', + 'mt': 'Maltese', + 'my': 'Myanmar (Burmese)', + 'ne': 'Nepali', + 'nl': 'Dutch', + 'no': 'Norwegian', + 'ny': 'Chichewa', + 'pa': 'Punjabi', + 'pl': 'Polish', + 'pt': 'Portuguese', + 'ro': 'Romanian', + 'ru': 'Russian', + 'si': 'Sinhala', + 'sk': 'Slovak', + 'sl': 'Slovenian', + 'so': 'Somali', + 'sq': 'Albanian', + 'sr': 'Serbian', + 'st': 'Sesotho', + 'su': 'Sudanese', + 'sv': 'Swedish', + 'sw': 'Swahili', + 'ta': 'Tamil', + 'te': 'Telugu', + 'tg': 'Tajik', + 'th': 'Thai', + 'tl': 'Filipino', + 'tr': 'Turkish', + 'uk': 'Ukrainian', + 'ur': 'Urdu', + 'uz': 'Uzbek', + 'vi': 'Vietnamese', + 'yi': 'Yiddish', + 'yo': 'Yoruba', + 'zh-CN': 'Chinese (Simplified)', + 'zh-TW': 'Chinese (Traditional)', + 'zu': 'Zulu', +} + + + +def percentile(arr, percent): + arr = sorted(arr) + k = (len(arr) - 1) * percent + f = math.floor(k) + c = math.ceil(k) + if f == c: return arr[int(k)] + d0 = arr[int(f)] * (c - k) + d1 = arr[int(c)] * (k - f) + return d0 + d1 + + +def is_same_language(lang1, lang2): + return lang1.split("-")[0] == lang2.split("-")[0] + + +class FLACConverter(object): + def __init__(self, source_path, include_before=0.25, include_after=0.25): + self.source_path = source_path + self.include_before = include_before + self.include_after = include_after + + def __call__(self, region): + try: + start, end = region + start = max(0, start - self.include_before) + end += self.include_after + temp = tempfile.NamedTemporaryFile(suffix='.flac') + command = ["ffmpeg","-ss", str(start), "-t", str(end - start), + "-y", "-i", self.source_path, + "-loglevel", "error", temp.name] + subprocess.check_output(command) + os.system('stty sane') + return temp.read() + + except KeyboardInterrupt: + return + + +class SpeechRecognizer(object): + def __init__(self, language='en-US', rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY): + self.language = language + self.rate = rate + self.api_key = api_key + self.retries = retries + def __call__(self, data): + try: + for i in range(self.retries): + + speech_content = base64.b64encode(data) + service = get_speech_service() + service_request = service.speech().syncrecognize( + body={ + 'config': { + # There are a bunch of config options you can specify. See + # https://goo.gl/KPZn97 for the full list. + 'encoding': 'FLAC', # FLAC + 'sampleRate': self.rate, # default rate for the audio file + 'languageCode': 'cmn-Hans-CN', # a BCP-47 language tag + }, + 'audio': { + 'content': speech_content.decode('UTF-8') + } + }) + # [END construct_request] + # [START send_request] + response = service_request.execute() + line = "" + if response.has_key('results'): + for sentences in response['results']: + transcript = sentences['alternatives'][0] + line = line + transcript['transcript'] + + # [END send_request] + return line[:1].upper() + line[1:] + # url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key) + # headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate} + + # try: + # resp = requests.post(url, data=data, headers=headers) + # except requests.exceptions.ConnectionError: + # continue + + # for line in resp.content.split("\n"): + # try: + # line = json.loads(line) + # line = line['result'][0]['alternative'][0]['transcript'] + # return line[:1].upper() + line[1:] + # except: + # # no result + # continue + + + except KeyboardInterrupt: + return + + +class Translator(object): + def __init__(self, language, api_key, src, dst): + self.language = language + self.api_key = api_key + self.service = build('translate', 'v2', + developerKey=self.api_key) + self.src = src + self.dst = dst + + def __call__(self, sentence): + try: + if not sentence: return + result = self.service.translations().list( + source=self.src, + target=self.dst, + q=[sentence] + ).execute() + if 'translations' in result and len(result['translations']) and \ + 'translatedText' in result['translations'][0]: + return result['translations'][0]['translatedText'] + return "" + + except KeyboardInterrupt: + return + + +def which(program): + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + return None + + +def extract_audio(filename, channels=1, rate=16000): + temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) + if not os.path.isfile(filename): + print "The given file does not exist: {0}".format(filename) + raise Exception("Invalid filepath: {0}".format(filename)) + if not which("ffmpeg"): + print "ffmpeg: Executable not found on machine." + raise Exception("Dependency not found: ffmpeg") + command = ["ffmpeg", "-y", "-i", filename, "-ac", str(channels), "-ar", str(rate), "-loglevel", "error", temp.name] + subprocess.check_output(command) + return temp.name, rate + + +def find_speech_regions(filename, frame_width=4096, min_region_size=3, max_region_size=20): + reader = wave.open(filename) + sample_width = reader.getsampwidth() + rate = reader.getframerate() + n_channels = reader.getnchannels() + + total_duration = reader.getnframes() / rate + chunk_duration = float(frame_width) / rate + + n_chunks = int(total_duration / chunk_duration) + energies = [] + + for i in range(n_chunks): + chunk = reader.readframes(frame_width) + energies.append(audioop.rms(chunk, sample_width * n_channels)) + + threshold = percentile(energies, 0.2) + + elapsed_time = 0 + + regions = [] + region_start = 0.00001 + + # for energy in energies: + # is_silence = energy <= threshold + # max_exceeded = region_start and elapsed_time - region_start >= max_region_size + # if (max_exceeded or is_silence) and region_start: + # if elapsed_time - region_start >= min_region_size: + # regions.append((region_start, elapsed_time)) + # region_start = None + + # elif (not region_start) and (not is_silence): + # region_start = elapsed_time + # elapsed_time += chunk_duration + + # Canceled min + for energy in energies: + is_silence = energy <= threshold + max_exceeded = region_start and elapsed_time - region_start >= max_region_size + + if is_silence and region_start: + if max_exceeded: + regions.append((region_start, elapsed_time)) + region_start = None + + elif (not region_start) and (not is_silence): + region_start = elapsed_time + elapsed_time += chunk_duration + regions.append((region_start, elapsed_time)) + + print regions + + return regions + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('source_path', help="Path to the video or audio file to subtitle", nargs='?') + parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make", type=int, default=10) + parser.add_argument('-o', '--output', + help="Output path for subtitles (by default, subtitles are saved in \ + the same directory and name as the source path)") + parser.add_argument('-F', '--format', help="Destination subtitle format", default="srt") + parser.add_argument('-S', '--src-language', help="Language spoken in source file", default='en') + parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles", default="en") + parser.add_argument('-K', '--api-key', + help="The Google Translate API key to be used. (Required for subtitle translation)") + parser.add_argument('-V', '--video', help="Input a video file to transcribe") + parser.add_argument('--list-formats', help="List all available subtitle formats", action='store_true') + parser.add_argument('--list-languages', help="List all available source/destination languages", action='store_true') + + args = parser.parse_args() + + if args.list_formats: + print("List of formats:") + for subtitle_format in FORMATTERS.keys(): + print("{format}".format(format=subtitle_format)) + return 0 + + if args.list_languages: + print("List of all languages:") + for code, language in sorted(LANGUAGE_CODES.items()): + print("{code}\t{language}".format(code=code, language=language)) + return 0 + + if args.format not in FORMATTERS.keys(): + print("Subtitle format not supported. Run with --list-formats to see all supported formats.") + return 1 + + if args.src_language not in LANGUAGE_CODES.keys(): + print("Source language not supported. Run with --list-languages to see all supported languages.") + return 1 + + if args.dst_language not in LANGUAGE_CODES.keys(): + print( + "Destination language not supported. Run with --list-languages to see all supported languages.") + return 1 + + if not args.source_path and not args.video: + print("Error: You need to specify a source path.") + return 1 + + if args.video: + print("Transcribe video to audio") + temp = tempfile.NamedTemporaryFile(prefix="audio_", suffix='.flac') + temp_name = temp.name + command = "ffmpeg -i %s -y -ab 160k -ac 1 -ar 44100 -vn %s" %(args.video, temp_name) + subprocess.call(command, shell=True) + source_path = temp_name + output_path = args.video + + if not args.video: + source_path = args.source_path + output_path = args.source_path + + audio_filename, audio_rate = extract_audio(source_path) + + regions = find_speech_regions(audio_filename) + + pool = multiprocessing.Pool(args.concurrency) + converter = FLACConverter(source_path=audio_filename) + recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate, api_key=GOOGLE_SPEECH_API_KEY) + + transcripts = [] + if regions: + try: + widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', ETA()] + pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() + extracted_regions = [] + for i, extracted_region in enumerate(pool.imap(converter, regions)): + extracted_regions.append(extracted_region) + pbar.update(i) + pbar.finish() + + widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()] + pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() + + for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)): + transcripts.append(transcript) + pbar.update(i) + pbar.finish() + + if not is_same_language(args.src_language, args.dst_language): + if args.api_key: + google_translate_api_key = args.api_key + translator = Translator(args.dst_language, google_translate_api_key, dst=args.dst_language, + src=args.src_language) + prompt = "Translating from {0} to {1}: ".format(args.src_language, args.dst_language) + widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()] + pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() + translated_transcripts = [] + for i, transcript in enumerate(pool.imap(translator, transcripts)): + translated_transcripts.append(transcript) + pbar.update(i) + pbar.finish() + transcripts = translated_transcripts + else: + print "Error: Subtitle translation requires specified Google Translate API key. \ + See --help for further information." + return 1 + + except KeyboardInterrupt: + pbar.finish() + pool.terminate() + pool.join() + print "Cancelling transcription" + return 1 + + timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t] + + + + + formatter = FORMATTERS.get(args.format) + formatted_subtitles = formatter(timed_subtitles) + + dest = args.output + + if not dest: + base, ext = os.path.splitext(output_path) + dest = "{base}.{format}".format(base=base, format=args.format) + + with open(dest, 'wb') as f: + f.write(formatted_subtitles.encode("utf-8")) + + print "Subtitles file created at {}".format(dest) + + + + + + os.remove(audio_filename) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + + + + + + + + + + + + + + + diff --git a/googlekey.json b/googlekey.json new file mode 100644 index 0000000..edcc90a --- /dev/null +++ b/googlekey.json @@ -0,0 +1,12 @@ +{ + "type": "service_account", + "project_id": "wechatbot-154909", + "private_key_id": "e53738e2d14ccd6a29de580163d1ff49cc08e0c5", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCgnnEvff7A8nHA\nNsBCQpqcOHKhblAZSEFMFZkOKjdjxzszpdZL4nGZDEJv/LnN96pQUwdzmdq/I3Nz\n98O9L1GjU2WdC8g4pHnMAz70xUWqf2yQDJkXMMbHxs+jvs19agkaFI0FIU0cjG1f\nmlupAYPbvELbEkvw8mcKBV4IcAGTmGb1I72uwk15obAYYH+VtEVefkNaWx4eHHK6\n/6qXsU47wTLM+9v2iDyOp6GEwCHdpZXg8ghd0SPE3KRo/7nPZm3iuj2cKizrGjLg\nojl4kpFyh/jMHxfpjDTBICAv5UMc2ZMLffPgpUh1QjZnsBtaolU5T0LUSylgrbG/\nLKbGPigtAgMBAAECggEAHJp6v6Fpd8T1OuzFKkegLwfiahsyCn6SmlESU7Jy8MLv\nKRB0bEkkP8QuJLFWCXb8jpdqsUyJ5xYSHmnWIt/FYxeb9e6+NlAcHJfLY12qPWW5\n2KFVtgqKq9Mc8SUuhRIYEtvsDSjUCax8YdUkU6GgcMZDBa5pdbxFW0R/bXM85KKs\nH82dXtylgt15/SNrYKFnCXOFmmZri8GT1We1qNGBJqcJbPA0v3qhTWWmW0E9Zhrp\nMO1AEBR285wGq2E2WFVq4sH9IQoGobXevg04Qr+BGyLAerZrYQWbCfFNcknZcQCX\nnR6FFagNLtoEYBCl0Fnll8WVaR2Sm1v9R4FdPctU4QKBgQDN8OgWfS+XUxYv7Cns\n6YQDhEouqdqHV/g4b8k0fHgi2XWGhMhnEs984JnCyfoSzeng3s3koz9P+GWEn0Cd\n92ud3AMqvaQ+XKbNlShaoyYKsa5mLPqbdxKIU/BdKLD+0t/Y+Uk6MNadLfRyL2lz\nh6nO6CK1fxd62udDATCx24J4iQKBgQDHqUaHZlj+PR2y300WrLKk+oBCRYEYOpF4\nuGnrNETnu99yBiRI35yRQ/UAYShEo/B0G2wV7LNon8NMG/XdJG1qqX7QS/lWbE8x\nh9dZbW/GopVGyH3eOPDdjFkqgM2/9coSNgtaHQRpoUwpB6sNqr+3CpWSS/If/gey\nPw5Mt9UBhQKBgHIldi6I4qkId4LGbpKO+AoO+CMXKDXeT4nQhABggSIn0BloXb+r\n0G+R+gfadY6YrWpjdFGnHj+QGYzBzEoCSpbrzKGTDYc3SYknhH+AcGR7CYQ+qAUl\noZMIm3C3Titf7IzR14G2ci4au6PSnatYZYl2Z06cDAKfdJFSBpB+b7mpAoGBAJGz\nbL2IKa82SELtA2NlmWQdTZWPBPr6WogfA2RZm2METbX212m6jJXRYqvpOqqEfAcu\nr7x8JBxYYftbeBrt507r65fzqTuxBEWf3L3sx6HLqWWjD38oTFVQgw0qO3s20URi\n3fvfqwWuaFaUuvXZQdhASBAfjcxc457mtvezOyTFAoGAJnw8jFhxTqCfZCi3YHqZ\nclBgQ/w8ahqo+V2wF4zwuhyFmqbjuw7N4zOdezXgaTZCM7SDpvlpp4Ygh7ubO+91\nZ71FoGnR78wh8XjEQ1ixSWzp42HQQrUCYgnhFnBOjJaNxjGTGlMMo+Oej6Mt08Ax\nX3s6PmAl5aYA3kiItAlvbms=\n-----END PRIVATE KEY-----\n", + "client_email": "webot-361@wechatbot-154909.iam.gserviceaccount.com", + "client_id": "106655796840196313946", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://accounts.google.com/o/oauth2/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/webot-361%40wechatbot-154909.iam.gserviceaccount.com" +} diff --git a/language_codes.py b/language_codes.py new file mode 100644 index 0000000..c239c67 --- /dev/null +++ b/language_codes.py @@ -0,0 +1,45 @@ +LC_AFRIKAANS = "af" +LC_ARABIC = "ar" +LC_BULGARIAN = "bg" +LC_CATALAN = "ca" +LC_CHINESE_SIMPLIFIED = "ch" +LC_CZECH = "cs" +LC_CYMRAEG = "cy" +LC_DANSK = "da" +LC_DEUTSCH = "de" +LC_GREEK = "el" +LC_ENGLISH = "en" +LC_SPANISH = "es" +LC_EUSKARA = "eu" +LC_SUOMI = "fi" +LC_FRENCH = "fr" +LC_HEBREW = "he" +LC_HINDI = "hi" +LC_CROATIAN = "hr" +LC_HUNGARIAN = "hu" +LC_INDONESIAN = "id" +LC_ITALIANO = "it" +LC_JAPANESE = "ja" +LC_KHMER = "kh" +LC_KOREAN = "ko" +LC_LITHUANIAN = "lt" +LC_MALAYALAM = "ml" +LC_BAHASA_MELAYU = "ms" +LC_NORSK = "nb" +LC_NEDERLANDS = "nl" +LC_PUNJABI = "pa" +LC_FILIPINO = "ph" +LC_POLSKI = "pl" +LC_PORTUGUESE = "pt" +LC_ROMANIAN = "ro" +LC_SERBIAN = "rs" +LC_RUSSIAN = "ru" +LC_SLOVAK = "sk" +LC_SVENSKA = "sv" +LC_TAMIL = "ta" +LC_TELUGU = "te" +LC_THAI = "th" +LC_TURKISH = "tr" +LC_UKRAINIAN = "uk" +LC_TIENG_VIET = "vn" +LC_CHINESE_TRADITIONAL = "zh" \ No newline at end of file diff --git a/language_codes.pyc b/language_codes.pyc new file mode 100644 index 0000000..81d86a1 Binary files /dev/null and b/language_codes.pyc differ diff --git a/maintest.py b/maintest.py new file mode 100644 index 0000000..c849be5 --- /dev/null +++ b/maintest.py @@ -0,0 +1,60 @@ + +import subprocess +import os +import codecs +import json +import re + +vedio_formats = ['mp4','avi','wmv','mov'] # 1 +audio_formats = ['wav','flac','mp3','aiff'] # 2 + +def file_upload(voice): + regex = r"(.+)\/(.+)" + if re.search(regex, voice): + match = re.search(regex, voice) + file_dir = match.group(1) + '/' + file_name_and_type = match.group(2).lower() + else: + raise fileNameError('fileNameError') + regex = r"(.+)\.(.+)" + if re.search(regex, file_name_and_type): + match = re.search(regex, file_name_and_type) + file_name = match.group(1) + file_type = match.group(2).lower() + else: + raise fileNameError('fileNameError') + file_pwd = file_dir + file_name_and_type + transcripts_timed_pwd = file_dir + file_name + '.json' + autosubing(file_pwd,transcripts_timed_pwd,file_type) + json_data = open(transcripts_timed_pwd) + transcripts_timed = json.load(json_data) + transcripts_content = '' + for i in transcripts_timed: + transcripts_content = transcripts_content + ' ' + i['content'] + json_data.close() + return transcripts_content + +def autosubing(file_pwd,transcripts_timed_pwd,file_type): + if file_format(file_type) == 1: + # command = "python autosub.py -F json -V %s" %(file_pwd) + command = "python autosub.py %s -F json" %(file_pwd) + else: + command = "python autosub.py %s -F json" %(file_pwd) + subprocess.call(command, shell=True) + print "Autosubed" + + +# throw formatError +def file_format(file_type): + if file_type in vedio_formats: + return 1; + elif file_type in audio_formats: + return 2 + else: raise Exception('Format prohibited') + +# dir1 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_2546547996039896197.mp3' +# dir2 = '/Users/n0where/Desktop/DFA_01.flac' +# dir3 = '/Users/n0where/GoogleDrive/ASQ/ASQ/transcripts/Chem101.mp4' +# dir4 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_1089270824656503909.mp3' +# dir5 = '/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_8675834799709315495.mp3' +# print file_upload(dir5) diff --git a/maintest.pyc b/maintest.pyc new file mode 100644 index 0000000..84cdc31 Binary files /dev/null and b/maintest.pyc differ diff --git a/response_codes.py b/response_codes.py new file mode 100644 index 0000000..32406bb --- /dev/null +++ b/response_codes.py @@ -0,0 +1,5 @@ +RESPONSE_OK = 100 +RESPONSE_BAD_REQUEST = 400 +RESPONSE_UNAUTHORIZED = 401 +RESPONSE_NOT_FOUND = 404 +RESPONSE_500 = 500 \ No newline at end of file diff --git a/response_codes.pyc b/response_codes.pyc new file mode 100644 index 0000000..8fbce4d Binary files /dev/null and b/response_codes.pyc differ diff --git a/simsimi.py b/simsimi.py new file mode 100644 index 0000000..15c25ce --- /dev/null +++ b/simsimi.py @@ -0,0 +1,33 @@ +from language_codes import LC_ENGLISH +import urllib2, urllib, json +from response_codes import RESPONSE_OK + +class SimSimiException(Exception): + pass + +class SimSimi(object): + + def __init__(self, *args, **kwargs): + self.conversation_request_url = kwargs.get('conversation_request_url','http://sandbox.api.simsimi.com/request.p') + self.conversation_key = kwargs.get('conversation_key','') + self.conversation_language = kwargs.get('conversation_language', LC_ENGLISH) + self.conversation_filter = kwargs.get('conversation_filter','0.0') + + def getConversation(self, text): + + requestParam = { + 'key':self.conversation_key, + 'lc':self.conversation_language, + 'ft':self.conversation_filter, + 'text':text + } + + requestUrl = "%s?%s" % (self.conversation_request_url, urllib.urlencode(requestParam)) + + response = urllib2.urlopen(requestUrl) + responseDict = json.loads(str(response.read())) + + if responseDict['result'] != RESPONSE_OK: + raise SimSimiException("SimSimiException occured: %s" % responseDict['msg']) + + return responseDict \ No newline at end of file diff --git a/simsimi.pyc b/simsimi.pyc new file mode 100644 index 0000000..6a3549e Binary files /dev/null and b/simsimi.pyc differ diff --git a/simsimitest.py b/simsimitest.py new file mode 100644 index 0000000..8a485ce --- /dev/null +++ b/simsimitest.py @@ -0,0 +1,37 @@ + #coding=utf-8 +import simsimi +import language_codes +import response_codes +import re +from simsimi import SimSimiException + +keys = ['20666778-3204-480a-b98b-0d705ad7c170','45c0139e-7a59-4c06-9a83-7a9e4c8f6470','59a9d8b2-e4d0-495a-8a9f-1168cbb1193f'] +keys_sum = 0 +simSimis = [] + +for i in range(len(keys)): + simSimis.append(simsimi.SimSimi( + conversation_language=language_codes.LC_CHINESE_SIMPLIFIED, + conversation_key=keys[i] )) +simSimi = simSimis[0] + +def foo(): + global simSimi + global keys_sum + try: + response = simSimi.getConversation(u'。。'.encode('utf-8')) + except Exception, e: + print e + if str(e).find("Not found") != -1: + response = {'response': "傻逼"} + elif str(e).find("Limit Exceeded") != -1: + response = {'response': "到达每日上限了,正在自动更换api_key,使用第%d个api_key"%((keys_sum+1) % len(keys)+1)} + keys_sum = keys_sum + 1 + simSimi = simSimis[keys_sum % len(keys)] + else: + response = {'response': "代码出现了未知的问题"} + + print response['response'] + +for i in range(3): + foo() \ No newline at end of file diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..f2aecdd --- /dev/null +++ b/transcribe.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Speech API sample application using the REST API for batch +processing.""" + +# [START import_libraries] +import argparse +import base64 +import json + +from googleapiclient import discovery +import httplib2 +from oauth2client.client import GoogleCredentials +# [END import_libraries] + + +# [START authenticating] +DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?' + 'version={apiVersion}') + + +# Application default credentials provided by env variable +# GOOGLE_APPLICATION_CREDENTIALS +def get_speech_service(): + credentials = GoogleCredentials.get_application_default().create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + credentials.authorize(http) + + return discovery.build( + 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) +# [END authenticating] + + +def transcribe(speech_file): + """Transcribe the given audio file. + + Args: + speech_file: the name of the audio file. + """ + # [START construct_request] + with open(speech_file, 'rb') as speech: + # Base64 encode the binary audio file for inclusion in the JSON + # request. + speech_content = base64.b64encode(speech.read()) + + service = get_speech_service() + service_request = service.speech().syncrecognize( + body={ + 'config': { + # There are a bunch of config options you can specify. See + # https://goo.gl/KPZn97 for the full list. + 'encoding': 'LINEAR16', # raw 16-bit signed LE samples + 'sampleRate': 16000, # 16 khz + # See http://g.co/cloud/speech/docs/languages for a list of + # supported languages. + 'languageCode': 'cmn-Hans-CN', # a BCP-47 language tag + }, + 'audio': { + 'content': speech_content.decode('UTF-8') + } + }) + # [END construct_request] + # [START send_request] + response = service_request.execute() + print response + return response + # print(json.dumps(response)) + # [END send_request] + +def get_content(speech_file): + content = transcribe(speech_file) + if content.has_key('results'): + return content['results'][0]['alternatives'][0]['transcript'] + else: return 'Wrong rec' + +print get_content('/Users/n0where/GoogleDrive/WeixinBot/saved/voices/voice_2376641155930976295.mp3') +# [START run_application] +# if __name__ == '__main__': +# parser = argparse.ArgumentParser() +# parser.add_argument( +# 'speech_file', help='Full path of audio file to be recognized') +# args = parser.parse_args() +# main(args.speech_file) + # [END run_application] + diff --git a/transcribe.pyc b/transcribe.pyc new file mode 100644 index 0000000..2cf31a0 Binary files /dev/null and b/transcribe.pyc differ diff --git a/weixin2.py b/weixin2.py new file mode 100644 index 0000000..455ec91 --- /dev/null +++ b/weixin2.py @@ -0,0 +1,1232 @@ + +#coding:utf-8 +#!/usr/bin/env python +import sys +import re +reload(sys) +sys.setdefaultencoding("utf-8") +import maintest +import subprocess +import qrcode +import urllib +import urllib2 +import cookielib +import requests +import xml.dom.minidom +import json +import time +import re +import sys +import os +import random +import multiprocessing +import platform +import logging +import httplib +from collections import defaultdict +from urlparse import urlparse +from lxml import html +import transcribe + +import simsimi +from language_codes import LC_CHINESE_SIMPLIFIED +from simsimi import SimSimiException +from simsimi import SimSimi +# for media upload +import mimetypes + +from requests_toolbelt.multipart.encoder import MultipartEncoder + + +keys = ['20666778-3204-480a-b98b-0d705ad7c170','45c0139e-7a59-4c06-9a83-7a9e4c8f6470','59a9d8b2-e4d0-495a-8a9f-1168cbb1193f'] +keys_sum = 0 +simSimis = [] + +for i in range(len(keys)): + simSimis.append(simsimi.SimSimi( + conversation_language=LC_CHINESE_SIMPLIFIED, + conversation_key=keys[i] )) +simSimi = simSimis[0] + +def catchKeyboardInterrupt(fn): + def wrapper(*args): + try: + return fn(*args) + except KeyboardInterrupt: + print '\n[*] 强制退出程序' + logging.debug('[*] 强制退出程序') + return wrapper + + +def _decode_list(data): + rv = [] + for item in data: + if isinstance(item, unicode): + item = item.encode('utf-8') + elif isinstance(item, list): + item = _decode_list(item) + elif isinstance(item, dict): + item = _decode_dict(item) + rv.append(item) + return rv + + + +def _decode_dict(data): + rv = {} + for key, value in data.iteritems(): + if isinstance(key, unicode): + key = key.encode('utf-8') + if isinstance(value, unicode): + value = value.encode('utf-8') + elif isinstance(value, list): + value = _decode_list(value) + elif isinstance(value, dict): + value = _decode_dict(value) + rv[key] = value + return rv + + +class WebWeixin(object): + + def __str__(self): + description = \ + "=========================\n" + \ + "[#] Web Weixin\n" + \ + "[#] Debug Mode: " + str(self.DEBUG) + "\n" + \ + "[#] Uuid: " + self.uuid + "\n" + \ + "[#] Uin: " + str(self.uin) + "\n" + \ + "[#] Sid: " + self.sid + "\n" + \ + "[#] Skey: " + self.skey + "\n" + \ + "[#] DeviceId: " + self.deviceId + "\n" + \ + "[#] PassTicket: " + self.pass_ticket + "\n" + \ + "=========================" + return description + + def __init__(self): + self.DEBUG = False + self.uuid = '' + self.base_uri = '' + self.redirect_uri = '' + self.uin = '' + self.sid = '' + self.skey = '' + self.pass_ticket = '' + self.deviceId = 'e' + repr(random.random())[2:17] + self.BaseRequest = {} + self.synckey = '' + self.SyncKey = [] + self.User = [] + self.MemberList = [] + self.ContactList = [] # 好友 + self.GroupList = [] # 群 + self.GroupMemeberList = [] # 群友 + self.PublicUsersList = [] # 公众号/服务号 + self.SpecialUsersList = [] # 特殊账号 + self.autoReplyMode = True + self.syncHost = '' + self.user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36' + self.interactive = False + self.autoOpen = False + self.saveFolder = os.path.join(os.getcwd(), 'saved') + self.saveSubFolders = {'webwxgeticon': 'icons', 'webwxgetheadimg': 'headimgs', 'webwxgetmsgimg': 'msgimgs', + 'webwxgetvideo': 'videos', 'webwxgetvoice': 'voices', '_showQRCodeImg': 'qrcodes'} + self.appid = 'wx782c26e4c19acffb' + self.lang = 'zh_CN' + self.lastCheckTs = time.time() + self.memberCount = 0 + self.SpecialUsers = ['newsapp', 'fmessage', 'filehelper', 'weibo', 'qqmail', 'fmessage', 'tmessage', 'qmessage', 'qqsync', 'floatbottle', 'lbsapp', 'shakeapp', 'medianote', 'qqfriend', 'readerapp', 'blogapp', 'facebookapp', 'masssendapp', 'meishiapp', 'feedsapp', + 'voip', 'blogappweixin', 'weixin', 'brandsessionholder', 'weixinreminder', 'wxid_novlwrv3lqwv11', 'gh_22b87fa7cb3c', 'officialaccounts', 'notification_messages', 'wxid_novlwrv3lqwv11', 'gh_22b87fa7cb3c', 'wxitil', 'userexperience_alarm', 'notification_messages'] + self.TimeOut = 20 # 同步最短时间间隔(单位:秒) + self.media_count = -1 + + self.cookie = cookielib.CookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie)) + opener.addheaders = [('User-agent', self.user_agent)] + urllib2.install_opener(opener) + + def loadConfig(self, config): + if config['DEBUG']: + self.DEBUG = config['DEBUG'] + if config['autoReplyMode']: + self.autoReplyMode = config['autoReplyMode'] + if config['user_agent']: + self.user_agent = config['user_agent'] + if config['interactive']: + self.interactive = config['interactive'] + if config['autoOpen']: + self.autoOpen = config['autoOpen'] + + def getUUID(self): + url = 'https://login.weixin.qq.com/jslogin' + params = { + 'appid': self.appid, + 'fun': 'new', + 'lang': self.lang, + '_': int(time.time()), + } + data = self._post(url, params, False) + if data == '': + return False + regx = r'window.QRLogin.code = (\d+); window.QRLogin.uuid = "(\S+?)"' + pm = re.search(regx, data) + if pm: + code = pm.group(1) + self.uuid = pm.group(2) + return code == '200' + return False + + def genQRCode(self): + #return self._showQRCodeImg() + if sys.platform.startswith('win'): + self._showQRCodeImg() + else: + self._str2qr('https://login.weixin.qq.com/l/' + self.uuid) + + def _showQRCodeImg(self): + url = 'https://login.weixin.qq.com/qrcode/' + self.uuid + params = { + 't': 'webwx', + '_': int(time.time()) + } + + data = self._post(url, params, False) + if data == '': + return + QRCODE_PATH = self._saveFile('qrcode.jpg', data, '_showQRCodeImg') + os.startfile(QRCODE_PATH) + + def waitForLogin(self, tip=1): + time.sleep(tip) + url = 'https://login.weixin.qq.com/cgi-bin/mmwebwx-bin/login?tip=%s&uuid=%s&_=%s' % ( + tip, self.uuid, int(time.time())) + data = self._get(url) + if data == '': + return False + pm = re.search(r'window.code=(\d+);', data) + code = pm.group(1) + + if code == '201': + return True + elif code == '200': + pm = re.search(r'window.redirect_uri="(\S+?)";', data) + r_uri = pm.group(1) + '&fun=new' + self.redirect_uri = r_uri + self.base_uri = r_uri[:r_uri.rfind('/')] + return True + elif code == '408': + self._echo('[登陆超时] \n') + else: + self._echo('[登陆异常] \n') + return False + + def login(self): + data = self._get(self.redirect_uri) + if data == '': + return False + doc = xml.dom.minidom.parseString(data) + root = doc.documentElement + + for node in root.childNodes: + if node.nodeName == 'skey': + self.skey = node.childNodes[0].data + elif node.nodeName == 'wxsid': + self.sid = node.childNodes[0].data + elif node.nodeName == 'wxuin': + self.uin = node.childNodes[0].data + elif node.nodeName == 'pass_ticket': + self.pass_ticket = node.childNodes[0].data + + if '' in (self.skey, self.sid, self.uin, self.pass_ticket): + return False + + self.BaseRequest = { + 'Uin': int(self.uin), + 'Sid': self.sid, + 'Skey': self.skey, + 'DeviceID': self.deviceId, + } + return True + + def webwxinit(self): + url = self.base_uri + '/webwxinit?pass_ticket=%s&skey=%s&r=%s' % ( + self.pass_ticket, self.skey, int(time.time())) + params = { + 'BaseRequest': self.BaseRequest + } + dic = self._post(url, params) + if dic == '': + return False + self.SyncKey = dic['SyncKey'] + self.User = dic['User'] + # synckey for synccheck + self.synckey = '|'.join( + [str(keyVal['Key']) + '_' + str(keyVal['Val']) for keyVal in self.SyncKey['List']]) + + return dic['BaseResponse']['Ret'] == 0 + + def webwxstatusnotify(self): + url = self.base_uri + \ + '/webwxstatusnotify?lang=zh_CN&pass_ticket=%s' % (self.pass_ticket) + params = { + 'BaseRequest': self.BaseRequest, + "Code": 3, + "FromUserName": self.User['UserName'], + "ToUserName": self.User['UserName'], + "ClientMsgId": int(time.time()) + } + dic = self._post(url, params) + if dic == '': + return False + + return dic['BaseResponse']['Ret'] == 0 + + def webwxgetcontact(self): + SpecialUsers = self.SpecialUsers + url = self.base_uri + '/webwxgetcontact?pass_ticket=%s&skey=%s&r=%s' % ( + self.pass_ticket, self.skey, int(time.time())) + dic = self._post(url, {}) + if dic == '': + return False + + self.MemberCount = dic['MemberCount'] + self.MemberList = dic['MemberList'] + ContactList = self.MemberList[:] + GroupList = self.GroupList[:] + PublicUsersList = self.PublicUsersList[:] + SpecialUsersList = self.SpecialUsersList[:] + + for i in xrange(len(ContactList) - 1, -1, -1): + Contact = ContactList[i] + if Contact['VerifyFlag'] & 8 != 0: # 公众号/服务号 + ContactList.remove(Contact) + self.PublicUsersList.append(Contact) + elif Contact['UserName'] in SpecialUsers: # 特殊账号 + ContactList.remove(Contact) + self.SpecialUsersList.append(Contact) + elif '@@' in Contact['UserName']: # 群聊 + ContactList.remove(Contact) + self.GroupList.append(Contact) + elif Contact['UserName'] == self.User['UserName']: # 自己 + ContactList.remove(Contact) + self.ContactList = ContactList + + return True + + def webwxbatchgetcontact(self): + url = self.base_uri + \ + '/webwxbatchgetcontact?type=ex&r=%s&pass_ticket=%s' % ( + int(time.time()), self.pass_ticket) + params = { + 'BaseRequest': self.BaseRequest, + "Count": len(self.GroupList), + "List": [{"UserName": g['UserName'], "EncryChatRoomId":""} for g in self.GroupList] + } + dic = self._post(url, params) + if dic == '': + return False + + # blabla ... + ContactList = dic['ContactList'] + ContactCount = dic['Count'] + self.GroupList = ContactList + + for i in xrange(len(ContactList) - 1, -1, -1): + Contact = ContactList[i] + MemberList = Contact['MemberList'] + for member in MemberList: + self.GroupMemeberList.append(member) + return True + + def getNameById(self, id): + url = self.base_uri + \ + '/webwxbatchgetcontact?type=ex&r=%s&pass_ticket=%s' % ( + int(time.time()), self.pass_ticket) + params = { + 'BaseRequest': self.BaseRequest, + "Count": 1, + "List": [{"UserName": id, "EncryChatRoomId": ""}] + } + dic = self._post(url, params) + if dic == '': + return None + + # blabla ... + return dic['ContactList'] + + def testsynccheck(self): + SyncHost = [ + 'wx2.qq.com', + 'webpush.wx2.qq.com', + 'wx8.qq.com', + 'webpush.wx8.qq.com', + 'qq.com', + 'webpush.wx.qq.com', + 'web2.wechat.com', + 'webpush.web2.wechat.com', + 'wechat.com', + 'webpush.web.wechat.com', + 'webpush.weixin.qq.com', + 'webpush.wechat.com', + 'webpush1.wechat.com', + 'webpush2.wechat.com', + 'webpush.wx.qq.com', + 'webpush2.wx.qq.com' + # 'webpush.wechatapp.com' + ] + for host in SyncHost: + self.syncHost = host + [retcode, selector] = self.synccheck() + if retcode == '0': + return True + return False + + def synccheck(self): + params = { + 'r': int(time.time()), + 'sid': self.sid, + 'uin': self.uin, + 'skey': self.skey, + 'deviceid': self.deviceId, + 'synckey': self.synckey, + '_': int(time.time()), + } + url = 'https://' + self.syncHost + \ + '/cgi-bin/mmwebwx-bin/synccheck?' + urllib.urlencode(params) + data = self._get(url) + if data == '': + return [-1,-1] + pm = re.search( + r'window.synccheck={retcode:"(\d+)",selector:"(\d+)"}', data) + retcode = pm.group(1) + selector = pm.group(2) + return [retcode, selector] + + def webwxsync(self): + url = self.base_uri + \ + '/webwxsync?sid=%s&skey=%s&pass_ticket=%s' % ( + self.sid, self.skey, self.pass_ticket) + params = { + 'BaseRequest': self.BaseRequest, + 'SyncKey': self.SyncKey, + 'rr': ~int(time.time()) + } + dic = self._post(url, params) + if dic == '': + return None + if self.DEBUG: + print json.dumps(dic, indent=4) + (json.dumps(dic, indent=4)) + + if dic['BaseResponse']['Ret'] == 0: + self.SyncKey = dic['SyncKey'] + self.synckey = '|'.join( + [str(keyVal['Key']) + '_' + str(keyVal['Val']) for keyVal in self.SyncKey['List']]) + return dic + + def webwxsendmsg(self, word, to='filehelper'): + url = self.base_uri + \ + '/webwxsendmsg?pass_ticket=%s' % (self.pass_ticket) + clientMsgId = str(int(time.time() * 1000)) + \ + str(random.random())[:5].replace('.', '') + params = { + 'BaseRequest': self.BaseRequest, + 'Msg': { + "Type": 1, + "Content": self._transcoding(word), + "FromUserName": self.User['UserName'], + "ToUserName": to, + "LocalID": clientMsgId, + "ClientMsgId": clientMsgId + } + } + headers = {'content-type': 'application/json; charset=UTF-8'} + data = json.dumps(params, ensure_ascii=False).encode('utf8') + r = requests.post(url, data=data, headers=headers) + dic = r.json() + return dic['BaseResponse']['Ret'] == 0 + + def webwxuploadmedia(self, image_name): + url = 'https://file2.wx.qq.com/cgi-bin/mmwebwx-bin/webwxuploadmedia?f=json' + # 计数器 + self.media_count = self.media_count + 1 + # 文件名 + file_name = image_name + # MIME格式 + # mime_type = application/pdf, image/jpeg, image/png, etc. + mime_type = mimetypes.guess_type(image_name, strict=False)[0] + # 微信识别的文档格式,微信服务器应该只支持两种类型的格式。pic和doc + # pic格式,直接显示。doc格式则显示为文件。 + media_type = 'pic' if mime_type.split('/')[0] == 'image' else 'doc' + # 上一次修改日期 + lastModifieDate = 'Thu Mar 17 2016 00:55:10 GMT+0800 (CST)' + # 文件大小 + file_size = os.path.getsize(file_name) + # PassTicket + pass_ticket = self.pass_ticket + # clientMediaId + client_media_id = str(int(time.time() * 1000)) + \ + str(random.random())[:5].replace('.', '') + # webwx_data_ticket + webwx_data_ticket = '' + for item in self.cookie: + if item.name == 'webwx_data_ticket': + webwx_data_ticket = item.value + break + if (webwx_data_ticket == ''): + return "None Fuck Cookie" + + uploadmediarequest = json.dumps({ + "BaseRequest": self.BaseRequest, + "ClientMediaId": client_media_id, + "TotalLen": file_size, + "StartPos": 0, + "DataLen": file_size, + "MediaType": 4 + }, ensure_ascii=False).encode('utf8') + + multipart_encoder = MultipartEncoder( + fields={ + 'id': 'WU_FILE_' + str(self.media_count), + 'name': file_name, + 'type': mime_type, + 'lastModifieDate': lastModifieDate, + 'size': str(file_size), + 'mediatype': media_type, + 'uploadmediarequest': uploadmediarequest, + 'webwx_data_ticket': webwx_data_ticket, + 'pass_ticket': pass_ticket, + 'filename': (file_name, open(file_name, 'rb'), mime_type.split('/')[1]) + }, + boundary='-----------------------------1575017231431605357584454111' + ) + + headers = { + 'Host': 'file2.wx.qq.com', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:42.0) Gecko/20100101 Firefox/42.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Accept-Encoding': 'gzip, deflate', + 'Referer': 'https://wx2.qq.com/', + 'Content-Type': multipart_encoder.content_type, + 'Origin': 'https://wx2.qq.com', + 'Connection': 'keep-alive', + 'Pragma': 'no-cache', + 'Cache-Control': 'no-cache' + } + + r = requests.post(url, data=multipart_encoder, headers=headers) + response_json = r.json() + if response_json['BaseResponse']['Ret'] == 0: + return response_json + return None + + def webwxsendmsgimg(self, user_id, media_id): + url = 'https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsgimg?fun=async&f=json&pass_ticket=%s' % self.pass_ticket + clientMsgId = str(int(time.time() * 1000)) + \ + str(random.random())[:5].replace('.', '') + data_json = { + "BaseRequest": self.BaseRequest, + "Msg": { + "Type": 3, + "MediaId": media_id, + "FromUserName": self.User['UserName'], + "ToUserName": user_id, + "LocalID": clientMsgId, + "ClientMsgId": clientMsgId + } + } + headers = {'content-type': 'application/json; charset=UTF-8'} + data = json.dumps(data_json, ensure_ascii=False).encode('utf8') + r = requests.post(url, data=data, headers=headers) + dic = r.json() + return dic['BaseResponse']['Ret'] == 0 + + def webwxsendmsgemotion(self, user_id, media_id): + url = 'https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxsendemoticon?fun=sys&f=json&pass_ticket=%s' % self.pass_ticket + clientMsgId = str(int(time.time() * 1000)) + \ + str(random.random())[:5].replace('.', '') + data_json = { + "BaseRequest": self.BaseRequest, + "Msg": { + "Type": 47, + "EmojiFlag": 2, + "MediaId": media_id, + "FromUserName": self.User['UserName'], + "ToUserName": user_id, + "LocalID": clientMsgId, + "ClientMsgId": clientMsgId + } + } + headers = {'content-type': 'application/json; charset=UTF-8'} + data = json.dumps(data_json, ensure_ascii=False).encode('utf8') + r = requests.post(url, data=data, headers=headers) + dic = r.json() + if self.DEBUG: + print json.dumps(dic, indent=4) + logging.debug(json.dumps(dic, indent=4)) + return dic['BaseResponse']['Ret'] == 0 + + def _saveFile(self, filename, data, api=None): + fn = filename + if self.saveSubFolders[api]: + dirName = os.path.join(self.saveFolder, self.saveSubFolders[api]) + if not os.path.exists(dirName): + os.makedirs(dirName) + fn = os.path.join(dirName, filename) + logging.debug('Saved file: %s' % fn) + with open(fn, 'wb') as f: + f.write(data) + f.close() + return fn + + def webwxgeticon(self, id): + url = self.base_uri + \ + '/webwxgeticon?username=%s&skey=%s' % (id, self.skey) + data = self._get(url) + if data == '': + return '' + fn = 'img_' + id + '.jpg' + return self._saveFile(fn, data, 'webwxgeticon') + + def webwxgetheadimg(self, id): + url = self.base_uri + \ + '/webwxgetheadimg?username=%s&skey=%s' % (id, self.skey) + data = self._get(url) + if data == '': + return '' + fn = 'img_' + id + '.jpg' + return self._saveFile(fn, data, 'webwxgetheadimg') + + def webwxgetmsgimg(self, msgid): + url = self.base_uri + \ + '/webwxgetmsgimg?MsgID=%s&skey=%s' % (msgid, self.skey) + data = self._get(url) + if data == '': + return '' + fn = 'img_' + msgid + '.jpg' + return self._saveFile(fn, data, 'webwxgetmsgimg') + + # Not work now for weixin haven't support this API + def webwxgetvideo(self, msgid): + url = self.base_uri + \ + '/webwxgetvideo?msgid=%s&skey=%s' % (msgid, self.skey) + data = self._get(url, api='webwxgetvideo') + if data == '': + return '' + fn = 'video_' + msgid + '.mp4' + return self._saveFile(fn, data, 'webwxgetvideo') + + def webwxgetvoice(self, msgid): + url = self.base_uri + \ + '/webwxgetvoice?msgid=%s&skey=%s' % (msgid, self.skey) + data = self._get(url) + print "This is voice data: \n" + data + if data == '': + return '' + fn = 'voice_' + msgid + '.mp3' + return self._saveFile(fn, data, 'webwxgetvoice') + + def getGroupName(self, id): + name = '未知群' + for member in self.GroupList: + if member['UserName'] == id: + name = member['NickName'] + if name == '未知群': + # 现有群里面查不到 + GroupList = self.getNameById(id) + for group in GroupList: + self.GroupList.append(group) + if group['UserName'] == id: + name = group['NickName'] + MemberList = group['MemberList'] + for member in MemberList: + self.GroupMemeberList.append(member) + return name + + def getUserRemarkName(self, id): + name = '未知群' if id[:2] == '@@' else '陌生人' + if id == self.User['UserName']: + return self.User['NickName'] # 自己 + + if id[:2] == '@@': + # 群 + name = self.getGroupName(id) + else: + # 特殊账号 + for member in self.SpecialUsersList: + if member['UserName'] == id: + name = member['RemarkName'] if member[ + 'RemarkName'] else member['NickName'] + + # 公众号或服务号 + for member in self.PublicUsersList: + if member['UserName'] == id: + name = member['RemarkName'] if member[ + 'RemarkName'] else member['NickName'] + + # 直接联系人 + for member in self.ContactList: + if member['UserName'] == id: + name = member['RemarkName'] if member[ + 'RemarkName'] else member['NickName'] + # 群友 + for member in self.GroupMemeberList: + if member['UserName'] == id: + name = member['DisplayName'] if member[ + 'DisplayName'] else member['NickName'] + + if name == '未知群' or name == '陌生人': + logging.debug(id) + return name + + def getUSerID(self, name): + for member in self.MemberList: + if name == member['RemarkName'] or name == member['NickName']: + return member['UserName'] + return None + + def _showMsg(self, message): + + srcName = None + dstName = None + groupName = None + content = None + + msg = message + logging.debug(msg) + + if msg['raw_msg']: + srcName = self.getUserRemarkName(msg['raw_msg']['FromUserName']) + dstName = self.getUserRemarkName(msg['raw_msg']['ToUserName']) + content = msg['raw_msg']['Content'].replace( + '<', '<').replace('>', '>') + message_id = msg['raw_msg']['MsgId'] + + if content.find('http://weixin.qq.com/cgi-bin/redirectforward?args=') != -1: + # 地理位置消息 + data = self._get(content) + if data == '': + return + data.decode('gbk').encode('utf-8') + pos = self._searchContent('title', data, 'xml') + temp = self._get(content) + if temp == '': + return + tree = html.fromstring(temp) + url = tree.xpath('//html/body/div/img')[0].attrib['src'] + + for item in urlparse(url).query.split('&'): + if item.split('=')[0] == 'center': + loc = item.split('=')[-1:] + + content = '%s 发送了一个 位置消息 - 我在 [%s](%s) @ %s]' % ( + srcName, pos, url, loc) + + if msg['raw_msg']['ToUserName'] == 'filehelper': + # 文件传输助手 + dstName = '文件传输助手' + + if msg['raw_msg']['FromUserName'][:2] == '@@': + # 接收到来自群的消息 + if ":
" in content: + [people, content] = content.split(':
', 1) + groupName = srcName + srcName = self.getUserRemarkName(people) + dstName = 'GROUP' + else: + groupName = srcName + srcName = 'SYSTEM' + elif msg['raw_msg']['ToUserName'][:2] == '@@': + # 自己发给群的消息 + groupName = dstName + dstName = 'GROUP' + + # 收到了红包 + if content == '收到红包,请在手机上查看': + msg['message'] = content + + # 指定了消息内容 + if 'message' in msg.keys(): + content = msg['message'] + + if groupName != None: + print '%s |%s| %s -> %s: %s' % (message_id, groupName.strip(), srcName.strip(), dstName.strip(), content.replace('
', '\n')) + logging.info('%s |%s| %s -> %s: %s' % (message_id, groupName.strip(), + srcName.strip(), dstName.strip(), content.replace('
', '\n'))) + else: + print '%s %s -> %s: %s' % (message_id, srcName.strip(), dstName.strip(), content.replace('
', '\n')) + logging.info('%s %s -> %s: %s' % (message_id, srcName.strip(), + dstName.strip(), content.replace('
', '\n'))) + + + + def handleMsg(self, r): + for msg in r['AddMsgList']: + print '[*] 你有新的消息,请注意查收' + logging.debug('[*] 你有新的消息,请注意查收') + + if self.DEBUG: + fn = 'msg' + str(int(random.random() * 1000)) + '.json' + with open(fn, 'w') as f: + f.write(json.dumps(msg)) + print '[*] 该消息已储存到文件: ' + fn + logging.debug('[*] 该消息已储存到文件: %s' % (fn)) + + msgType = msg['MsgType'] + name = self.getUserRemarkName(msg['FromUserName']) + content = msg['Content'].replace('<', '<').replace('>', '>') + msgid = msg['MsgId'] + + if msgType == 1: + raw_msg = {'raw_msg': msg} + self._showMsg(raw_msg) + if self.autoReplyMode: + global simSimi + global keys_sum + try: + response = simSimi.getConversation(content.encode('utf-8')) + except Exception, e: + print e + if str(e).find("Not found") != -1: + response = {'response': "傻逼"} + elif str(e).find("Limit Exceeded") != -1: + response = {'response': "到达每日上限了,正在自动更换api_key,使用第%d个api_key"%((keys_sum+1) % len(keys)+1)} + keys_sum = keys_sum + 1 + simSimi = simSimis[keys_sum % len(keys)] + else: + response = {'response': "代码出现了未知的问题"+str(e)} + + ans = response['response'].encode("utf-8") + if self.webwxsendmsg(ans, msg['FromUserName']): + print 'Auto: ' + ans + logging.info('Auto: ' + ans) + else: + print 'Auto Failed' + logging.info('Auto Failed') + elif msgType == 3: + image = self.webwxgetmsgimg(msgid) + raw_msg = {'raw_msg': msg, + 'message': '%s 发送了一张图片: %s' % (name, image)} + self._showMsg(raw_msg) + self._safe_open(image) + elif msgType == 34: + voice = self.webwxgetvoice(msgid) + print voice + text = maintest.file_upload(voice) + raw_msg = {'raw_msg': msg, + 'message': '%s 发了一段语音: %s' % (name, text)} + self._showMsg(raw_msg) + self._safe_open(voice) + if self.autoReplyMode: + global simSimi + try: + response = simSimi.getConversation(content.encode('utf-8')) + except Exception, e: + print e + if str(e).find("Not found") != -1: + response = {'response': "傻逼"} + elif str(e).find("Limit Exceeded") != -1: + response = {'response': "到达每日上限了,正在自动更换api_key,使用第%d个api_key"%((keys_sum+1) % len(keys)+1)} + keys_sum = keys_sum + 1 + simSimi = simSimis[keys_sum % len(keys)] + else: + response = {'response': "代码出现了未知的问题"+str(e)} + ans = response['response'].encode("utf-8") + if self.webwxsendmsg(ans, msg['FromUserName']): + print 'Auto: ' + ans + logging.info('Auto: ' + ans) + else: + print 'Auto Failed' + logging.info('Auto Failed') + elif msgType == 42: + info = msg['RecommendInfo'] + print '%s 发送了一张名片:' % name + print '=========================' + print '= 昵称: %s' % info['NickName'] + print '= 微信号: %s' % info['Alias'] + print '= 地区: %s %s' % (info['Province'], info['City']) + print '= 性别: %s' % ['未知', '男', '女'][info['Sex']] + print '=========================' + raw_msg = {'raw_msg': msg, 'message': '%s 发送了一张名片: %s' % ( + name.strip(), json.dumps(info))} + self._showMsg(raw_msg) + elif msgType == 47: + url = self._searchContent('cdnurl', content) + raw_msg = {'raw_msg': msg, + 'message': '%s 发了一个动画表情,点击下面链接查看: %s' % (name, url)} + self._showMsg(raw_msg) + self._safe_open(url) + elif msgType == 49: + appMsgType = defaultdict(lambda: "") + appMsgType.update({5: '链接', 3: '音乐', 7: '微博'}) + print '%s 分享了一个%s:' % (name, appMsgType[msg['AppMsgType']]) + print '=========================' + print '= 标题: %s' % msg['FileName'] + print '= 描述: %s' % self._searchContent('des', content, 'xml') + print '= 链接: %s' % msg['Url'] + print '= 来自: %s' % self._searchContent('appname', content, 'xml') + print '=========================' + card = { + 'title': msg['FileName'], + 'description': self._searchContent('des', content, 'xml'), + 'url': msg['Url'], + 'appname': self._searchContent('appname', content, 'xml') + } + raw_msg = {'raw_msg': msg, 'message': '%s 分享了一个%s: %s' % ( + name, appMsgType[msg['AppMsgType']], json.dumps(card))} + self._showMsg(raw_msg) + elif msgType == 51: + raw_msg = {'raw_msg': msg, 'message': '[*] 成功获取联系人信息'} + self._showMsg(raw_msg) + elif msgType == 62: + video = self.webwxgetvideo(msgid) + raw_msg = {'raw_msg': msg, + 'message': '%s 发了一段小视频: %s' % (name, video)} + self._showMsg(raw_msg) + self._safe_open(video) + elif msgType == 10002: + raw_msg = {'raw_msg': msg, 'message': '%s 撤回了一条消息' % name} + self._showMsg(raw_msg) + else: + logging.debug('[*] 该消息类型为: %d,可能是表情,图片, 链接或红包: %s' % + (msg['MsgType'], json.dumps(msg))) + raw_msg = { + 'raw_msg': msg, 'message': '[*] 该消息类型为: %d,可能是表情,图片, 链接或红包' % msg['MsgType']} + self._showMsg(raw_msg) + + def listenMsgMode(self): + print '[*] 进入消息监听模式 ... 成功' + logging.debug('[*] 进入消息监听模式 ... 成功') + self._run('[*] 进行同步线路测试 ... ', self.testsynccheck) + playWeChat = 0 + redEnvelope = 0 + while True: + self.lastCheckTs = time.time() + [retcode, selector] = self.synccheck() + if self.DEBUG: + print 'retcode: %s, selector: %s' % (retcode, selector) + logging.debug('retcode: %s, selector: %s' % (retcode, selector)) + if retcode == '1100': + print '[*] 你在手机上登出了微信,债见' + logging.debug('[*] 你在手机上登出了微信,债见') + break + if retcode == '1101': + print '[*] 你在其他地方登录了 WEB 版微信,债见' + logging.debug('[*] 你在其他地方登录了 WEB 版微信,债见') + break + elif retcode == '0': + if selector == '2': + r = self.webwxsync() + if r is not None: + self.handleMsg(r) + elif selector == '6': + # TODO + redEnvelope += 1 + print '[*] 收到疑似红包消息 %d 次' % redEnvelope + #logging.debug('[*] 收到疑似红包消息 %d 次' % redEnvelope) + elif selector == '7': + playWeChat += 1 + print '[*] 你在手机上玩微信被我发现了 %d 次' % playWeChat + #logging.debug('[*] 你在手机上玩微信被我发现了 %d 次' % playWeChat) + r = self.webwxsync() + elif selector == '0': + time.sleep(1) + if (time.time() - self.lastCheckTs) <= 20: + time.sleep(time.time() - self.lastCheckTs) + + def sendMsg(self, name, word, isfile=False): + id = self.getUSerID(name) + if id: + if isfile: + with open(word, 'r') as f: + for line in f.readlines(): + line = line.replace('\n', '') + self._echo('-> ' + name + ': ' + line) + if self.webwxsendmsg(line, id): + print ' [成功]' + else: + print ' [失败]' + time.sleep(1) + else: + if self.webwxsendmsg(word, id): + print '[*] 消息发送成功' + logging.debug('[*] 消息发送成功') + else: + print '[*] 消息发送失败' + logging.debug('[*] 消息发送失败') + else: + print '[*] 此用户不存在' + logging.debug('[*] 此用户不存在') + + def sendMsgToAll(self, word): + for contact in self.ContactList: + name = contact['RemarkName'] if contact[ + 'RemarkName'] else contact['NickName'] + id = contact['UserName'] + self._echo('-> ' + name + ': ' + word) + if self.webwxsendmsg(word, id): + print ' [成功]' + else: + print ' [失败]' + time.sleep(1) + + def sendImg(self, name, file_name): + response = self.webwxuploadmedia(file_name) + media_id = "" + if response is not None: + media_id = response['MediaId'] + user_id = self.getUSerID(name) + response = self.webwxsendmsgimg(user_id, media_id) + + def sendEmotion(self, name, file_name): + response = self.webwxuploadmedia(file_name) + media_id = "" + if response is not None: + media_id = response['MediaId'] + user_id = self.getUSerID(name) + response = self.webwxsendmsgemotion(user_id, media_id) + + @catchKeyboardInterrupt + def start(self): + self._echo('[*] 微信网页版 ... 开动') + print + logging.debug('[*] 微信网页版 ... 开动') + while True: + self._run('[*] 正在获取 uuid ... ', self.getUUID) + self._echo('[*] 正在获取二维码 ... 成功') + print + logging.debug('[*] 微信网页版 ... 开动') + self.genQRCode() + print '[*] 请使用微信扫描二维码以登录 ... ' + if not self.waitForLogin(): + continue + print '[*] 请在手机上点击确认以登录 ... ' + if not self.waitForLogin(0): + continue + break + + self._run('[*] 正在登录 ... ', self.login) + self._run('[*] 微信初始化 ... ', self.webwxinit) + self._run('[*] 开启状态通知 ... ', self.webwxstatusnotify) + self._run('[*] 获取联系人 ... ', self.webwxgetcontact) + self._echo('[*] 应有 %s 个联系人,读取到联系人 %d 个' % + (self.MemberCount, len(self.MemberList))) + print + self._echo('[*] 共有 %d 个群 | %d 个直接联系人 | %d 个特殊账号 | %d 公众号或服务号' % (len(self.GroupList), + len(self.ContactList), len(self.SpecialUsersList), len(self.PublicUsersList))) + print + self._run('[*] 获取群 ... ', self.webwxbatchgetcontact) + logging.debug('[*] 微信网页版 ... 开动') + if self.DEBUG: + print self + logging.debug(self) + + if self.interactive and raw_input('[*] 是否开启自动回复模式(y/n): ') == 'y': + self.autoReplyMode = True + print '[*] 自动回复模式 ... 开启' + logging.debug('[*] 自动回复模式 ... 开启') + else: + print '[*] 自动回复模式 ... 关闭' + logging.debug('[*] 自动回复模式 ... 关闭') + + if sys.platform.startswith('win'): + import thread + thread.start_new_thread(self.listenMsgMode()) + else: + listenProcess = multiprocessing.Process(target=self.listenMsgMode) + listenProcess.start() + + while True: + text = raw_input('') + if text == 'quit': + listenProcess.terminate() + print('[*] 退出微信') + logging.debug('[*] 退出微信') + exit() + elif text[:2] == '->': + [name, word] = text[2:].split(':') + if name == 'all': + self.sendMsgToAll(word) + else: + self.sendMsg(name, word) + elif text[:3] == 'm->': + [name, file] = text[3:].split(':') + self.sendMsg(name, file, True) + elif text[:3] == 'f->': + print '发送文件' + logging.debug('发送文件') + elif text[:3] == 'i->': + print '发送图片' + [name, file_name] = text[3:].split(':') + self.sendImg(name, file_name) + logging.debug('发送图片') + elif text[:3] == 'e->': + print '发送表情' + [name, file_name] = text[3:].split(':') + self.sendEmotion(name, file_name) + logging.debug('发送表情') + + def _safe_open(self, path): + if self.autoOpen: + if platform.system() == "Linux": + os.system("xdg-open %s &" % path) + else: + os.system('open %s &' % path) + + def _run(self, str, func, *args): + self._echo(str) + if func(*args): + print '成功' + logging.debug('%s... 成功' % (str)) + else: + print('失败\n[*] 退出程序') + logging.debug('%s... 失败' % (str)) + logging.debug('[*] 退出程序') + exit() + + def _echo(self, str): + sys.stdout.write(str) + sys.stdout.flush() + + def _printQR(self, mat): + for i in mat: + BLACK = '\033[40m \033[0m' + WHITE = '\033[47m \033[0m' + print ''.join([BLACK if j else WHITE for j in i]) + + def _str2qr(self, str): + print(str) + qr = qrcode.QRCode() + qr.border = 1 + qr.add_data(str) + qr.make() + # img = qr.make_image() + # img.save("qrcode.png") + #mat = qr.get_matrix() + #self._printQR(mat) # qr.print_tty() or qr.print_ascii() + qr.print_ascii(invert=True) + + def _transcoding(self, data): + if not data: + return data + result = None + if type(data) == unicode: + result = data + elif type(data) == str: + result = data.decode('utf-8') + return result + + def _get(self, url, api=None): + request = urllib2.Request(url=url) + request.add_header('Referer', 'https://wx.qq.com/') + if api == 'webwxgetvoice': + request.add_header('Range', 'bytes=0-') + if api == 'webwxgetvideo': + request.add_header('Range', 'bytes=0-') + try: + response = urllib2.urlopen(request) + data = response.read() + logging.debug(url) + return data + except urllib2.HTTPError, e: + logging.error('HTTPError = ' + str(e.code)) + except urllib2.URLError, e: + logging.error('URLError = ' + str(e.reason)) + except httplib.HTTPException, e: + logging.error('HTTPException') + except Exception: + import traceback + logging.error('generic exception: ' + traceback.format_exc()) + return '' + + def _post(self, url, params, jsonfmt=True): + if jsonfmt: + request = urllib2.Request(url=url, data=json.dumps(params)) + request.add_header( + 'ContentType', 'application/json; charset=UTF-8') + else: + request = urllib2.Request(url=url, data=urllib.urlencode(params)) + + + try: + response = urllib2.urlopen(request) + data = response.read() + if jsonfmt: + return json.loads(data, object_hook=_decode_dict) + return data + except urllib2.HTTPError, e: + logging.error('HTTPError = ' + str(e.code)) + except urllib2.URLError, e: + logging.error('URLError = ' + str(e.reason)) + except httplib.HTTPException, e: + logging.error('HTTPException') + except Exception: + import traceback + logging.error('generic exception: ' + traceback.format_exc()) + + return '' + + def _xiaodoubi(self, word): + url = 'http://www.xiaodoubi.com/bot/chat.php' + try: + r = requests.post(url, data={'chat': word}) + return r.content + except: + return "让我一个人静静 T_T..." + + def _simsimi(self, word): + key = '' + url = 'http://sandbox.api.simsimi.com/request.p?key=%s&lc=ch&ft=0.0&text=%s' % ( + key, word) + r = requests.get(url) + ans = r.json() + if ans['result'] == '100': + return ans['response'] + else: + return '你在说什么,风太大听不清列' + + def _searchContent(self, key, content, fmat='attr'): + if fmat == 'attr': + pm = re.search(key + '\s?=\s?"([^"<]+)"', content) + if pm: + return pm.group(1) + elif fmat == 'xml': + pm = re.search('<{0}>([^<]+)'.format(key), content) + if not pm: + pm = re.search( + '<{0}><\!\[CDATA\[(.*?)\]\]>'.format(key), content) + if pm: + return pm.group(1) + return '未知' + + +class UnicodeStreamFilter: + + def __init__(self, target): + self.target = target + self.encoding = 'utf-8' + self.errors = 'replace' + self.encode_to = self.target.encoding + + def write(self, s): + if type(s) == str: + s = s.decode('utf-8') + s = s.encode(self.encode_to, self.errors).decode(self.encode_to) + self.target.write(s) + + def flush(self): + self.target.flush() + +if sys.stdout.encoding == 'cp936': + sys.stdout = UnicodeStreamFilter(sys.stdout) + + +if __name__ == '__main__': + logger = logging.getLogger(__name__) + if not sys.platform.startswith('win'): + import coloredlogs + coloredlogs.install(level='DEBUG') + + webwx = WebWeixin() + webwx.start()