22

「docker实战篇」python的docker- 抖音视频抓取(中)(25)

 5 years ago
source link: https://idig8.com/2019/04/13/dockershizhanpianpythondedocker-douyinshipinzhuaquzhong25/?amp%3Butm_medium=referral
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

本次主要用python对上次抖音分享页面中的_signature进行解析,并完成抖音视频的下载。源码:https://github.com/limingios/dockerpython.git (源码/「「docker实战篇」python的docker- 抖音视频抓取(下)(24))

https://github.com/limingios/dockerpython.git (谷歌插件)

AnAZNzf.png!web

找到方法,完成本地的html的生成

其实就是把分享页面中的函数复制出来,然后通过调用该函数的方式完成_signature的生成。

html_head.txt

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
</body>
</html>
<!-- The script element below is left open on purpose: the Python script writes
     this fragment, then the "var tac=...;" line, then the foot template, and the
     foot template ends with the closing script tag. -->
<script type="text/javascript">

RBfemei.png!web

 html_foot.txt 
// Minified module loader that installs `__M.define` and `__M.require` on the
// object passed in as `t` (the outer `this`). It is a no-op when
// `t.__M.require` already exists, so including it twice is harmless.
//
// Public surface created here:
//   __M.define(id, factory)            register a module factory
//   __M.require(id)                    synchronously instantiate / fetch a module
//   __M.require([ids], cb, onError)    same as __M.require.async
//   __M.require.async / resourceMap / loadJs / loadCss / alias / timeout
!function(t) {
    if (t.__M = t.__M || {},
    !t.__M.require) {
        // Shared state (single-letter names come from the minifier):
        //   e  require function (assigned below)      n  define function (assigned below)
        //   r  the document's first <head> element    i  id -> callbacks waiting for define(id)
        //   o  id -> registered factory               a  id -> instantiated module record
        //   u  script URLs already injected           c  resource entries (from resourceMap().res)
        //   s  package entries (from resourceMap().pkg)
        //   l  script injector, defined right here:
        //      l(url, onFail) appends a <script src=url> to <head> once per URL and
        //      returns the element (undefined if the URL was already requested).
        //      When onFail is given it is called on a script error, or after
        //      e.timeout milliseconds unless the script loaded first.
        var e, n, r = document.getElementsByTagName("head")[0], i = {}, o = {}, a = {}, u = {}, c = {}, s = {}, l = function(t, n) {
            if (!(t in u)) {
                u[t] = !0;
                var i = document.createElement("script");
                if (n) {
                    var o = setTimeout(n, e.timeout);
                    i.onerror = function() {
                        clearTimeout(o),
                        n()
                    }
                    ;
                    var a = function() {
                        clearTimeout(o)
                    };
                    // Browsers without `onload` on script elements fall back to
                    // polling readyState through onreadystatechange.
                    "onload"in i ? i.onload = a : i.onreadystatechange = function() {
                        ("loaded" === this.readyState || "complete" === this.readyState) && a()
                    }
                }
                return i.type = "text/javascript",
                i.src = t,
                r.appendChild(i),
                i
            }
        // f(id, onDefined, onFail): queue `onDefined` until define(id) runs, work out
        // the URL for `id` (package URL first, then the resource's own url/uri, then
        // the id itself) and inject the script. `onFail`, if given, receives the id.
        }, f = function(t, e, n) {
            var r = i[t] || (i[t] = []);
            r.push(e);
            var o, a = c[t] || c[t + ".js"] || {}, u = a.pkg;
            o = u ? s[u].url || s[u].uri : a.url || a.uri || t,
            l(o, n && function() {
                n(t)
            }
            )
        };
        // define(id, factory): store the factory under the id (a trailing ".js" is
        // stripped) and run every callback that was waiting for this id.
        n = function(t, e) {
            // If the second argument is not a function the third one is used instead
            // (presumably to accept an AMD-style (id, deps, factory) call - confirm).
            "function" != typeof e && (e = arguments[2]),
            t = t.replace(/\.js$/i, ""),
            o[t] = e;
            var n = i[t];
            if (n) {
                for (var r = 0, a = n.length; a > r; r++)
                    n[r]();
                delete i[t]
            }
        }
        ,
        // require(id): return the exports of an already-defined module, instantiating
        // it on first use. An array-like argument is forwarded to require.async.
        e = function(t) {
            if (t && t.splice)
                return e.async.apply(this, arguments);
            t = e.alias(t);
            var n = a[t];
            if (n)
                return n.exports;
            var r = o[t];
            // NOTE(review): a plain string is thrown here, not an Error object.
            if (!r)
                throw "[ModJS] Cannot find module `" + t + "`";
            // The record is cached before the factory runs, so a re-entrant require
            // of the same id returns the (possibly still empty) exports object.
            n = a[t] = {
                exports: {}
            };
            // Factories are called as factory.call(module, require, exports, module);
            // a non-function "factory" is used as the exports value directly.
            var i = "function" == typeof r ? r.apply(n, [e, n.exports, n]) : r;
            // A truthy return value replaces module.exports. Afterwards a `default`
            // property pointing at the exports is added when it is missing and the
            // exports object is extensible.
            return i && (n.exports = i),
            n.exports && !n.exports["default"] && Object.defineProperty && Object.isExtensible(n.exports) && Object.defineProperty(n.exports, "default", {
                value: n.exports
            }),
            n.exports
        }
        ,
        // require.async(ids, callback, onError): load every id that is not defined
        // yet (plus the `deps` listed for it in the resource map), then call
        // `callback` with the exports of `ids`, using the outer `t` as `this`.
        //
        // The `var s` / `var l` declared further down are local to this function and
        // shadow the outer package map and script injector: here `s` is the set of
        // ids already visited by this call and `l` counts loads still pending.
        // The outer `f` is unaffected because it closes over the outer scope.
        e.async = function(n, r, i) {
            function a(t) {
                for (var n, r = 0, h = t.length; h > r; r++) {
                    var p = e.alias(t[r]);
                    p in o ? (n = c[p] || c[p + ".js"],
                    n && "deps"in n && a(n.deps)) : p in s || (s[p] = !0,
                    l++,
                    f(p, u, i),
                    n = c[p] || c[p + ".js"],
                    n && "deps"in n && a(n.deps))
                }
            }
            // Completion check: fires when the counter is already zero *before* the
            // decrement. It is called once right after the walk (so an all-defined
            // request completes immediately) and once per module as it gets defined.
            function u() {
                if (0 === l--) {
                    for (var i = [], o = 0, a = n.length; a > o; o++)
                        i[o] = e(n[o]);
                    r && r.apply(t, i)
                }
            }
            "string" == typeof n && (n = [n]);
            var s = {}
              , l = 0;
            a(n),
            u()
        }
        ,
        // require.resourceMap({res, pkg}): merge own properties of `res` into the
        // resource table and of `pkg` into the package table.
        e.resourceMap = function(t) {
            var e, n;
            n = t.res;
            for (e in n)
                n.hasOwnProperty(e) && (c[e] = n[e]);
            n = t.pkg;
            for (e in n)
                n.hasOwnProperty(e) && (s[e] = n[e])
        }
        ,
        // require.loadJs(url): inject a script without any failure callback.
        e.loadJs = function(t) {
            l(t)
        }
        ,
        // require.loadCss({content} | {url}): append an inline <style> when `content`
        // is given, otherwise a <link rel="stylesheet"> for `url`.
        e.loadCss = function(t) {
            if (t.content) {
                var e = document.createElement("style");
                e.type = "text/css",
                e.styleSheet ? e.styleSheet.cssText = t.content : e.innerHTML = t.content,
                r.appendChild(e)
            } else if (t.url) {
                var n = document.createElement("link");
                n.href = t.url,
                n.rel = "stylesheet",
                n.type = "text/css",
                r.appendChild(n)
            }
        }
        ,
        // require.alias(id): canonical module id - a trailing ".js" is removed.
        e.alias = function(t) {
            return t.replace(/\.js$/i, "")
        }
        ,
        // Script load timeout in milliseconds, then publish define/require.
        e.timeout = 5e3,
        t.__M.define = n,
        t.__M.require = e
    }
}(this)


// Registers the obfuscated signing runtime with the module loader under the id
// "douyin_falcon:node_modules/byted-acrawler/dist/runtime". The loader calls the
// factory with (require, exports, module), received here as `l` and `e`.
//
// How the factory works, as far as the visible code shows:
//   1. The inner function expands a compressed source template: every character
//      matched by the regex is replaced by the entry of a token table selected by
//      the low four bits of its character code.
//   2. The expanded source is compiled with Function(...) and run; its result is
//      called with an encoded program string and a one-element array holding the
//      exports object (after `__esModule` has been set on it).
//   3. The driver code later in this file calls `.sign` on the module exports, so
//      the program presumably attaches `sign` to them - TODO confirm.
//
// SECURITY NOTE: Function(...) evaluates generated source (eval-equivalent). Only
// use a copy of this payload that comes from a trusted origin.
//
// NOTE(review): the regex character class and the split() separator look as if
// they originally contained control characters that were lost when this listing
// was extracted; verify against an upstream copy before relying on it.
__M.define("douyin_falcon:node_modules/byted-acrawler/dist/runtime", function(l, e) {
    Function(function(l) {
        return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t<b;t)s[n="$"+t]=r[n];for(t=0,b=s=a;t<b;t)s[t]=a[t];c(e,0,s)}c(t,b,k){u(e){v[x]=e}f{g=,ting(bg)}l{try{y=c(t,b,k)}catch(e){h=e,y=l}}for(h,y,d,g,v=[],x=0;;)switch(g=){case 1:u(!)4:f5:u((e){a=0,r=e;{c=a<r;c&&u(e[a]),c}}(6:y=,u((y8:if(g=,lg,g=,y===c)b+=g;else if(y!==l)y9:c10:u(s(11:y=,u(+y)12:for(y=f,d=[],g=0;g<y;g)d[g]=y.charCodeAt(g)^g+y;u(String.fromCharCode.apply(null,d13:y=,h=delete [y]14:59:u((g=)?(y=x,v.slice(x-=g,y:[])61:u([])62:g=,k[0]=65599*k[0]+k[1].charCodeAt(g)>>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g, function(e) {
            return l[15 & e.charCodeAt(0)]
        })
    }("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb<k7l l!r&lengthb%^l$1+s$jl  s#i$1ek1s$gr#tack4)zgr#tac$! +0o![#cj?o ]!l$b%s"o ]!l"l$b*b^0d#>>>s!0s%yA0s"l"l!r&lengthb<k+l"^l"1+s"jl  s&l&z0l!$ +["cs\'(0l#i\'1ps9wxb&s() &{s)/s(gr&Stringr,fromCharCodes)0s*yWl ._b&s o!])l l Jb<k$.aj;l .Tb<k$.gj/l .^b<k&i"-4j!+& s+yPo!]+s!l!l Hd>&l!l Bd>&+l!l <d>&+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld<l 4d#>>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd<l 6d#>>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d<l"b|&+l-l(l!b^&+l-l&zl\'g,)gk}ejo{cm,)|yn~Lij~em["cl$b%@d<l&zl\'l $ +["cl$b%b|&+l-l%8d<@b|l!b^&+ q$sign ', [Object.defineProperty(e, "__esModule", {
        value: !0
    })])
});

dycs = __M.require("douyin_falcon:node_modules/byted-acrawler/dist/runtime")

signc = dycs.sign(&&&&)

document.title = signc
document.write(signc)

</script>

qUrmm2u.png!web

handle_douyin_movie.py 下载代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/2/20 17:39
# @Author  : Aries
# @Site    : 
# @File    : handle_douyin_movie.py.py
# @Software: PyCharm
import json
import os

import requests
import re
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Share ID of the Douyin user whose videos are fetched
share_id = "89923219116"
share_url = "https://www.douyin.com/share/user/" + share_id


# Desktop Chrome User-Agent sent with every HTTP request made by this script
header = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36"
}

# Regular expressions for the dytk token and the tac script of the share page
dytk_search = re.compile(r"dytk: '(.*?)'")
tac_search = re.compile(r"<script>tac=(.*?)</script>")
response = requests.get(url=share_url,headers=header)


# Extract dytk and tac. Fail with an explicit message when the page no longer
# contains them, instead of an opaque AttributeError raised on None.
dytk_match = dytk_search.search(response.text)
tac_match = tac_search.search(response.text)
if dytk_match is None or tac_match is None:
    raise RuntimeError(
        "could not find dytk/tac in the share page %s (HTTP status %s)"
        % (share_url, response.status_code)
    )
dytk = dytk_match.group(1)
tac = tac_match.group(1)


# Wrap tac as a JavaScript statement so it can be embedded in the generated page
tac = "var tac="+tac+";"


# Assemble the HTML page: head template + tac statement + foot template.
# The page declares charset UTF-8 and the foot template contains non-ASCII
# characters, so every file is read and written explicitly as UTF-8 instead of
# with the platform default encoding.
with open("html_head.txt", encoding="utf-8") as f1:
    f1_read = f1.read()

# The placeholder in the foot template is the argument of sign(); fill it from
# share_id so the signed id always matches the id used in the request URLs.
with open("html_foot.txt", encoding="utf-8") as f2:
    f2_read = f2.read().replace("&&&&", share_id)


with open("test.html", "w", encoding="utf-8") as f_w:
    f_w.write(f1_read+"\n"+tac+"\n"+f2_read)


# Alternative (disabled): prompt for the signature with input() instead of
# computing it in a browser.

# Load the generated page in headless Chrome; its script puts the signature
# into the document title.
chrome_options = Options()
chrome_options.add_argument("--headless")
abspath = os.path.abspath(r"D:\Program Files\chromedriver\chromedriver.exe")
douyin_driver = webdriver.Chrome(executable_path=abspath,chrome_options=chrome_options,)
try:
    # Open the test.html that was just written in the working directory rather
    # than a fixed drive path, so the script works from any checkout location.
    test_html_path = os.path.abspath("test.html").replace(os.sep, "/")
    douyin_driver.get("file:///" + test_html_path.lstrip("/"))
    signature = douyin_driver.title
finally:
    # Always shut the browser down, even when loading the page fails, so no
    # headless Chrome process is left behind.
    douyin_driver.quit()
movie_url = "https://www.douyin.com/aweme/v1/aweme/post/?user_id="+share_id+"&count=21&max_cursor=0&aid=1128&_signature="+signature+"&dytk="+dytk

# The endpoint is not very stable, so it is called in a loop until it returns a
# non-empty video list.
while True:
    movie_reponse = requests.get(url=movie_url,headers=header)
    # Parse the body once; a missing "aweme_list" key is treated like an empty list.
    aweme_list = json.loads(movie_reponse.text).get("aweme_list") or []
    if not aweme_list:
        # Pause between attempts instead of hammering the endpoint in a busy loop.
        time.sleep(1)
        continue

    print(movie_reponse.text)
    # Only the first video is saved: every item would be written to the same
    # douyin.mp4 file anyway.
    item = aweme_list[0]
    video_url = item["video"]["play_addr"]["url_list"][0]
    video_response = requests.get(url=video_url,headers=header)
    with open("douyin.mp4","wb") as v:
        # video_response.text must not be used here; the raw bytes in .content
        # are what has to be written to the file.
        v.write(video_response.content)
    # Leave the retry loop once the video is stored; without this the loop would
    # request and download the same video forever.
    break

Iz6r2qZ.png!web

最终结果

2ERviqq.png!web

里面关于chromedriver的配置,直接引入它的路径最稳了,我比较喜欢这种方式;网上很多通过环境变量配置的做法会导致电脑很慢,不建议使用。

NzeAFzZ.png!web

PS:抖音视频的下载基本上已经完成了,下次对需要注意的地方做下总结。

百度未收录

>>原创文章,欢迎转载。转载请注明:转载自IT人故事会,谢谢!

>>原文链接地址:上一篇:

已是最新文章


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK