#!/usr/bin/env python3
import gzip
import json
import re
import sys
import time
import traceback
from typing import TextIO

import requests
# Captures everything after the "/s/" segment of a share link, i.e. the
# "<share_id>[/<file_id>...]" tail. Inputs without "/s/" do not match and are
# split on "/" as-is by the callers.
p = re.compile(r'.*/s/(.*)')

# Listing lines matching this pattern (case-insensitive) are skipped by
# getlist and never written to the output.
skipp = re.compile(r'.*(cover|screen|频道).*', re.IGNORECASE)

# Running count of getlist calls; starts at 1 and is incremented on each call.
reqcount = 1

# "<share_id>/<file_id>" keys loaded from the existing .raw file; entries
# whose key is in this set are not written or crawled again.
sharedict = set()
def getlist(w: TextIO, shareid: str, fileid: str, morepage: bool, *,
            timeout: float = 60.0) -> None:
    """Fetch one listing page from the proxy and append its entries to *w*.

    The response body is split into lines; each line with more than two
    tab-separated fields is treated as an entry whose first field is a share
    link or "share_id/file_id" path and whose third field is the entry type.
    Entries not already present in ``sharedict`` are written to *w* (and
    flushed), and entries of type ``"folder"`` are listed recursively.
    After the page has been processed a follow-up request with
    ``morepage=True`` is issued; the recursion stops when a response has at
    most 4 lines and does not contain the text "folder".

    Args:
        w: Writable text stream receiving the accepted listing lines.
        shareid: Value sent as the ``share_id`` query parameter.
        fileid: Value sent as the ``file_id`` query parameter ("" allowed).
        morepage: Value sent as the ``morepage`` query parameter.
        timeout: Seconds passed to ``requests.get`` so a stalled proxy cannot
            block the crawl forever.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    global reqcount  # the only module-level name rebound in this function

    reqcount += 1
    if reqcount % 5 == 0:
        # Progress heartbeat on every fifth call.
        print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}", file=sys.stderr)

    url = (
        'http://192.168.101.188:9978/proxy?do=pikpak&type=list'
        f'&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
    )
    print(f"url: {url}", file=sys.stderr)

    resp = requests.get(url, timeout=timeout)
    content = resp.content.decode('utf-8')
    lines = content.split("\n")

    # A short response that mentions no folder is treated as "nothing (more)
    # to list" and ends this branch of the crawl.
    if "folder" not in content and len(lines) <= 4:
        return

    # str.split always yields at least one element, so lines[0] exists.
    print(f"first line:{lines[0]}", file=sys.stderr)

    for line in lines:
        if skipp.match(line):
            continue
        linearr = line.split('\t')
        if len(linearr) <= 2:
            continue

        # First field is either a link containing "/s/<share>/<file>" or a
        # bare "<share>/<file>" path.
        m = p.match(linearr[0])
        arr = (m.group(1) if m else linearr[0]).split("/")
        # The parameters are deliberately rebound here (also for entries that
        # are skipped below): the follow-up request after the loop uses the
        # ids of the last parsed entry, exactly as the original code did.
        shareid = arr[0]
        fileid = arr[1] if len(arr) > 1 else ""

        if shareid + "/" + fileid in sharedict:
            print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
            continue

        w.write(line + "\n")
        w.flush()  # keep the output usable for resuming if the crawl dies
        if linearr[2] == "folder":
            getlist(w, shareid, fileid, False, timeout=timeout)

    # NOTE(review): this asks for "more pages" using the ids of the LAST entry
    # parsed above rather than the ids this call was invoked with. Whether
    # that is intended depends on how the proxy tracks paging, which is not
    # visible here -- confirm before changing it.
    getlist(w, shareid, fileid, True, timeout=timeout)
def _split_share_path(path: str) -> list:
    """Return the "/"-separated parts of a share link or bare id path."""
    m = p.match(path)
    return (m.group(1) if m else path).split("/")


def main() -> None:
    """Crawl every share listed in the JSON file named by ``sys.argv[1]``.

    Steps, with ``<arg>`` being ``sys.argv[1]``:

    1. If ``<arg>.raw.gz`` can be opened, extract it to ``<arg>.raw``
       (overwriting it), stripping surrounding whitespace from each line.
       Any failure is reported via a traceback on stderr and otherwise
       ignored.
    2. If ``<arg>.raw`` can be opened, add a "share_id/file_id" key to
       ``sharedict`` for every line whose first tab-separated field has at
       least two "/"-separated parts.
    3. Open ``<arg>.raw`` for appending, load ``<arg>`` as JSON, and call
       ``getlist`` for each element whose ``type_id`` is not already in
       ``sharedict``.

    Raises:
        SystemExit: If no input path was given on the command line.
    """
    if len(sys.argv) < 2:
        raise SystemExit("usage: pass the JSON input file as the first argument")
    src_path = sys.argv[1]
    raw_path = src_path + ".raw"

    # Step 1: best-effort extraction of a compressed previous result.
    try:
        with gzip.open(raw_path + ".gz", mode="rt", encoding="utf-8") as gz:
            print(f"found gz raw file:{src_path}.raw.gz, extract it", file=sys.stderr)
            with open(raw_path, "w", encoding="utf-8") as w:
                for line in gz:
                    w.write(line.strip() + "\n")
    except Exception:
        # Includes the common "no .raw.gz present" case; the crawl goes on.
        traceback.print_exc()

    # Step 2: remember what an earlier run already recorded.
    try:
        old = open(raw_path, "r", encoding="utf-8")
    except OSError:
        old = None
    if old is None:
        print("no old raw file")
    else:
        print("found old raw file")
        with old:
            for line in old:
                # Drop the newline so it cannot leak into the key when the
                # line has no tab separator.
                arr = _split_share_path(line.rstrip("\n").split("\t")[0])
                if len(arr) > 1:
                    sharedict.add(arr[0] + "/" + arr[1])
        print(f"old raw file record:{len(sharedict)}")

    # Step 3: append newly discovered entries to the raw file.
    with open(raw_path, "a+", encoding="utf-8") as w:
        with open(src_path, "r", encoding="utf-8") as f:
            j = json.load(f)
        for c in j:
            type_id = c.get("type_id")
            if not type_id:
                # Nothing to parse or request for this element.
                print(f"skip entry without type_id: {c!r}", file=sys.stderr)
                continue
            arr = _split_share_path(type_id)
            shareid = arr[0]
            fileid = arr[1] if len(arr) > 1 else ""
            if shareid + "/" + fileid in sharedict:
                continue
            getlist(w, shareid, fileid, False)
# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()