mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2024-11-22 07:37:29 +00:00
Improve ytInitialData extraction
This commit is contained in:
parent
ba88c53857
commit
554cd8cc3a
@ -28,6 +28,11 @@ def get_created_files(id):
|
|||||||
id = "_" + id[1:] # youtube-dl changes - to _ at the start, presumably to not accidentally trigger switches with * in shell
|
id = "_" + id[1:] # youtube-dl changes - to _ at the start, presumably to not accidentally trigger switches with * in shell
|
||||||
return (f for f in os.listdir() if f.startswith("{}_".format(id)))
|
return (f for f in os.listdir() if f.startswith("{}_".format(id)))
|
||||||
|
|
||||||
|
def clean_up_temp_files(id):
|
||||||
|
created_files = get_created_files(id)
|
||||||
|
for file in created_files:
|
||||||
|
os.unlink(file)
|
||||||
|
|
||||||
def format_order(format):
|
def format_order(format):
|
||||||
# most significant to least significant
|
# most significant to least significant
|
||||||
# key, max, order, transform
|
# key, max, order, transform
|
||||||
@ -172,6 +177,8 @@ def extract_video(id):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
except youtube_dlc.DownloadError as e:
|
except youtube_dlc.DownloadError as e:
|
||||||
|
clean_up_temp_files(id)
|
||||||
|
|
||||||
if isinstance(e.exc_info[1], urllib.error.HTTPError):
|
if isinstance(e.exc_info[1], urllib.error.HTTPError):
|
||||||
if e.exc_info[1].code == 429:
|
if e.exc_info[1].code == 429:
|
||||||
result = {
|
result = {
|
||||||
@ -192,9 +199,7 @@ def extract_video(id):
|
|||||||
print("messed up in original transform.")
|
print("messed up in original transform.")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
created_files = get_created_files(id)
|
clean_up_temp_files(id)
|
||||||
for file in created_files:
|
|
||||||
os.unlink(file)
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_more_stuff_from_file(id, result):
|
def get_more_stuff_from_file(id, result):
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
r_yt_initial_data = re.compile(r"""(?:\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+\});</script>""")
|
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
|
||||||
|
|
||||||
def extract_yt_initial_data(content):
|
def extract_yt_initial_data(content):
|
||||||
content = content.replace("\n", "")
|
|
||||||
m_yt_initial_data = re.search(r_yt_initial_data, content)
|
m_yt_initial_data = re.search(r_yt_initial_data, content)
|
||||||
if m_yt_initial_data:
|
if m_yt_initial_data:
|
||||||
print(m_yt_initial_data.group(1))
|
|
||||||
yt_initial_data = json.loads(m_yt_initial_data.group(1))
|
yt_initial_data = json.loads(m_yt_initial_data.group(1))
|
||||||
return yt_initial_data
|
return yt_initial_data
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user