Introdução
Hoje vamos analisar um serviço de música bastante conhecido: o Yandex.Music. No geral é um bom serviço, mas tem uma desvantagem significativa — não funciona offline. Vamos tentar corrigir esse inconveniente usando as ferramentas disponíveis.
Ferramentas
Então, precisamos:
- Um Python relativamente recente: 3.7 ou superior
- Bibliotecas assíncronas: aiohttp e aiofile
- A ferramenta clássica para trabalhar com uma "API" em HTML: BeautifulSoup
- Para entreter o usuário durante o processo: tqdm
- Para preencher as tags: mutagen
Autorização
Usuários não autenticados do serviço só podem ouvir trechos de músicas de até 30 segundos. Isso claramente não é suficiente para uma audição de qualidade. Faremos o login da forma mais natural: através do formulário web, recebendo os cookies. Para isso usaremos um opener (urllib) para fazer as solicitações e HTMLParser para analisar os formulários.
def _submit_form(opener, response, overrides):
    """Parse the form in ``response``, apply ``overrides`` and POST it back.

    The incoming ``response`` is always closed before the next request is
    issued, so its socket is not left dangling until garbage collection.

    :param opener: the ``urllib`` opener that carries the cookie jar.
    :param response: an open HTTP response whose body contains the form.
    :param overrides: field values to set on top of the parsed form fields.
    :return: the (still open) response of the POST request.
    """
    with response:
        page = response.read().decode("utf-8")
        # Used as the POST target when the parsed form declares no URL.
        fallback_url = response.url
    # FormParser is defined elsewhere in the project; here it is only relied
    # upon for feed()/close() and its ``params`` / ``url`` attributes.
    parser = FormParser()
    parser.feed(page)
    parser.close()
    parser.params.update(overrides)
    payload = urllib.parse.urlencode(parser.params).encode("utf-8")
    return opener.open(parser.url or fallback_url, payload)


def resolve_cookie(login: str, password: str) -> str:
    """Log in through the passport.yandex.ru web form and return the cookies.

    The login is a two-step form flow: the first form is submitted with the
    login only, the form from that response is then submitted with both the
    login and the password.

    :param login: account login.
    :param password: account password.
    :return: the ``.yandex.ru`` cookies formatted as ``name=value; name=value``.
    :raises Exception: if no ``yandex_login`` cookie was received; the message
        lists the names of the cookies that were received.
    """
    cookies = CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookies),
        urllib.request.HTTPRedirectHandler())
    response = opener.open("https://passport.yandex.ru")
    response = _submit_form(opener, response, {"login": login})
    response = _submit_form(opener, response, {"login": login, "passwd": password})
    # Only the cookies set along the way matter; the final body is not read.
    response.close()

    cookie_data = {
        item.name: item.value
        for item in cookies
        if item.domain == ".yandex.ru"
    }
    if "yandex_login" not in cookie_data:
        keys = ", ".join(cookie_data)
        raise Exception(f"Invalid cookie_data {keys}")
    return "; ".join(f"{name}={value}" for name, value in cookie_data.items())
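Abrimos a página https://passport.yandex.ru e enviamos o formulário com o campo login. Em seguida, analisamos o formulário recebido na resposta e o enviamos novamente, agora com o login e a senha. Por fim, reunimos os cookies do domínio .yandex.ru — se entre eles não houver yandex_login, a autorização falhou e uma exceção é lançada.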
Yandex Music (HTML) API
As solicitações são feitas com aiohttp. O HTML recebido é analisado com BeautifulSoup.
class YandexMusicApi:
    """Small client that scrapes the HTML pages of music.yandex.ru."""

    host = "music.yandex.ru"
    base_url = f"https://{host}"

    def __init__(self, cookie: str):
        """Create the client.

        :param cookie: session cookie string, handed to the project's
            ``Headers`` helper together with the host name.
        """
        self.headers = Headers(self.host, cookie)

    async def _request(self, end_point: str):
        """GET ``{base_url}/{end_point}`` and return the raw response body.

        NOTE(review): this request is sent without ``self.headers``, so the
        session cookie is not attached here -- confirm this is intended.
        """
        async with aiohttp.ClientSession() as session:
            url = f"{self.base_url}/{end_point}"
            async with session.request(method="GET", url=url) as response:
                return await response.read()

    async def get_favorite_artists(self, login: str) -> List[Artist]:
        """Return the artists listed on ``users/<login>/artists``.

        :param login: login of the user whose artists page is read.
        :return: one ``Artist(id, title)`` per ``div.artist`` element found
            inside the ``page-users__artists`` block.
        :raises Exception: if the artists block is missing. When the page has
            a ``page-users__caption`` block its first child is used as the
            message; otherwise a generic message is raised instead of failing
            later with an ``AttributeError`` on ``None``.
        """
        body = await self._request(f"users/{login}/artists")
        soup = BeautifulSoup(body, "lxml")
        artists_soup = soup.find("div", class_="page-users__artists")
        if artists_soup is None:
            caption = soup.find("div", class_="page-users__caption")
            if caption:
                raise Exception(caption.contents[0])
            raise Exception(f"Artists block not found on the page of user {login}")

        result = []
        for artist_soup in artists_soup.find_all("div", class_="artist"):
            title_soup = artist_soup.find("div", class_="artist__name")
            title = title_soup.attrs["title"]
            title_href_soup = title_soup.find("a")
            # The artist id is the last path segment of the link target.
            id_ = int(title_href_soup.attrs["href"].split("/")[-1])
            result.append(Artist(id_, title))
        return result
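Aqui, carregamos a página https://music.yandex.ru/users/<login>/artists e localizamos nela o bloco page-users__artists. O nome do artista vem do atributo title do elemento artist__name. O Id é obtido aplicando split ao endereço do link.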
, .
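Obter o link para baixar uma faixa é mais complicado. Na aba Network do navegador, vê-se que o arquivo é baixado de um endereço no formato https://{host}/get-mp3/{sign}/{ts}/{path}, em que sign é uma assinatura: o hash MD5 de uma chave fixa (XGRlBW9FXlekgbPrRHuSiA) concatenada com path (sem o primeiro caractere) e com o valor s. Os valores host, path, s e ts são obtidos com duas solicitações adicionais.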
async def get_track_url(self, album_id: int, track_id: int) -> str:
    """Resolve the direct mp3 download URL of a track.

    Two JSON requests are made: the first returns ``src``, the address of the
    download descriptor; the second fetches that descriptor (``host``,
    ``path``, ``s``, ``ts``). The final URL contains an MD5 signature computed
    from a fixed key, ``path`` without its first character, and ``s``.

    :param album_id: id of the album the track belongs to.
    :param track_id: id of the track.
    :return: URL of the form ``https://{host}/get-mp3/{sign}/{ts}/{path}``.
    """
    async with aiohttp.ClientSession() as session:
        info_url = (
            f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/"
            f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
        )
        page = f"album/{album_id}"
        async with session.request(
            method="GET", url=info_url, headers=self.headers.build(page)
        ) as info_response:
            info = await info_response.json()

        descriptor_url = info["src"]
        descriptor_url += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
        parts = parse.urlparse(descriptor_url)
        # Extra header entries for the storage host request, passed through
        # the project's Headers.build helper.
        storage_headers = self.headers.build(page, {
            ":authority": "storage.mds.yandex.net",
            ":method": "GET",
            ":path": f"{parts.path}/{parts.query}",
            ":scheme": "https",
        }, True)
        async with session.request(
            method="GET", url=descriptor_url, headers=storage_headers
        ) as descriptor_response:
            descriptor = await descriptor_response.json()

        host, path, s, ts = (descriptor[key] for key in ("host", "path", "s", "ts"))
        # Signature: md5 of the fixed key + path without its first char + s.
        digest = md5(f"XGRlBW9FXlekgbPrRHuSiA{path[1:]}{s}".encode("utf-8"))
        sign = digest.hexdigest()
        return f"https://{host}/get-mp3/{sign}/{ts}/{path}"
, .
async def download_file(cls, url: str, filename: str):
    """Download ``url`` and write the response body to ``filename``.

    The whole body is read into memory and then written in one call.

    NOTE(review): the first parameter is named ``cls`` and callers invoke
    this as ``self.download_file(url, filename)``, so it is presumably
    decorated with ``@classmethod`` outside this snippet -- confirm.

    :param url: address to download.
    :param filename: path of the file to create or overwrite.
    :raises aiohttp.ClientResponseError: if the server answers with a
        4xx/5xx status; nothing is written to disk in that case.
    """
    async with aiohttp.ClientSession() as session:
        async with session.request(method="GET", url=url) as response:
            # Without this check an HTTP error page would be saved as if it
            # were the requested file, and later runs that only test for the
            # file's existence would never fetch it again.
            response.raise_for_status()
            data = await response.read()
    async with AIOFile(filename, "wb") as afp:
        await afp.write(data)
        await afp.fsync()
. , , mp3, , , . , , . , . .
, . , , . , , . (- mp3 ).
async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
    """Download every album of ``artist`` into ``self.target_dir``.

    Layout on disk: ``<target_dir>/<artist>/<year> - <album>/<NN>. <title>.mp3``
    plus ``cover.jpg`` when the album has a cover. Two tqdm bars report
    progress: one per artist (albums) and one per album (tracks + cover).

    :param artist: artist whose albums are downloaded.
    :param depth: how thoroughly existing files are re-checked. Below
        ``Depth.ALBUMS`` an album whose directory exists is skipped entirely;
        from ``Depth.TRACKS`` upwards tracks are downloaded even when the
        target file already exists.
    """
    artist_bar = tqdm(total=0, desc=artist.title, position=1, ascii=True)
    albums = await self.api.get_artist_albums(artist.id)
    artist_bar.total = len(albums)
    artist_bar.refresh()

    for album in albums:
        album_dir = os.path.join(
            self.target_dir,
            normalize(artist.title),
            f"{album.year} - {normalize(album.title)}",
        )
        skip_album = depth < Depth.ALBUMS and os.path.exists(album_dir)
        if skip_album:
            artist_bar.update()
            continue

        album_bar = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
        tracks = await self.api.get_album_tracks(album.id)
        album_bar.total = len(tracks)
        album_bar.refresh()
        os.makedirs(album_dir, exist_ok=True)

        if album.cover:
            # The cover counts as one extra step on the album bar.
            album_bar.total += 1
            cover_filename = os.path.join(album_dir, "cover.jpg")
            cover_missing = not os.path.exists(cover_filename)
            if cover_missing:
                await self.download_file(album.cover, cover_filename)
            album_bar.update()

        for track in tracks:
            target_filename = os.path.join(
                album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3"
            )
            must_fetch = depth >= Depth.TRACKS or not os.path.exists(target_filename)
            if must_fetch:
                url = await self.api.get_track_url(track.album_id, track.id)
                await self.download_file(url, target_filename)
                tags = {
                    "title": track.title,
                    "tracknumber": str(track.num),
                    "artist": artist.title,
                    "album": album.title,
                    "date": str(album.year),
                }
                self.write_tags(target_filename, tags)
            album_bar.update()

        album_bar.close()
        artist_bar.update()

    artist_bar.close()
Nomes que contêm barra, como AC/DC, quebrariam o caminho do arquivo. Para evitar isso, existe a função normalize:
def normalize(name: str) -> str:
    """Return ``name`` with every ``/`` replaced by ``-``.

    Used for artist, album and track titles before they become path
    components, so a title such as ``AC/DC`` does not add a directory level.
    """
    return "-".join(name.split("/"))
, ( ) . . asyncio.Semaphore asyncio.gather.
, .
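Para não informar login e senha a cada execução, as credenciais são lidas do arquivo .credentials, e o cookie obtido é salvo no arquivo .cookie para ser reutilizado nas execuções seguintes.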
def resolve_cookie() -> str:
    """Return the session cookie, logging in only when no cached one exists.

    Both files live in the parent directory of this module's directory:

    * ``.cookie`` -- cached cookie string; returned as-is when present.
    * ``.credentials`` -- INI file with a ``[yandex]`` section holding
      ``login`` and ``password``; used to log in via ``auth.resolve_cookie``.
      The resulting cookie is then written to ``.cookie``.

    :raises Exception: if neither file exists; the message shows the expected
        content of the credentials file.
    """
    root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir))

    cookie_file = os.path.join(root_dir, ".cookie")
    if os.path.exists(cookie_file):
        with open(cookie_file, "rt") as cached:
            return cached.read()

    credentials_file = os.path.join(root_dir, ".credentials")
    if not os.path.exists(credentials_file):
        raise Exception(f"""Create \"{credentials_file}\" with content
[yandex]
login=<user_login>
password=<user_password>
""")

    config = configparser.ConfigParser()
    config.read(credentials_file)
    account = config["yandex"]
    cookie = auth.resolve_cookie(account["login"], account["password"])
    with open(cookie_file, "wt") as target:
        target.write(cookie)
    return cookie
, , . argparse, .
:
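- -a (--artist): Id do artista; se não for informado, são baixados os artistas favoritos do usuário
- -o (--output): diretório de saída; por padrão, a pasta Music no diretório pessoal do usuário
- -d (--depth): profundidade da verificação dos arquivos já existentes
  - O valor 0 (NORMAL) pula os álbuns cujo diretório já existe
  - O valor 1 (ALBUMS) percorre todas as faixas do álbum e baixa as que faltam
  - O valor 2 (TRACKS) baixa e sobrescreve as faixas, mesmo se já estiverem presentes no sistema de arquivos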
async def main():
    """Parse the command line and download one artist or the favorites.

    Options:

    * ``-a/--artist`` -- artist id; when omitted, the favorite artists of the
      logged-in user are downloaded instead.
    * ``-o/--output`` -- output directory, default ``~/Music``.
    * ``-d/--depth`` -- integer controlling how existing files are re-checked.

    :raises Exception: if favorites are requested but the cookie string has
        no ``yandex_login`` entry to take the user login from.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--artist", help="Artist ID")
    parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
                        help=f"Output directory, default {Path.home()}/Music")
    parser.add_argument("-d", "--depth", default=0, type=int,
                        help=f"Exists files check depth, {enum_print(Depth)}")
    args = parser.parse_args()

    cookie = resolve_cookie()
    api = YandexMusicApi(cookie)
    agent = YandexMusicAgent(api, args.output)

    if args.artist:
        artist = await api.get_artist(args.artist)
        await agent.download_artist(artist, args.depth)
        return

    # The cookie string is "name=value; name=value"; the last entry has no
    # trailing ";", so the value must be allowed to run to the end of string.
    found = re.search(r"(?:^|;)\s*yandex_login=([^;]*)", cookie)
    if found is None:
        raise Exception("yandex_login not found in cookie")
    login = found.group(1).strip()
    await agent.download_favorites(login, args.depth)
E agora, finalmente, podemos executar tudo:
# Script entry point: run the async main() coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
Obrigado pela atenção. Agora você sabe como implementar uma API que não existe, baixar o que não está disponível para download e tornar-se o orgulhoso proprietário de sua própria coleção de músicas.
O resultado pode ser visto no repositório yandex.music.agent