From a102009ec15850fbe4a752a47496fd1fb3a0d7a1 Mon Sep 17 00:00:00 2001 From: conky Date: Tue, 18 Oct 2022 02:16:43 +0300 Subject: [PATCH] Refactored classes and functionality, added more data caching, working on topic parser. --- src/Client.php | 144 +++++--------------------- src/ClientInterface.php | 9 +- src/Exception/Topic/TopicNotFound.php | 9 ++ src/Models/Topic/Movie.php | 7 ++ src/Models/Topic/TopicBase.php | 49 +++++++++ src/Topic/Movie.php | 9 -- src/Topic/TopicBase.php | 38 ------- src/Traits/AuthTrait.php | 109 +++++++++++++++++++ src/Traits/HttpHelperTrait.php | 69 ++++++++++++ src/Traits/TopicTrait.php | 41 ++++++++ 10 files changed, 311 insertions(+), 173 deletions(-) create mode 100644 src/Exception/Topic/TopicNotFound.php create mode 100644 src/Models/Topic/Movie.php create mode 100644 src/Models/Topic/TopicBase.php delete mode 100644 src/Topic/Movie.php delete mode 100644 src/Topic/TopicBase.php create mode 100644 src/Traits/AuthTrait.php create mode 100644 src/Traits/HttpHelperTrait.php create mode 100644 src/Traits/TopicTrait.php diff --git a/src/Client.php b/src/Client.php index 2963ca9..52dd76c 100644 --- a/src/Client.php +++ b/src/Client.php @@ -2,30 +2,41 @@ namespace Toloka\PhpApi; -use Dflydev\FigCookies\Cookie; use Dflydev\FigCookies\Cookies; -use Dflydev\FigCookies\FigRequestCookies; -use Dflydev\FigCookies\SetCookies; -use GuzzleHttp\Psr7\Request; use Psr\Http\Client\ClientInterface as HttpClientInterface; -use Psr\Http\Message\RequestInterface; use Psr\Log\LoggerInterface; use Psr\SimpleCache\CacheInterface; -use GuzzleHttp\Psr7\Utils; -use Symfony\Component\DomCrawler\Crawler; -use Toloka\PhpApi\Exception\Auth\AuthException; -use Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials; +use Toloka\PhpApi\Traits\AuthTrait; +use Toloka\PhpApi\Traits\HttpHelperTrait; +use Toloka\PhpApi\Traits\TopicTrait; class Client implements ClientInterface { + use HttpHelperTrait; + use AuthTrait; + use TopicTrait; + const CACHE_KEY_COOKIES = 'cookies'; + const CACHE_KEY_LOGGED_IN = 'is_logged_in'; + + /** + * @var ClientInterface|null + */ + private static ?ClientInterface $instance; /** * Hurtom Toloka base url. * * @var string */ - protected string $base_url; + public static string $base_url = 'https://toloka.to'; + + /** + * User agent to use for requests. + * + * @var string + */ + public static string $user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'; /** * Http client. @@ -48,25 +59,18 @@ class Client implements ClientInterface { */ protected LoggerInterface $logger; - /** - * @var Cookies - */ - protected ?Cookies $cookies = NULL; - /** * Toloka client constructor. * - * @param string $base_url - * @param CacheInterface|NULL $cache - * @param LoggerInterface|NULL $logger + * @param \Psr\Http\Client\ClientInterface $httpClient + * @param \Psr\SimpleCache\CacheInterface $cache + * @param \Psr\Log\LoggerInterface $logger */ public function __construct( - string $base_url, HttpClientInterface $httpClient, CacheInterface $cache, LoggerInterface $logger ) { - $this->base_url = $base_url; $this->httpClient = $httpClient; $this->cache = $cache; $this->logger = $logger; @@ -79,104 +83,8 @@ class Client implements ClientInterface { } } - /** - * {@inheritDoc} - */ - public function getBaseUrl(): string { - return $this->base_url; - } - - /** - * {@inheritDoc} - */ - public function login(string $login, string $password): void { - if ($this->isLoggedIn()) { - return; - } - $body = http_build_query([ - 'username' => $login, - 'password' => $password, - 'autologin' => 'on', - 'ssl' => 'on', - 'redirect' => '', - 'login' => 'Вхід', - ]); - $request = new Request('POST', $this->getBaseUrl() . '/login.php', [ - 'Content-Type' => 'application/x-www-form-urlencoded', - ], Utils::streamFor($body)); - $response = $this->httpClient->sendRequest($request); - if ($response->getStatusCode() === 302) { - // If redirected - this means successful login, can save cookies. - $cookies = SetCookies::fromResponse($response); - $this->saveCookies($cookies); - } - elseif ($response->getStatusCode() === 200) { - // In case of returned page - try to find an error and throw exception. - $crawler = new Crawler($response->getBody()->getContents()); - $text = $crawler->filter('.forumline .row1 span.gen')->text(NULL, TRUE); - if (str_contains($text, 'Такий псевдонім не існує, або не збігається пароль.')) { - throw new InvalidAuthCredentials(); - } - } - throw new AuthException(); - } - - private function saveCookies($data) { - if ($data instanceof SetCookies) { - $cookies = []; - foreach ($data->getAll() as $cookie) { - $cookies[] = Cookie::create($cookie->getName(), $cookie->getValue()); - } - $this->cookies = new Cookies($cookies); - } - elseif ($data instanceof Cookies) { - $this->cookies = $data; - } - $this->cache->set(self::CACHE_KEY_COOKIES, $this->cookies); - } - - private function applyRequestCookies(RequestInterface $request): RequestInterface { - if (!$this->cookies) { - return $request; - } - foreach ($this->cookies->getAll() as $cookie) { - $request = FigRequestCookies::set($request, $cookie); - } - return $request; - } - - /** - * {@inheritDoc} - */ - public function isLoggedIn(): bool { - $request = new Request('GET', $this->getBaseUrl()); - $request = $this->applyRequestCookies($request); - $response = $this->httpClient->sendRequest($request); - if ($response->getStatusCode() === 200) { - $crawler = new Crawler($response->getBody()->getContents()); - $menu_links = $crawler->filter('table.navie6fix a'); - foreach ($menu_links as $menu_link) { - // If there is a link to profile, then we authorized. - if ($menu_link->textContent === 'Профіль') { - return TRUE; - } - } - } - return FALSE; - } - - /** - * {@inheritDoc} - */ - public function logout(): void { - // TODO: Implement logout() method. - } - - /** - * {@inheritDoc} - */ - public function getTopic(int $id): TopicInterface { - + public static function getInstance(): ClientInterface { + return self::$instance; } } diff --git a/src/ClientInterface.php b/src/ClientInterface.php index 8d27d20..5b5dcc1 100644 --- a/src/ClientInterface.php +++ b/src/ClientInterface.php @@ -4,14 +4,6 @@ namespace Toloka\PhpApi; interface ClientInterface { - /** - * Get Toloka base url. - * - * @return string - * Absolute URL to toloka. - */ - public function getBaseUrl(): string; - /** * Login using credentials. * @@ -22,6 +14,7 @@ interface ClientInterface { * * @throws \Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials * @throws \Toloka\PhpApi\Exception\Auth\TooManyLoginAttempts + * @throws \Toloka\PhpApi\Exception\Auth\AuthException * @throws \Psr\Http\Client\ClientExceptionInterface */ public function login(string $login, string $password): void; diff --git a/src/Exception/Topic/TopicNotFound.php b/src/Exception/Topic/TopicNotFound.php new file mode 100644 index 0000000..28a84b4 --- /dev/null +++ b/src/Exception/Topic/TopicNotFound.php @@ -0,0 +1,9 @@ +id; + } + + /** + * @return string + */ + public function title(): string { + return $this->title; + } + + /** + * {@inheritDoc} + */ + public function url(bool $absolute = TRUE): string { + return Client::$base_url . '/t' . $this->id(); + } + +} diff --git a/src/Topic/Movie.php b/src/Topic/Movie.php deleted file mode 100644 index 33ce712..0000000 --- a/src/Topic/Movie.php +++ /dev/null @@ -1,9 +0,0 @@ -client = $client; - } - - /** - * @return int - */ - public function id(): int { - return $this->id; - } - - /** - * @param int $id - */ - public function setId(int $id): void { - $this->id = $id; - } - - /** - * {@inheritDoc} - */ - public function url(bool $absolute = TRUE): string { - return $this->client->getBaseUrl() . 't' . $this->id(); - } - -} diff --git a/src/Traits/AuthTrait.php b/src/Traits/AuthTrait.php new file mode 100644 index 0000000..446e1d1 --- /dev/null +++ b/src/Traits/AuthTrait.php @@ -0,0 +1,109 @@ +isLoggedIn()) { + return; + } + + // Flush cached logged in state. + $this->cache->delete(self::CACHE_KEY_LOGGED_IN); + + $body = http_build_query([ + 'username' => $login, + 'password' => $password, + 'autologin' => 'on', + 'ssl' => 'on', + 'redirect' => '', + 'login' => 'Вхід', + ]); + $response = $this->makeRequest('POST', '/login.php', [ + 'Content-Type' => 'application/x-www-form-urlencoded', + ], Utils::streamFor($body), FALSE); + if ($response->getStatusCode() === 302) { + // Force check if we logged in successfully, to be sure + cache this info. + if (!$this->isLoggedIn()) { + throw new AuthException(); + } + return; + } + elseif ($response->getStatusCode() === 200) { + // In case of returned page - try to find an error and throw exception. + $crawler = new Crawler($response->getBody()->getContents()); + $text = $crawler->filter('.forumline .row1 span.gen')->text(NULL, TRUE); + if (str_contains($text, 'Такий псевдонім не існує, або не збігається пароль.')) { + throw new InvalidAuthCredentials(); + } + } + throw new AuthException(); + } + + /** + * {@inheritDoc} + */ + public function isLoggedIn(): bool { + $cached = $this->cache->get(self::CACHE_KEY_LOGGED_IN, NULL); + if ($cached === NULL) { + $response = $this->makeRequest('GET', '/'); + $cached = FALSE; + if ($response->getStatusCode() === 200) { + $crawler = new Crawler($response->getBody()->getContents()); + $menu_links = $crawler->filter('table.navie6fix a'); + foreach ($menu_links as $menu_link) { + // If there is a link to profile, then we authorized. + if ($menu_link->textContent === 'Профіль') { + $cached = TRUE; + break; + } + } + } + // Save logged in state in cache. + $this->cache->set(self::CACHE_KEY_LOGGED_IN, $cached); + } + return $cached; + } + + /** + * {@inheritDoc} + */ + public function logout(): void { + if (!$this->isLoggedIn()) { + return; + } + // Flush cached logged in state. + $this->cache->delete(self::CACHE_KEY_LOGGED_IN); + $session_id = $this->getUserSessionId(); + if (!$session_id) { + return; + } + $response = $this->makeRequest('GET', '/login.php?logout=true&sid=' . $session_id); + if ($response->getStatusCode() === 302) { + $this->cache->set(self::CACHE_KEY_LOGGED_IN, FALSE); + } + } + + /** + * Get current session id. + * + * @return string|null + * String value if session exists, null otherwise. + */ + private function getUserSessionId(): ?string { + if ($this->cookies->has('toloka_sid')) { + return $this->cookies->get('toloka_sid')->getValue(); + } + return NULL; + } + +} diff --git a/src/Traits/HttpHelperTrait.php b/src/Traits/HttpHelperTrait.php new file mode 100644 index 0000000..454fbc1 --- /dev/null +++ b/src/Traits/HttpHelperTrait.php @@ -0,0 +1,69 @@ +cookies) { + return $request; + } + foreach ($this->cookies->getAll() as $cookie) { + $request = FigRequestCookies::set($request, $cookie); + } + return $request; + } + + private function saveCookies($data) { + if ($data instanceof SetCookies) { + $cookies = []; + foreach ($data->getAll() as $cookie) { + $cookies[] = Cookie::create($cookie->getName(), $cookie->getValue()); + } + $this->cookies = new Cookies($cookies); + } + elseif ($data instanceof Cookies) { + $this->cookies = $data; + } + $this->cache->set(self::CACHE_KEY_COOKIES, $this->cookies); + } + + public function makeRequest( + string $method, + $uri, + array $headers = [], + $body = null, + bool $include_cookies = TRUE, + bool $save_cookies = TRUE + ): ResponseInterface { + $headers['User-Agent'] = Client::$user_agent; + $request = new Request($method, Client::$base_url . $uri, $headers, $body); + if ($include_cookies) { + $request = $this->applyRequestCookies($request); + } + $response = $this->httpClient->sendRequest($request); + if ($save_cookies) { + $cookies = SetCookies::fromResponse($response); + // Sometimes server don't respond with cookie header. + if (count($cookies->getAll()) > 0) { + $this->saveCookies($cookies); + } + } + return $response; + } + +} diff --git a/src/Traits/TopicTrait.php b/src/Traits/TopicTrait.php new file mode 100644 index 0000000..260e775 --- /dev/null +++ b/src/Traits/TopicTrait.php @@ -0,0 +1,41 @@ +makeRequest('GET', "/t{$id}?spmode=full&dl=names"); + if ($response->getStatusCode() !== 200) { + throw new TopicNotFound(); + } + $crawler = new Crawler($response->getBody()->getContents()); + + $title = $crawler->filter('a.maintitle')->first()->text(); + + $crumbs = $crawler->filter('.bodyline td.nav > span, .bodyline td.nav > h2'); + if ($crumbs->eq(1)->text() === 'Відео') { + + } + + $g = 1; + + $imdb_id = NULL; + $imdb_link = $crawler->filter('a[href*="imdb.com"]'); + if ($imdb_link->count() > 0) { + preg_match("/tt\\d{7,8}/", $imdb_link->first()->attr('href'), $imdb_ids); + if (!empty($imdb_ids)) { + $imdb_id = reset($imdb_ids); + } + } + + } + +}