Refactored classes and functionality, added more data caching, working on topic parser.

master
conky 2022-10-18 02:16:43 +03:00
parent 058bbfbc03
commit a102009ec1
10 changed files with 311 additions and 173 deletions

View File

@ -2,30 +2,41 @@
namespace Toloka\PhpApi;
use Dflydev\FigCookies\Cookie;
use Dflydev\FigCookies\Cookies;
use Dflydev\FigCookies\FigRequestCookies;
use Dflydev\FigCookies\SetCookies;
use GuzzleHttp\Psr7\Request;
use Psr\Http\Client\ClientInterface as HttpClientInterface;
use Psr\Http\Message\RequestInterface;
use Psr\Log\LoggerInterface;
use Psr\SimpleCache\CacheInterface;
use GuzzleHttp\Psr7\Utils;
use Symfony\Component\DomCrawler\Crawler;
use Toloka\PhpApi\Exception\Auth\AuthException;
use Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials;
use Toloka\PhpApi\Traits\AuthTrait;
use Toloka\PhpApi\Traits\HttpHelperTrait;
use Toloka\PhpApi\Traits\TopicTrait;
class Client implements ClientInterface {
use HttpHelperTrait;
use AuthTrait;
use TopicTrait;
const CACHE_KEY_COOKIES = 'cookies';
const CACHE_KEY_LOGGED_IN = 'is_logged_in';
/**
* @var ClientInterface|null
*/
private static ?ClientInterface $instance;
/**
* Hurtom Toloka base url.
*
* @var string
*/
protected string $base_url;
public static string $base_url = 'https://toloka.to';
/**
* User agent to use for requests.
*
* @var string
*/
public static string $user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36';
/**
* Http client.
@ -48,25 +59,18 @@ class Client implements ClientInterface {
*/
protected LoggerInterface $logger;
/**
* @var Cookies
*/
protected ?Cookies $cookies = NULL;
/**
* Toloka client constructor.
*
* @param string $base_url
* @param CacheInterface|NULL $cache
* @param LoggerInterface|NULL $logger
* @param \Psr\Http\Client\ClientInterface $httpClient
* @param \Psr\SimpleCache\CacheInterface $cache
* @param \Psr\Log\LoggerInterface $logger
*/
public function __construct(
string $base_url,
HttpClientInterface $httpClient,
CacheInterface $cache,
LoggerInterface $logger
) {
$this->base_url = $base_url;
$this->httpClient = $httpClient;
$this->cache = $cache;
$this->logger = $logger;
@ -79,104 +83,8 @@ class Client implements ClientInterface {
}
}
/**
* {@inheritDoc}
*
* Returns the value of this client's `base_url` property unchanged.
*/
public function getBaseUrl(): string {
return $this->base_url;
}
/**
 * {@inheritDoc}
 *
 * Does nothing when isLoggedIn() already reports an authorized session.
 * Otherwise posts the credentials to `/login.php` and inspects the response:
 * a 302 is treated as a successful login and its cookies are stored; a 200
 * is scanned for the "wrong username or password" message.
 *
 * @throws \Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials
 *   When the returned page contains the invalid-credentials message.
 * @throws \Toloka\PhpApi\Exception\Auth\AuthException
 *   When the response is neither a redirect nor a recognised error page.
 */
public function login(string $login, string $password): void {
  if ($this->isLoggedIn()) {
    return;
  }

  $body = http_build_query([
    'username' => $login,
    'password' => $password,
    'autologin' => 'on',
    'ssl' => 'on',
    'redirect' => '',
    'login' => 'Вхід',
  ]);
  $request = new Request('POST', $this->getBaseUrl() . '/login.php', [
    'Content-Type' => 'application/x-www-form-urlencoded',
  ], Utils::streamFor($body));
  $response = $this->httpClient->sendRequest($request);

  if ($response->getStatusCode() === 302) {
    // If redirected - this means successful login, can save cookies.
    $this->saveCookies(SetCookies::fromResponse($response));
    // Stop here: without this return the method fell through to the generic
    // AuthException below even though the login succeeded.
    return;
  }

  if ($response->getStatusCode() === 200) {
    // In case of returned page - try to find an error and throw exception.
    $crawler = new Crawler($response->getBody()->getContents());
    // An empty-string default keeps text() from throwing
    // InvalidArgumentException when the selector matches nothing, so the
    // caller still gets an AuthException in that case.
    $text = $crawler->filter('.forumline .row1 span.gen')->text('', TRUE);
    if (str_contains($text, 'Такий псевдонім не існує, або не збігається пароль.')) {
      throw new InvalidAuthCredentials();
    }
  }

  throw new AuthException();
}
/**
 * Stores cookies on the client and writes them to the cache.
 *
 * A SetCookies collection is converted into request cookies (name and value
 * only); a Cookies collection is stored as is. Any other value leaves the
 * current cookies untouched. The cache entry is written in every case.
 *
 * @param mixed $data
 *   Either a SetCookies or a Cookies collection.
 */
private function saveCookies($data) {
  if ($data instanceof Cookies) {
    $this->cookies = $data;
  }
  elseif ($data instanceof SetCookies) {
    $this->cookies = new Cookies(array_map(
      static fn ($set_cookie) => Cookie::create($set_cookie->getName(), $set_cookie->getValue()),
      $data->getAll()
    ));
  }

  $this->cache->set(self::CACHE_KEY_COOKIES, $this->cookies);
}
/**
 * Adds every stored cookie to the given request.
 *
 * @param \Psr\Http\Message\RequestInterface $request
 *   Request to decorate.
 *
 * @return \Psr\Http\Message\RequestInterface
 *   The same request when no cookies are stored, otherwise the request
 *   returned after setting each stored cookie on it.
 */
private function applyRequestCookies(RequestInterface $request): RequestInterface {
  $jar = $this->cookies;
  if (!$jar) {
    return $request;
  }

  return array_reduce(
    $jar->getAll(),
    static fn (RequestInterface $carry, $cookie): RequestInterface => FigRequestCookies::set($carry, $cookie),
    $request
  );
}
/**
 * {@inheritDoc}
 *
 * Requests the base URL with the stored cookies and looks for a navigation
 * link whose text is exactly "Профіль"; its presence is taken as proof of an
 * authorized session.
 */
public function isLoggedIn(): bool {
  $request = $this->applyRequestCookies(new Request('GET', $this->getBaseUrl()));
  $response = $this->httpClient->sendRequest($request);

  // Anything but a regular page cannot contain the navigation menu.
  if ($response->getStatusCode() !== 200) {
    return FALSE;
  }

  $page = new Crawler($response->getBody()->getContents());
  foreach ($page->filter('table.navie6fix a') as $link) {
    // If there is a link to profile, then we authorized.
    if ($link->textContent === 'Профіль') {
      return TRUE;
    }
  }

  return FALSE;
}
/**
* {@inheritDoc}
*
* Not implemented yet: the body is empty, so calling this method has no
* effect.
*/
public function logout(): void {
// TODO: Implement logout() method.
}
/**
* {@inheritDoc}
*/
public function getTopic(int $id): TopicInterface {
/**
 * Returns the shared client instance.
 *
 * NOTE(review): the code that assigns self::$instance is not visible in this
 * part of the file; presumably the constructor does it - confirm.
 *
 * @return ClientInterface
 *   The stored client instance.
 *
 * @throws \LogicException
 *   When no instance has been stored yet.
 */
public static function getInstance(): ClientInterface {
  // The static property is typed and has no default value, so reading it
  // before assignment is a fatal Error; isset() is safe on an uninitialized
  // (or NULL) typed property and lets us fail with a clear message instead.
  if (!isset(self::$instance)) {
    throw new \LogicException('Toloka client has not been instantiated yet.');
  }

  return self::$instance;
}
}

View File

@ -4,14 +4,6 @@ namespace Toloka\PhpApi;
interface ClientInterface {
/**
* Get Toloka base url.
*
* @return string
* Absolute URL to toloka.
*/
public function getBaseUrl(): string;
/**
* Login using credentials.
*
@ -22,6 +14,7 @@ interface ClientInterface {
*
* @throws \Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials
* @throws \Toloka\PhpApi\Exception\Auth\TooManyLoginAttempts
* @throws \Toloka\PhpApi\Exception\Auth\AuthException
* @throws \Psr\Http\Client\ClientExceptionInterface
*/
public function login(string $login, string $password): void;

View File

@ -0,0 +1,9 @@
<?php
namespace Toloka\PhpApi\Exception\Topic;
/**
* Exception with the default message "Selected topic not found.".
*/
class TopicNotFound extends \Exception {
// Default message used when the exception is created without arguments.
protected $message = 'Selected topic not found.';
}

View File

@ -0,0 +1,7 @@
<?php
namespace Toloka\PhpApi\Models\Topic;
/**
* Movie topic model.
*
* Adds nothing of its own yet; all behaviour is inherited from TopicBase.
*/
class Movie extends TopicBase {
}

View File

@ -0,0 +1,49 @@
<?php
namespace Toloka\PhpApi\Models\Topic;
use Toloka\PhpApi\Client;
use Toloka\PhpApi\TopicInterface;
/**
 * Generic topic model; also acts as the factory for specialised topic types.
 */
class TopicBase implements TopicInterface {

  /**
   * Numeric topic id.
   *
   * @var int
   */
  protected int $id;

  /**
   * Topic title.
   *
   * @var string
   */
  protected string $title;

  /**
   * Builds a topic object from raw data.
   *
   * @param array $data
   *   Topic data. Recognised keys:
   *   - type: 'movie' selects the Movie model, anything else (or a missing
   *     key) falls back to TopicBase.
   *   - id: numeric topic id.
   *   - title: topic title.
   *
   * @return TopicInterface
   *   The created topic.
   */
  public static function create(array $data): TopicInterface {
    // A missing 'type' key must not raise a warning; it simply selects the
    // generic model.
    $class = match ($data['type'] ?? NULL) {
      'movie' => Movie::class,
      default => self::class,
    };

    $topic = new $class();
    // Populate the typed properties here: the models expose no setters, and
    // reading an unassigned typed property is a fatal error.
    if (isset($data['id'])) {
      $topic->id = (int) $data['id'];
    }
    if (isset($data['title'])) {
      $topic->title = (string) $data['title'];
    }

    // The declared return type requires an actual return value; previously
    // the method ended without one and always failed with a TypeError.
    return $topic;
  }

  /**
   * Returns the topic id.
   *
   * @return int
   */
  public function id(): int {
    return $this->id;
  }

  /**
   * Returns the topic title.
   *
   * @return string
   */
  public function title(): string {
    return $this->title;
  }

  /**
   * {@inheritDoc}
   *
   * With $absolute set to FALSE only the path ("/t<id>") is returned;
   * otherwise the path is prefixed with Client::$base_url.
   */
  public function url(bool $absolute = TRUE): string {
    $path = '/t' . $this->id();

    return $absolute ? Client::$base_url . $path : $path;
  }

}

View File

@ -1,9 +0,0 @@
<?php
namespace Toloka\PhpApi\Topic;
use Toloka\PhpApi\TopicBase;
/**
* Movie topic.
*
* Declares no members of its own; everything is inherited from TopicBase.
*/
class Movie extends TopicBase {
}

View File

@ -1,38 +0,0 @@
<?php
namespace Toloka\PhpApi;
/**
 * Topic model bound to the client it was created for.
 */
class TopicBase implements TopicInterface {

  /**
   * Client whose base URL is used when building the topic URL.
   *
   * @var Client
   */
  protected Client $client;

  /**
   * Numeric topic id; unset until setId() is called.
   *
   * @var int
   */
  protected int $id;

  /**
   * Creates a topic bound to the given client.
   *
   * @param Client $client
   *   Client instance to keep a reference to.
   */
  public function __construct(Client $client) {
    $this->client = $client;
  }

  /**
   * Returns the topic id.
   *
   * @return int
   */
  public function id(): int {
    return $this->id;
  }

  /**
   * Sets the topic id.
   *
   * @param int $id
   *   New topic id.
   */
  public function setId(int $id): void {
    $this->id = $id;
  }

  /**
   * {@inheritDoc}
   *
   * Concatenates the client's base URL, the letter "t" and the topic id.
   * The $absolute argument is not consulted by this implementation.
   */
  public function url(bool $absolute = TRUE): string {
    $base = $this->client->getBaseUrl();

    return "{$base}t{$this->id()}";
  }

}

View File

@ -0,0 +1,109 @@
<?php
namespace Toloka\PhpApi\Traits;
use GuzzleHttp\Psr7\Utils;
use Symfony\Component\DomCrawler\Crawler;
use Toloka\PhpApi\Exception\Auth\AuthException;
use Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials;
/**
 * Login, logout and session-state helpers for the Toloka client.
 *
 * Relies on members supplied by the using class: `$this->cache`,
 * `$this->cookies`, `makeRequest()` and the `CACHE_KEY_LOGGED_IN` constant.
 */
trait AuthTrait {

  /**
   * {@inheritDoc}
   *
   * Does nothing when isLoggedIn() already reports an authorized session.
   * Otherwise posts the credentials to `/login.php` (without sending stored
   * cookies) and inspects the response: after a 302 the session is verified
   * through isLoggedIn(); a 200 is scanned for the "wrong username or
   * password" message.
   *
   * @throws \Toloka\PhpApi\Exception\Auth\InvalidAuthCredentials
   *   When the returned page contains the invalid-credentials message.
   * @throws \Toloka\PhpApi\Exception\Auth\AuthException
   *   When the login could not be confirmed for any other reason.
   */
  public function login(string $login, string $password): void {
    if ($this->isLoggedIn()) {
      return;
    }

    // Flush cached logged in state.
    $this->cache->delete(self::CACHE_KEY_LOGGED_IN);

    $body = http_build_query([
      'username' => $login,
      'password' => $password,
      'autologin' => 'on',
      'ssl' => 'on',
      'redirect' => '',
      'login' => 'Вхід',
    ]);
    $response = $this->makeRequest('POST', '/login.php', [
      'Content-Type' => 'application/x-www-form-urlencoded',
    ], Utils::streamFor($body), FALSE);

    if ($response->getStatusCode() === 302) {
      // Force check if we logged in successfully, to be sure + cache this info.
      if (!$this->isLoggedIn()) {
        throw new AuthException();
      }
      return;
    }

    if ($response->getStatusCode() === 200) {
      // In case of returned page - try to find an error and throw exception.
      $crawler = new Crawler($response->getBody()->getContents());
      // An empty-string default keeps text() from throwing
      // InvalidArgumentException when the selector matches nothing, so the
      // caller still gets an AuthException in that case.
      $text = $crawler->filter('.forumline .row1 span.gen')->text('', TRUE);
      if (str_contains($text, 'Такий псевдонім не існує, або не збігається пароль.')) {
        throw new InvalidAuthCredentials();
      }
    }

    throw new AuthException();
  }

  /**
   * {@inheritDoc}
   *
   * The answer is read from the cache when present. Otherwise the front page
   * is requested and searched for a navigation link whose text is exactly
   * "Профіль"; the outcome is then written to the cache.
   */
  public function isLoggedIn(): bool {
    $cached = $this->cache->get(self::CACHE_KEY_LOGGED_IN, NULL);
    if ($cached === NULL) {
      $response = $this->makeRequest('GET', '/');
      $cached = FALSE;
      if ($response->getStatusCode() === 200) {
        $crawler = new Crawler($response->getBody()->getContents());
        $menu_links = $crawler->filter('table.navie6fix a');
        foreach ($menu_links as $menu_link) {
          // If there is a link to profile, then we authorized.
          if ($menu_link->textContent === 'Профіль') {
            $cached = TRUE;
            break;
          }
        }
      }
      // Save logged in state in cache.
      $this->cache->set(self::CACHE_KEY_LOGGED_IN, $cached);
    }

    return $cached;
  }

  /**
   * {@inheritDoc}
   *
   * Does nothing when no session is active or no session id cookie is
   * stored. A 302 answer to the logout request is recorded in the cache as
   * "not logged in".
   */
  public function logout(): void {
    if (!$this->isLoggedIn()) {
      return;
    }

    // Flush cached logged in state.
    $this->cache->delete(self::CACHE_KEY_LOGGED_IN);

    $session_id = $this->getUserSessionId();
    if (!$session_id) {
      return;
    }

    $response = $this->makeRequest('GET', '/login.php?logout=true&sid=' . $session_id);
    if ($response->getStatusCode() === 302) {
      $this->cache->set(self::CACHE_KEY_LOGGED_IN, FALSE);
    }
  }

  /**
   * Get current session id.
   *
   * @return string|null
   *   Value of the `toloka_sid` cookie, or NULL when no cookies are stored or
   *   the cookie is absent.
   */
  private function getUserSessionId(): ?string {
    // The cookie jar starts out as NULL (nothing stored yet), while the
    // logged-in flag may still come from the cache - guard before calling
    // methods on it.
    if ($this->cookies === NULL || !$this->cookies->has('toloka_sid')) {
      return NULL;
    }

    return $this->cookies->get('toloka_sid')->getValue();
  }

}

View File

@ -0,0 +1,69 @@
<?php
namespace Toloka\PhpApi\Traits;
use Dflydev\FigCookies\Cookie;
use Dflydev\FigCookies\Cookies;
use Dflydev\FigCookies\FigRequestCookies;
use Dflydev\FigCookies\SetCookies;
use GuzzleHttp\Psr7\Request;
use Psr\Http\Message\RequestInterface;
use Psr\Http\Message\ResponseInterface;
use Toloka\PhpApi\Client;
/**
 * HTTP request and cookie handling helpers for the Toloka client.
 *
 * Relies on members supplied by the using class: `$this->httpClient`,
 * `$this->cache` and the `CACHE_KEY_COOKIES` constant.
 */
trait HttpHelperTrait {

  /**
   * Cookies sent with outgoing requests; NULL until some are stored.
   *
   * @var \Dflydev\FigCookies\Cookies|null
   */
  protected ?Cookies $cookies = NULL;

  /**
   * Adds every stored cookie to the given request.
   *
   * @param \Psr\Http\Message\RequestInterface $request
   *   Request to decorate.
   *
   * @return \Psr\Http\Message\RequestInterface
   *   The same request when no cookies are stored, otherwise the request
   *   returned after setting each stored cookie on it.
   */
  protected function applyRequestCookies(RequestInterface $request): RequestInterface {
    if (!$this->cookies) {
      return $request;
    }

    foreach ($this->cookies->getAll() as $cookie) {
      $request = FigRequestCookies::set($request, $cookie);
    }

    return $request;
  }

  /**
   * Stores cookies on the client and writes them to the cache.
   *
   * A SetCookies collection (taken from a response) is merged into the
   * current jar by cookie name; a Cookies collection replaces the jar. Any
   * other value leaves the jar untouched. The cache entry is written in
   * every case.
   *
   * @param mixed $data
   *   Either a SetCookies or a Cookies collection.
   */
  private function saveCookies($data): void {
    if ($data instanceof SetCookies) {
      // Merge instead of replace: makeRequest() stores cookies after every
      // response, and a response that sets only some cookies must not wipe
      // the others (the session cookie in particular).
      $jar = $this->cookies ?? new Cookies();
      foreach ($data->getAll() as $set_cookie) {
        $jar = $jar->with(Cookie::create($set_cookie->getName(), $set_cookie->getValue()));
      }
      $this->cookies = $jar;
    }
    elseif ($data instanceof Cookies) {
      $this->cookies = $data;
    }

    $this->cache->set(self::CACHE_KEY_COOKIES, $this->cookies);
  }

  /**
   * Sends a request to the site and returns the response.
   *
   * @param string $method
   *   HTTP method.
   * @param mixed $uri
   *   Value appended to Client::$base_url to form the request URL.
   * @param array $headers
   *   Request headers; the `User-Agent` entry is always overwritten with
   *   Client::$user_agent.
   * @param mixed $body
   *   Request body, passed to the Request constructor unchanged.
   * @param bool $include_cookies
   *   Whether stored cookies are added to the request.
   * @param bool $save_cookies
   *   Whether cookies set by the response are stored.
   *
   * @return \Psr\Http\Message\ResponseInterface
   *   Response returned by the HTTP client.
   */
  public function makeRequest(
    string $method,
    $uri,
    array $headers = [],
    $body = null,
    bool $include_cookies = TRUE,
    bool $save_cookies = TRUE
  ): ResponseInterface {
    $headers['User-Agent'] = Client::$user_agent;
    $request = new Request($method, Client::$base_url . $uri, $headers, $body);
    if ($include_cookies) {
      $request = $this->applyRequestCookies($request);
    }

    $response = $this->httpClient->sendRequest($request);

    if ($save_cookies) {
      $cookies = SetCookies::fromResponse($response);
      // Sometimes the server does not respond with a cookie header.
      if (count($cookies->getAll()) > 0) {
        $this->saveCookies($cookies);
      }
    }

    return $response;
  }

}

View File

@ -0,0 +1,41 @@
<?php
namespace Toloka\PhpApi\Traits;
use Symfony\Component\DomCrawler\Crawler;
use Toloka\PhpApi\Exception\Topic\TopicNotFound;
use Toloka\PhpApi\TopicInterface;
/**
 * Topic loading for the Toloka client.
 *
 * Relies on `makeRequest()` supplied by the using class.
 */
trait TopicTrait {

  /**
   * {@inheritDoc}
   *
   * Requests the topic page, extracts the title, the section breadcrumb and
   * an IMDb id (when an imdb.com link is present) and builds a topic object
   * from them.
   *
   * @throws \Toloka\PhpApi\Exception\Topic\TopicNotFound
   *   When the response status is not 200 or the page has no topic title.
   */
  public function getTopic(int $id): TopicInterface {
    $response = $this->makeRequest('GET', "/t{$id}?spmode=full&dl=names");
    if ($response->getStatusCode() !== 200) {
      throw new TopicNotFound();
    }

    $crawler = new Crawler($response->getBody()->getContents());

    // text() throws on an empty node list; a page without a title link is
    // reported as a missing topic instead.
    $title_link = $crawler->filter('a.maintitle');
    if ($title_link->count() === 0) {
      throw new TopicNotFound();
    }
    $title = $title_link->first()->text();

    $type = NULL;
    $crumbs = $crawler->filter('.bodyline td.nav > span, .bodyline td.nav > h2');
    // NOTE(review): the original branch for the "Відео" breadcrumb was empty;
    // mapping it to the 'movie' topic type is an assumption - confirm.
    if ($crumbs->count() > 1 && $crumbs->eq(1)->text() === 'Відео') {
      $type = 'movie';
    }

    $imdb_id = NULL;
    $imdb_link = $crawler->filter('a[href*="imdb.com"]');
    if ($imdb_link->count() > 0) {
      $href = (string) $imdb_link->first()->attr('href');
      if (preg_match('/tt\d{7,8}/', $href, $imdb_ids) === 1) {
        $imdb_id = $imdb_ids[0];
      }
    }

    // The declared return type requires a topic object; previously the method
    // ended without a return statement and always failed with a TypeError.
    return \Toloka\PhpApi\Models\Topic\TopicBase::create([
      'id' => $id,
      'title' => $title,
      'type' => $type,
      'imdb_id' => $imdb_id,
    ]);
  }

}