I have the following SQL query
select s.channel_id, count(1) as unique_views from ( select m.channel_id from view_statistics vs inner join medias m on m.id = vs.media_id group by m.channel_id, vs.session_id ) s group by s.channel_id
that selects from this table view_statistics
(which stores millions of rows, narrowed down to ~2M here for testing purposes):
CREATE TABLE `view_statistics` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, `media_id` int(10) unsigned NOT NULL, `file_id` int(10) unsigned NOT NULL, `session_id` int(10) unsigned NOT NULL, `browser_id` int(10) unsigned NOT NULL, `device_id` int(10) unsigned NOT NULL, `operating_system_id` int(10) unsigned NOT NULL, `country_id` int(10) unsigned NOT NULL, `datetime_from` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_to` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_yearly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_monthly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_weekly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_daily` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `datetime_hourly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `hits` int(10) unsigned NOT NULL, `bytes` bigint(20) unsigned NOT NULL, `created_at` timestamp NULL DEFAULT NULL, `updated_at` timestamp NULL DEFAULT NULL, PRIMARY KEY (`id`), KEY `view_statistics_datetime_hourly_index` (`datetime_hourly`), KEY `view_statistics_session_id_datetime_hourly_index` (`session_id`,`datetime_hourly`), KEY `view_statistics_datetime_daily_index` (`datetime_daily`), KEY `view_statistics_session_id_datetime_daily_index` (`session_id`,`datetime_daily`), KEY `view_statistics_datetime_weekly_index` (`datetime_weekly`), KEY `view_statistics_session_id_datetime_weekly_index` (`session_id`,`datetime_weekly`), KEY `view_statistics_datetime_monthly_index` (`datetime_monthly`), KEY `view_statistics_session_id_datetime_monthly_index` (`session_id`,`datetime_monthly`), KEY `view_statistics_datetime_yearly_index` (`datetime_yearly`), KEY `view_statistics_session_id_datetime_yearly_index` (`session_id`,`datetime_yearly`), KEY `view_statistics_media_id_foreign` (`media_id`), KEY `view_statistics_file_id_foreign` (`file_id`), KEY `view_statistics_browser_id_foreign` (`browser_id`), KEY `view_statistics_device_id_foreign` (`device_id`), KEY `view_statistics_operating_system_id_foreign` (`operating_system_id`), KEY `view_statistics_country_id_foreign` (`country_id`), CONSTRAINT `view_statistics_browser_id_foreign` FOREIGN KEY (`browser_id`) REFERENCES `statistic_browsers` (`id`), CONSTRAINT `view_statistics_country_id_foreign` FOREIGN KEY (`country_id`) REFERENCES `statistic_countries` (`id`), CONSTRAINT `view_statistics_device_id_foreign` FOREIGN KEY (`device_id`) REFERENCES `statistic_devices` (`id`), CONSTRAINT `view_statistics_file_id_foreign` FOREIGN KEY (`file_id`) REFERENCES `files` (`id`), CONSTRAINT `view_statistics_media_id_foreign` FOREIGN KEY (`media_id`) REFERENCES `medias` (`id`), CONSTRAINT `view_statistics_operating_system_id_foreign` FOREIGN KEY (`operating_system_id`) REFERENCES `statistic_operating_systems` (`id`), CONSTRAINT `view_statistics_session_id_foreign` FOREIGN KEY (`session_id`) REFERENCES `statistic_sessions` (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1366805 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
And medias
CREATE TABLE `medias` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, `uuid` binary(16) NOT NULL, `channel_id` int(10) unsigned NOT NULL, `folder_id` int(10) unsigned NOT NULL, `category_id` int(10) unsigned DEFAULT NULL, `language_id` int(10) unsigned DEFAULT NULL, `name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL, `slug` varchar(255) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, `description` text COLLATE utf8mb4_unicode_ci DEFAULT NULL, `validated` tinyint(1) NOT NULL DEFAULT 0, `published` tinyint(1) NOT NULL DEFAULT 0, `streams` tinyint(3) unsigned DEFAULT NULL, `duration` decimal(7,2) DEFAULT NULL, `client` enum('http','cli') COLLATE utf8mb4_unicode_ci DEFAULT NULL, `http_ip` varchar(45) COLLATE utf8mb4_unicode_ci DEFAULT NULL, `http_user_agent` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL, `cli_user` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL, `cli_hostname` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL, `debug` tinyint(1) NOT NULL DEFAULT 0, `v1` tinyint(1) NOT NULL DEFAULT 0 COMMENT 'Imported from V1', `collected_at` timestamp NULL DEFAULT NULL, `ready_at` timestamp NULL DEFAULT NULL, `published_at` timestamp NULL DEFAULT NULL, `created_at` timestamp NULL DEFAULT NULL, `updated_at` timestamp NULL DEFAULT NULL, `discarded_at` timestamp NULL DEFAULT NULL, `deleted_at` timestamp NULL DEFAULT NULL, `uniq` binary(32) GENERATED ALWAYS AS (unhex(sha2(concat_ws('|',`folder_id`,`name`,ifnull(`deleted_at`,0)),256))) STORED, PRIMARY KEY (`id`), UNIQUE KEY `medias_uuid_unique` (`uuid`), UNIQUE KEY `medias_uniq_unique` (`uniq`), KEY `medias_channel_id_foreign` (`channel_id`), KEY `medias_folder_id_foreign` (`folder_id`), KEY `medias_category_id_foreign` (`category_id`), KEY `medias_language_id_foreign` (`language_id`), KEY `medias_ready_at_index` (`ready_at`), KEY `medias_discarded_at_index` (`discarded_at`), CONSTRAINT `medias_category_id_foreign` FOREIGN KEY (`category_id`) REFERENCES `categories` (`id`), CONSTRAINT `medias_channel_id_foreign` FOREIGN KEY (`channel_id`) REFERENCES `channels` (`id`), CONSTRAINT `medias_folder_id_foreign` FOREIGN KEY (`folder_id`) REFERENCES `folders` (`id`), CONSTRAINT `medias_language_id_foreign` FOREIGN KEY (`language_id`) REFERENCES `languages` (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1036 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci"
Other related tables (browsers,device, operating_system, etc.) are listing tables, nothing fancy.
and altought the query is simple to me, it runs over many seconds with the following explain:
id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <derived2> ALL NULL NULL NULL NULL 649152 Using temporary; Using filesort 2 DERIVED m index PRIMARY medias_channel_id_foreign 4 NULL 1008 Using index; Using temporary; Using filesort 2 DERIVED vs ref view_statistics_media_id_foreign view_statistics_media_id_foreign 4 vod.m.id 644
I don’t see how I can optimize this so simple query at all, can anyone point me to the right direction ?
Advertisement
Answer
An equivalent query would be:
select m.channel_id, count(distinct vs.session_id) as unique_views from view_statistics vs inner join medias m on m.id = vs.media_id group by m.channel_id
Ideal indexes for this query would probably be medias(channel_id, id)
(already exists) and view_statistics(media_id, session_id)
. However – It might already improve the performance with the given indexes.