Skip to content
Advertisement

Optimizing SQL query from derived table

I have the following SQL query

select s.channel_id, count(1) as unique_views
from (
    select m.channel_id
    from view_statistics vs
    inner join medias m on m.id = vs.media_id
    group by m.channel_id, vs.session_id
) s
group by s.channel_id

that selects from this table view_statistics (which stores millions of rows, narrowed down to ~2M here for testing purposes):

CREATE TABLE `view_statistics` ( 
    `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
    `media_id` int(10) unsigned NOT NULL,
    `file_id` int(10) unsigned NOT NULL,
    `session_id` int(10) unsigned NOT NULL,
    `browser_id` int(10) unsigned NOT NULL,
    `device_id` int(10) unsigned NOT NULL,
    `operating_system_id` int(10) unsigned NOT NULL,
    `country_id` int(10) unsigned NOT NULL,
    `datetime_from` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_to` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_yearly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_monthly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_weekly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_daily` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `datetime_hourly` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
    `hits` int(10) unsigned NOT NULL,
    `bytes` bigint(20) unsigned NOT NULL,
    `created_at` timestamp NULL DEFAULT NULL,
    `updated_at` timestamp NULL DEFAULT NULL, 
    PRIMARY KEY (`id`), 
    KEY `view_statistics_datetime_hourly_index` (`datetime_hourly`), 
    KEY `view_statistics_session_id_datetime_hourly_index` (`session_id`,`datetime_hourly`), 
    KEY `view_statistics_datetime_daily_index` (`datetime_daily`),
    KEY `view_statistics_session_id_datetime_daily_index` (`session_id`,`datetime_daily`),
    KEY `view_statistics_datetime_weekly_index` (`datetime_weekly`),
    KEY `view_statistics_session_id_datetime_weekly_index` (`session_id`,`datetime_weekly`),
    KEY `view_statistics_datetime_monthly_index` (`datetime_monthly`),
    KEY `view_statistics_session_id_datetime_monthly_index` (`session_id`,`datetime_monthly`),
    KEY `view_statistics_datetime_yearly_index` (`datetime_yearly`),
    KEY `view_statistics_session_id_datetime_yearly_index` (`session_id`,`datetime_yearly`),
    KEY `view_statistics_media_id_foreign` (`media_id`),
    KEY `view_statistics_file_id_foreign` (`file_id`),
    KEY `view_statistics_browser_id_foreign` (`browser_id`),
    KEY `view_statistics_device_id_foreign` (`device_id`),
    KEY `view_statistics_operating_system_id_foreign` (`operating_system_id`),
    KEY `view_statistics_country_id_foreign` (`country_id`),
    CONSTRAINT `view_statistics_browser_id_foreign` FOREIGN KEY (`browser_id`) REFERENCES `statistic_browsers` (`id`),
    CONSTRAINT `view_statistics_country_id_foreign` FOREIGN KEY (`country_id`) REFERENCES `statistic_countries` (`id`),
    CONSTRAINT `view_statistics_device_id_foreign` FOREIGN KEY (`device_id`) REFERENCES `statistic_devices` (`id`),
    CONSTRAINT `view_statistics_file_id_foreign` FOREIGN KEY (`file_id`) REFERENCES `files` (`id`),
    CONSTRAINT `view_statistics_media_id_foreign` FOREIGN KEY (`media_id`) REFERENCES `medias` (`id`),
    CONSTRAINT `view_statistics_operating_system_id_foreign` FOREIGN KEY (`operating_system_id`) REFERENCES `statistic_operating_systems` (`id`),
    CONSTRAINT `view_statistics_session_id_foreign` FOREIGN KEY (`session_id`) REFERENCES `statistic_sessions` (`id`) 
) ENGINE=InnoDB AUTO_INCREMENT=1366805 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci

And medias

CREATE TABLE `medias` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `uuid` binary(16) NOT NULL,
  `channel_id` int(10) unsigned NOT NULL,
  `folder_id` int(10) unsigned NOT NULL,
  `category_id` int(10) unsigned DEFAULT NULL,
  `language_id` int(10) unsigned DEFAULT NULL,
  `name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
  `slug` varchar(255) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
  `description` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `validated` tinyint(1) NOT NULL DEFAULT 0,
  `published` tinyint(1) NOT NULL DEFAULT 0,
  `streams` tinyint(3) unsigned DEFAULT NULL,
  `duration` decimal(7,2) DEFAULT NULL,
  `client` enum('http','cli') COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `http_ip` varchar(45) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `http_user_agent` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `cli_user` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `cli_hostname` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `debug` tinyint(1) NOT NULL DEFAULT 0,
  `v1` tinyint(1) NOT NULL DEFAULT 0 COMMENT 'Imported from V1',
  `collected_at` timestamp NULL DEFAULT NULL,
  `ready_at` timestamp NULL DEFAULT NULL,
  `published_at` timestamp NULL DEFAULT NULL,
  `created_at` timestamp NULL DEFAULT NULL,
  `updated_at` timestamp NULL DEFAULT NULL,
  `discarded_at` timestamp NULL DEFAULT NULL,
  `deleted_at` timestamp NULL DEFAULT NULL,
  `uniq` binary(32) GENERATED ALWAYS AS (unhex(sha2(concat_ws('|',`folder_id`,`name`,ifnull(`deleted_at`,0)),256))) STORED,
  PRIMARY KEY (`id`),
  UNIQUE KEY `medias_uuid_unique` (`uuid`),
  UNIQUE KEY `medias_uniq_unique` (`uniq`),
  KEY `medias_channel_id_foreign` (`channel_id`),
  KEY `medias_folder_id_foreign` (`folder_id`),
  KEY `medias_category_id_foreign` (`category_id`),
  KEY `medias_language_id_foreign` (`language_id`),
  KEY `medias_ready_at_index` (`ready_at`),
  KEY `medias_discarded_at_index` (`discarded_at`),
  CONSTRAINT `medias_category_id_foreign` FOREIGN KEY (`category_id`) REFERENCES `categories` (`id`),
  CONSTRAINT `medias_channel_id_foreign` FOREIGN KEY (`channel_id`) REFERENCES `channels` (`id`),
  CONSTRAINT `medias_folder_id_foreign` FOREIGN KEY (`folder_id`) REFERENCES `folders` (`id`),
  CONSTRAINT `medias_language_id_foreign` FOREIGN KEY (`language_id`) REFERENCES `languages` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1036 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci"

Other related tables (browsers,device, operating_system, etc.) are listing tables, nothing fancy.

and altought the query is simple to me, it runs over many seconds with the following explain:

id  select_type     table       type    possible_keys                    key                                key_len ref         rows    Extra
1   PRIMARY         <derived2>  ALL     NULL                             NULL                               NULL    NULL        649152  Using temporary; Using filesort
2   DERIVED         m           index   PRIMARY                          medias_channel_id_foreign          4       NULL        1008    Using index; Using temporary; Using filesort
2   DERIVED         vs          ref     view_statistics_media_id_foreign view_statistics_media_id_foreign   4       vod.m.id    644 

I don’t see how I can optimize this so simple query at all, can anyone point me to the right direction ?

Advertisement

Answer

An equivalent query would be:

select m.channel_id, count(distinct vs.session_id) as unique_views
from view_statistics vs
inner join medias m on m.id = vs.media_id
group by m.channel_id

Ideal indexes for this query would probably be medias(channel_id, id) (already exists) and view_statistics(media_id, session_id). However – It might already improve the performance with the given indexes.

User contributions licensed under: CC BY-SA
2 People found this is helpful
Advertisement