Skip to content

Latest commit

 

History

History
913 lines (758 loc) · 29.3 KB

5.DQL及其他规范.md

File metadata and controls

913 lines (758 loc) · 29.3 KB

DQL及其他规范

1.建议使用原生SQL语句创建表结构,Django的migrate存在一定的缺陷

  • Django migrate 的缺陷

    • 无法生成表注释
    • 无法生成字段注释
    • 索引名称无法指定
    • Django内置的ORM字段类型不丰富、不严谨
  • 示例

# django migrate创建的表结构
CREATE TABLE `t_xxx` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `xxx_id` char(32) NOT NULL,
  `xxx_type` tinyint(4) NOT NULL,
  `xxx_name` varchar(50) NOT NULL,
  `xxx_name_bin` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
  `is_deleted` tinyint(1) NOT NULL,
  `create_time` datetime(6) NOT NULL,
  `update_time` datetime(6) NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `xxx_id` (`xxx_id`),
  KEY `t_xxx_xxx_type_xxx_name_1318ffcb_idx` (`xxx_type`,`xxx_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4

# 原生SQL创建的表结构
CREATE TABLE `t_xxx` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '表id',
  `xxx_id` char(32) NOT NULL DEFAULT '' COMMENT 'xxxid',
  `xxx_type` tinyint(4) NOT NULL DEFAULT '1' COMMENT 'xxx类型',
  `xxx_name` varchar(50) NOT NULL DEFAULT '' COMMENT 'xxx名称',
  `xxx_name_bin` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL DEFAULT '' COMMENT 'xxx名称(区分大小写)',
  `is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '是否已删除',
  `create_time` datetime(6) NOT NULL COMMENT '创建时间',
  `update_time` datetime(6) NOT NULL COMMENT '更新时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_xxxid` (`xxx_id`),
  KEY `idx_xxxtype_xxxname` (`xxx_type`,`xxx_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='xxx表'

2.单条查询建议使用.values().first(),禁止直接.first()或直接.get()

  • .values() 实现了指定字段的查询,降低全字段查询的I/O消耗,部分情况可以实现覆盖索引扫描

  • .first() 保证了任何情况下至多返回一行数据且不报错

  • .get() 与 .first() 的区别

    • .get() -> Model
      • 全字段查询
      • 至多返回一行数据,超出一行则抛出异常
    • 示例
    Xxx.objects.filter(xxx_id='babbc46ea7d13f03abaed27899f145e3').get()
    
    """实际执行SQL
    SELECT `t_xxx`.`id`
    	,`t_xxx`.`xxx_id`
    	,`t_xxx`.`xxx_type`
    	,`t_xxx`.`xxx_name`
    	,`t_xxx`.`is_deleted`
    	,`t_xxx`.`create_time`
    	,`t_xxx`.`update_time`
    FROM `t_xxx`
    WHERE `t_xxx`.`xxx_id` = 'babbc46ea7d13f03abaed27899f145e3'
    """
    
    • .values().get() -> dict
      • 指定字段查询
      • 至多返回一行数据,超出一行则抛出异常
    • 示例
    Xxx.objects.filter(xxx_id='babbc46ea7d13f03abaed27899f145e3').values('id', 'xxx_type').get()
    
    """实际执行SQL
    SELECT `t_xxx`.`id`
    	,`t_xxx`.`xxx_type`
    FROM `t_xxx`
    WHERE `t_xxx`.`xxx_id` = 'babbc46ea7d13f03abaed27899f145e3'
    """
    
    • .first() -> Optional[Model]
      • 全字段查询
      • 没有指定order_by时默认添加以主键正序排序
      • 自动添加limit 1确保至多返回一行
      • 在使用主键或唯一键做筛选条件时,并不会产生额外的排序
    Xxx.objects.filter(xxx_id='babbc46ea7d13f03abaed27899f145e3').first()
      
    """实际执行SQL
    SELECT `t_xxx`.`id`
    	,`t_xxx`.`xxx_id`
    	,`t_xxx`.`xxx_type`
    	,`t_xxx`.`xxx_name`
    	,`t_xxx`.`is_deleted`
    	,`t_xxx`.`create_time`
    	,`t_xxx`.`update_time`
    FROM `t_xxx`
    WHERE `t_xxx`.`xxx_id` = 'babbc46ea7d13f03abaed27899f145e3'
    ORDER BY `t_xxx`.`id` ASC 
    LIMIT 1
      
    # 执行计划
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    | id | select_type | table | partitions | type  | possible_keys | key    | key_len | ref   | rows | filtered | Extra |
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    |  1 | SIMPLE      | t_xxx | NULL       | const | xxx_id        | xxx_id | 128     | const |    1 |   100.00 | NULL  |
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    1 row in set, 1 warning (0.00 sec)
    """
    
    • .values().first() -> Optional[dict]
      • 指定字段查询
      • 没有指定order_by时默认添加以主键正序排序
      • 自动添加limit 1确保至多返回一行
      • 在使用主键或唯一键做筛选条件时,并不会产生额外的排序
    • 示例
    Xxx.objects.filter(xxx_id='babbc46ea7d13f03abaed27899f145e3').values('id', 'xxx_type').first()
      
    """实际执行SQL
    SELECT `t_xxx`.`id`
    	,`t_xxx`.`xxx_type`
    FROM `t_xxx`
    WHERE `t_xxx`.`xxx_id` = 'babbc46ea7d13f03abaed27899f145e3'
    ORDER BY `t_xxx`.`id` ASC 
    LIMIT 1
    
    # 执行计划
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    | id | select_type | table | partitions | type  | possible_keys | key    | key_len | ref   | rows | filtered | Extra |
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    |  1 | SIMPLE      | t_xxx | NULL       | const | xxx_id        | xxx_id | 128     | const |    1 |   100.00 | NULL  |
    +----+-------------+-------+------------+-------+---------------+--------+---------+-------+------+----------+-------+
    1 row in set, 1 warning (0.00 sec)
    """
    

3.多条查询建议使用 .values().all(),可简写成 .values(),禁止直接使用 .all()

  • .values() 实现了指定字段的查询,降低全字段查询的I/O消耗,部分情况可以实现覆盖索引扫描

  • 注意:返回的是 QuerySet 对象,实现了 __iter__ 魔法方法,在迭代过程才执行SQL并将结果集存入 self._result_cache 中,以此实现了惰性查询

  • .all() 与 .values().all() 的区别

    • .all() -> Iterable[Model]
      • 全字段查询
    • 示例
    queryset = Xxx.objects.filter(id__lte=4).all()
    
    # 执行SQL
    list(queryset)
      
    """实际执行SQL
    SELECT `t_xxx`.`id`
    	,`t_xxx`.`xxx_id`
    	,`t_xxx`.`xxx_type`
    	,`t_xxx`.`xxx_name`
    	,`t_xxx`.`is_deleted`
    	,`t_xxx`.`create_time`
    	,`t_xxx`.`update_time`
    FROM `t_xxx`
    WHERE `t_xxx`.`id` <= 4
    """
    
    • .values().all() -> Iterable[QuerySet]
      • 指定字段查询
    • 示例
    queryset = Xxx.objects.filter(id__lte=4).values('id', 'xxx_type')
      
    # 执行SQL
    list(queryset)
    
    """实际执行SQL
    SELECT `t_xxx`.`id`
      ,`t_xxx`.`xxx_type`
    FROM `t_xxx`
    WHERE `t_xxx`.`id` <= 4
    """
    

4.如果SQL仅用于条件判断必须加 .exists(),禁止直接判断 QuerySet

  • .exists() -> bool
    • 生成的SQL格式 select 1 from .... limit 1,极大提高性能
    • 直接判断QuerySet会获取全字段,造成I/O浪费
is_exists = Xxx.objects.filter(id__lte=4).exists()

"""实际执行SQL
SELECT (1) AS `a`
FROM `t_xxx`
WHERE `t_xxx`.`id` <= 4 
LIMIT 1
"""

5.避免在循环中执行SQL,使用 __in

  • 多次与数据库交互,多次网络I/O,多次解析,损耗性能
  • 使用 __in,仅一次交互
  • 示例
xxx_id_list: List[str] = ['a1', 'a2', 'a3']

# 多次查询
for xxx_id in xxx_id_list:
    xxx_info = Xxx.objects.filter(xxx_id=xxx_id).values(...).first()
    ...

# 仅一次查询
queryset = Xxx.objects.filter(xxx_id__in=xxx_id_list).values(...)

for items in queryset:
    ...

6.判断记录有n条,考虑分页后再求count

  • 比如用户粉丝数场景,超过99人则显示99+
    • 当需要用count统计时,如果行数极大,直接count对数据库压力极大时,可以考虑使用此方案
    • 更好的解决方案是定义表结构时,直接用户表冗余一个字段表示 粉丝数,由业务逻辑控制对粉丝数的增减
  • 注意
    • 分页数量为 n+1
    • 该方案是限制扫描行数以实现优化,但是会使用内存临时表,因此行数一定不可过高
  • 示例
# 这里用Xxx表数据为例

# 直接求count
Xxx.objects.filter(xxx_name__gt='test', is_deleted=0).count()

"""实际运行SQL
SELECT COUNT(*) AS `__count`
FROM `t_xxx`
WHERE (
		`t_xxx`.`is_deleted` = 0
		AND `t_xxx`.`xxx_name` > 'test'
		)
"""

# 分页后求count
n = 99
Xxx.objects.filter(xxx_name__gt='test', is_deleted=0)[:n+1].count()

"""实际运行SQL
SELECT COUNT(*)
FROM (
	SELECT `t_xxx`.`id` AS Col1
	FROM `t_xxx`
	WHERE (
			`t_xxx`.`is_deleted` = 0
			AND `t_xxx`.`xxx_name` > 'test'
			) 
	LIMIT 100
	) su

7.禁止无意义的print(QuerySet)

  • print() 会调用 QuerySet 的 __repr__ 魔法方法,内部逻辑为每次都重新查询,无缓存

8.临时定义的字段必须以 udc_ 为前缀,否则建议使用原生SQL查询

  • udc:User Define Column
  • django 要求临时定义的字段不能与模型中定义的字段重名
  • 以固定前缀开头用于区分实际字段与自定义字段
  • 如果不区分,以后新增字段如果重复则会报错
from django.db.models import Value, IntegerField

# 与定义字段重名导致报错
queryset = Xxx.objects.annotate(xxx_type=Value(1, IntegerField())).values('id', 'xxx_type')[:10]
"""报错:
ValueError: The annotation 'xxx_type' conflicts with a field on the model.
"""

# 推荐以 `udc` 为前缀
queryset = Xxx.objects.annotate(udc_xxx_type=Value(1, IntegerField())).values('id', 'udc_xxx_type')[:10]

9.查询常数

  • 示例
from django.db.models import Value, IntegerField, CharField

# 写法1
queryset = Xxx.objects.annotate(udc_mark=Value(1, IntegerField())).values('id', 'udc_mark')[:10]
list(queryset)

# 写法2
queryset = Xxx.objects.values('id', udc_mark=Value(1, IntegerField()))[:10]
list(queryset)

"""实际执行SQL
SELECT `t_xxx`.`id`
	,1 AS `udc_mark`
FROM `t_xxx` 
LIMIT 10
"""

10.exists语句使用

  • 示例
from django.db.models import Exists, OuterRef


# 仅select,作标量子查询
# yyy_exists: bool
exists_qs = Yyy.objects.filter(is_deleted=0, xxx_id=OuterRef('xxx_id')).values('id')
queryset = Xxx.objects.annotate(udc_yyy_exists=Exists(exists_qs)). \
    values('id', 'xxx_name', 'udc_yyy_exists')[:10]
list(queryset)

"""实际执行SQL
SELECT `t_xxx`.`id`
	,`t_xxx`.`xxx_name`
	,EXISTS (
		SELECT U0.`id`
		FROM `t_yyy` U0
		WHERE (
				U0.`is_deleted` = 0
				AND U0.`xxx_id` = (`t_xxx`.`xxx_id`)
				)
		) AS `udc_yyy_exists`
FROM `t_xxx` 
LIMIT 10
"""

# 仅作为where条件,.values()中不要出现
exists_qs = Yyy.objects.filter(is_deleted=0, xxx_id=OuterRef('xxx_id')).values('id')
queryset = Xxx.objects.annotate(udc_yyy_exists=Exists(exists_qs)).filter(udc_yyy_exists=True). \
    values('id', 'xxx_name')[:10]

"""实际执行SQL
SELECT `t_xxx`.`id`
	,`t_xxx`.`xxx_name`
FROM `t_xxx`
WHERE EXISTS (
		SELECT U0.`id`
		FROM `t_yyy` U0
		WHERE (
				U0.`is_deleted` = 0
				AND U0.`xxx_id` = (`t_xxx`.`xxx_id`)
				)
		) = 1 
LIMIT 10
"""

11.union 与 union all 语句使用

  • union 相比 union all 多了去重的操作,会使用内存临时表,尽可能使用 union all
  • 示例
# 展示10条,优先展示未删除的,若不足则由已删除补足

# union all
non_deleted_qs = Xxx.objects.filter(is_deleted=0).values('id', 'xxx_name', 'xxx_type')[:10]
is_deleted_qs = Xxx.objects.filter(is_deleted=1).values('id', 'xxx_name', 'xxx_type')[:10]
queryset = non_deleted_qs.union(is_deleted_qs, all=True)[:10]

"""实际执行SQL
(
	SELECT `t_xxx`.`id`
		,`t_xxx`.`xxx_name`
		,`t_xxx`.`xxx_type`
	FROM `t_xxx`
	WHERE `t_xxx`.`is_deleted` = 0 
	LIMIT 10
	)

UNION ALL

(
	SELECT `t_xxx`.`id`
		,`t_xxx`.`xxx_name`
		,`t_xxx`.`xxx_type`
	FROM `t_xxx`
	WHERE `t_xxx`.`is_deleted` = 1 
	LIMIT 10
	) 
LIMIT 10
"""

12.子查询

  • in子查询,... WHERE column IN subquery

xxx_id_sub = Yyy.objects.filter(id__lte=10).values('xxx_id')
subquery = Xxx.objects.filter(xxx_id__in=xxx_id_sub).values('id', 'xxx_id', 'xxx_type')[:10]
list(subquery)

"""实际执行SQL
SELECT `t_xxx`.`id`
	,`t_xxx`.`xxx_id`
	,`t_xxx`.`xxx_type`
FROM `t_xxx`
WHERE `t_xxx`.`xxx_id` IN (
		SELECT U0.`xxx_id`
		FROM `t_yyy` U0
		WHERE U0.`id` <= 10
		) 
LIMIT 10
"""
  • 标量子查询,SELECT subquery ...
from django.db.models import Subquery, OuterRef, DateTimeField

# 标量子查询要求至多返回一行一列,所以必须指定.values()只取一个字段,且在结尾加 limit 1
yyy_sub = Yyy.objects.filter(is_deleted=0, xxx_id=OuterRef('xxx_id')).values('create_time').order_by('-update_time')[:1]
queryset = Xxx.objects.annotate(udc_yyy_ctime=Subquery(yyy_sub, DateTimeField())).filter(is_deleted=0).values('xxx_id', 'udc_yyy_ctime')[:10]
list(queryset)

"""实际执行SQL
SELECT `t_xxx`.`xxx_id`
	,(
		SELECT U0.`create_time`
		FROM `t_yyy` U0
		WHERE (
				U0.`is_deleted` = 0
				AND U0.`xxx_id` = (`t_xxx`.`xxx_id`)
				)
		ORDER BY U0.`update_time` DESC LIMIT 1
		) AS `udc_yyy_ctime`
FROM `t_xxx`
WHERE `t_xxx`.`is_deleted` = 0 
LIMIT 10
"""

13.join查询

  • inner join

    • 依赖于模型定义的外键,关联查询时会使用inner join 的条件
      • (1).外键字段null=False
      • (2).外键字段null=True, 对源表进行过滤
    • 示例
    # (1).外键字段null=False
    """模型外键
    class Yyy(models.Model):
        xxx = models.ForeignKey('Xxx', to_field='xxx_id', null=False, ...)
    """
    
    queryset = Yyy.objects.filter(is_deleted=0).values('yyy_id', 'xxx__xxx_id', 'xxx__xxx_type')[:10]
    list(queryset)
    
    """实际执行SQL
    SELECT `t_yyy`.`yyy_id`
        ,`t_yyy`.`xxx_id`
        ,`t_xxx`.`xxx_type`
    FROM `t_yyy`
    INNER JOIN `t_xxx` ON (`t_yyy`.`xxx_id` = `t_xxx`.`xxx_id`)
    WHERE `t_yyy`.`is_deleted` = 0 
    LIMIT 10
    """
    
    # (2).外键字段null=True, 对源表进行过滤
    """模型外键
    class Yyy(models.Model):
        xxx = models.ForeignKey('Xxx', to_field='xxx_id', null=True, ...)
    """
    
    queryset = Yyy.objects.filter(is_deleted=0, xxx__is_deleted=0).values('yyy_id', 'xxx__xxx_id', 'xxx__xxx_type')[:10]
    list(queryset)
    
    """实际执行SQL
    SELECT `t_yyy`.`yyy_id`
        ,`t_yyy`.`xxx_id`
        ,`t_xxx`.`xxx_type`
    FROM `t_yyy`
    INNER JOIN `t_xxx` ON (`t_yyy`.`xxx_id` = `t_xxx`.`xxx_id`)
    WHERE (
            `t_yyy`.`is_deleted` = 0
            AND `t_xxx`.`is_deleted` = 0
            ) 
    LIMIT 10
    """
    
  • left join

    • 依赖于模型定义的外键,关联查询时,会使用left join的条件
      • 外键字段null=True且没有对源表进行过滤
    • 示例
    # 外键字段null=True且没有对源表进行过滤
    """模型外键
    class Yyy(models.Model):
        xxx = models.ForeignKey('Xxx', to_field='xxx_id', null=True, ...)
    """
    
    queryset = Yyy.objects.filter(is_deleted=0).values('yyy_id', 'xxx__xxx_id', 'xxx__xxx_type')[:10]
    list(queryset)
    
    """实际执行SQL
    SELECT `t_yyy`.`yyy_id`
        ,`t_yyy`.`xxx_id`
        ,`t_xxx`.`xxx_type`
    FROM `t_yyy`
    LEFT OUTER JOIN `t_xxx` ON (`t_yyy`.`xxx_id` = `t_xxx`.`xxx_id`)
    WHERE `t_yyy`.`is_deleted` = 0 
    LIMIT 10
    """
    

14.如果小序列数据来自于数据库,且仅作为in子句使用,推荐直接使用 in子查询

  • 减少一次数据库交互
  • MySQL可以使用 semi join 优化in子查询
  • 示例
# 两次查询
xxx_id_tuple = tuple(Yyy.objects.filter(is_deleted=0, id__lte=10).values_list('xxx_id', flat=True))
subquery = Xxx.objects.filter(xxx_id__in=xxx_id_tuple).values('xxx_id', 'xxx_name')
list(subquery)

"""实际执行SQL
SELECT `t_yyy`.`xxx_id` FROM `t_yyy` WHERE (`t_yyy`.`id` <= 10 AND `t_yyy`.`is_deleted` = 0)
SELECT `t_xxx`.`xxx_id`, `t_xxx`.`xxx_name` FROM `t_xxx` WHERE `t_xxx`.`xxx_id` IN (...)
"""

# 改写成in子查询,仅一次查询,且利用到semi join
xxx_id_sub = Yyy.objects.filter(is_deleted=0, id__lte=10).values_list('xxx_id', flat=True)
subquery = Xxx.objects.filter(xxx_id__in=xxx_id_sub).values('xxx_id', 'xxx_name')
list(subquery)

"""实际执行SQL
SELECT `t_xxx`.`xxx_id`
	,`t_xxx`.`xxx_name`
FROM `t_xxx`
WHERE `t_xxx`.`xxx_id` IN (
		SELECT U0.`xxx_id`
		FROM `t_yyy` U0
		WHERE (
				U0.`id` <= 10
				AND U0.`is_deleted` = 0
				)
		)

# 执行计划
+----+--------------+-------------+------------+--------+-------------------------------+---------+---------+--------------------+------+----------+-------------+
| id | select_type  | table       | partitions | type   | possible_keys                 | key     | key_len | ref                | rows | filtered | Extra       |
+----+--------------+-------------+------------+--------+-------------------------------+---------+---------+--------------------+------+----------+-------------+
|  1 | SIMPLE       | <subquery2> | NULL       | ALL    | NULL                          | NULL    | NULL    | NULL               | NULL |   100.00 | NULL        |
|  1 | SIMPLE       | t_xxx       | NULL       | eq_ref | xxx_id                        | xxx_id  | 128     | <subquery2>.xxx_id |    1 |   100.00 | NULL        |
|  2 | MATERIALIZED | U0          | NULL       | range  | PRIMARY,t_yyy_xxx_id_ef705c68 | PRIMARY | 8       | NULL               |    3 |    33.33 | Using where |
+----+--------------+-------------+------------+--------+-------------------------------+---------+---------+--------------------+------+----------+-------------+
3 rows in set, 1 warning (0.00 sec)

# 优化器改写后的SQL
mysql> show warnings\G
*************************** 1. row ***************************
  Level: Note
   Code: 1003
Message: /* select#1 */ select `waimai`.`t_xxx`.`xxx_id` AS `xxx_id`,`waimai`.`t_xxx`.`xxx_name` AS `xxx_name` from `waimai`.`t_xxx` semi join (`waimai`.`t_yyy` `u0`) where ((`waimai`.`t_xxx`.`xxx_id` = `<subquery2>`.`xxx_id`) and (`waimai`.`u0`.`is_deleted` = 0) and (`waimai`.`u0`.`id` <= 10))
1 row in set (0.00 sec)
"""

15.禁止in超大序列 filter(column__in=huge_sequence),考虑改写成exists语句

  • 超大序列会增加SQL的长度,增加网络I/O,MySQL也可能无法使用索引
  • exists语句,对于被驱动表,查询到一行复合条件即返回,性能极佳

16.禁止对日期类型字段使用 __year__month__day

  • 以上写法会对mysql字段进行函数化处理,导致无法使用索引,建议改写成 __gte__lt
  • 示例
# 仅查询 2021年3月9号的数据
import datetime

today = datetime.date(2021, 3, 9)

# 使用 __year, __month, __day
subquery = Xxx.objects.filter(create_time__year=today.year, create_time__month=today.month, create_time__day=today.day).values('xxx_id')
list(subquery)

"""实际执行SQL
SELECT `t_xxx`.`xxx_id`
FROM `t_xxx`
WHERE (
		EXTRACT(DAY FROM `t_xxx`.`create_time`) = 9
		AND EXTRACT(MONTH FROM `t_xxx`.`create_time`) = 3
		AND `t_xxx`.`create_time` BETWEEN '2021-01-01 00:00:00'
			AND '2021-12-31 23:59:59.999999'
		)
"""

# __gte, __lt 改写
tomorrow = today + datetime.timedelta(days=1)
subquery = Xxx.objects.filter(create_time__gte=today, create_time__lt=tomorrow).values('xxx_id')
list(subquery)

"""实际执行SQL
SELECT `t_xxx`.`xxx_id`
FROM `t_xxx`
WHERE (
		`t_xxx`.`create_time` >= '2021-03-09 00:00:00'
		AND `t_xxx`.`create_time` < '2021-03-10 00:00:00'
		)
"""

17.对于类型字段禁止手动指定数值,建议使用枚举类指定(Django>=3.0 后推荐使用:models.TextChoices

  • 避免误操作
# 推荐
Xxx.objects.filter(xxx_type=Xxx.XxxTypeEnum.TYPE_ONE.val). \
    values('xxx_id', 'xxx_name')[:1]

# 禁止
Xxx.objects.filter(xxx_type=1). \
    values('xxx_id', 'xxx_name')[:1]

18.慎用 update_or_create

  • 优点

    • 语法简介
    • 内部为Model对象的操作,可以触发 auto_nowauto_now_add
    • 显示加锁(悲观锁),防止唯一键冲突
  • 缺点

    • 底层SQL使用了 select *for update
      • I/O浪费
      • MySQL默认是一致性非锁定读,select ... for update 显示加排他锁,会降低并发
# 使用update_or_create
Yyy.objects.update_or_create(defaults={'is_deleted': 0}, xxx_id='3b2ac46a292e39f88f103eb0ff876906')

"""实际执行SQL
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| argument                                                                                                                                                                                                                                             |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| SET autocommit=0                                                                                                                                                                                                                                     |
| SELECT `t_yyy`.`id`, `t_yyy`.`yyy_id`, `t_yyy`.`xxx_id`, `t_yyy`.`is_deleted`, `t_yyy`.`create_time`, `t_yyy`.`update_time` FROM `t_yyy` WHERE `t_yyy`.`xxx_id` = '3b2ac46a292e39f88f103eb0ff876906' FOR UPDATE                                      |
| UPDATE `t_yyy` SET `yyy_id` = 'fda6f9193ca03f30a5a94216a88beb57', `xxx_id` = '3b2ac46a292e39f88f103eb0ff876906', `is_deleted` = 0, `create_time` = '2021-03-09 10:39:19.743246', `update_time` = '2021-03-10 09:48:46.752148' WHERE `t_yyy`.`id` = 1 |
| COMMIT                                                                                                                                                                                                                                               |
| SET autocommit=1                                                                                                                                                                                                                                     |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
5 rows in set (0.00 sec)
"""

# 非并发场景,更好的写法
# 缺点:并发场景下,会出现唯一键冲突
now = datetime.datetime.now()
xxx_id = '3b2ac46a292e39f88f103eb0ff876906'
queryset = Yyy.objects.filter(xxx_id=xxx_id)
if queryset.exists():
    queryset.update(is_deleted=0, update_time=now)
else:
    Yyy.objects.create(xxx_id=xxx_id, is_deleted=0)

19.NULL值的判断,禁止直接等值查询 推荐使用 __isnull

  • 已更正,Django-2.2.24版本实测,可直接使用等值查询,Django ORM会自动转换
  • filter(column=None) 对应的SQL语句为 WHERE column IS NULL
  • exclude(column=None) 对应的SQL语句未 WHERE NOT (column IS NULL)
    • 可以被MySQL优化器改写成 WHERE column IS NULL
  • 也可以使用 column__isnull: bool
    • 优点
      • 与SQL语法一致
    • 缺点
      • 多种类型的值,需要不同的写法
xxx_name: Optional[str]

# 1.等值查询
queryset = Xxx.objects.exclude(xxx_name=xxx_name).values('xxx_id', 'xxx_type')[:10]
list(queryset)

# 2.使用 __isnull
queryset = Xxx.objects.filter()
if xxx_name is None:
    queryset = queryset.filter(xxx_name__isnull=False)
else:
    queryset = queryset.filter(xxx_name=xxx_name)
queryset = queryset.values('xxx_id', 'xxx_type')[:10]
list(queryset)

20.已知n个(通常5-10个)id,每个id取m条(通常小于5)数据的优化思路

  • 可以考虑使用 union all
    • 优点
      • 将循环中执行SQL变成仅一次查询
      • 减少与mysql交互次数(网络I/O次数)
    • 缺点
      • union all 拼接的SQL可能过长(通常不会造成性能问题)
"""业务场景案例
    已知3个用户id,每个用户展示最新的3个用户标签
    这里把 Xxx 当作用户表,Yyy 当作用户标签表
"""

user_id_list: list[str] = ["1", "2", "3"] 

# 1.循环中查询,执行了 len(user_id_list) 次数据库查询
for user_id in user_id_list:
    queryset = Yyy.objects.filter(is_deleted=0, xxx_id=user_id).values("yyy_id").order_by("-create_time")[:3]
    ...

# 2.改写union all,仅执行1次数据库查询
qs: Optional[QuerySet] = None
qs_list: list[QuerySet] = []
for user_id in user_id_list:
    queryset = Yyy.objects.filter(is_deleted=0, xxx_id=user_id).values("yyy_id").order_by("-create_time")[:3]
    if qs is None:
        qs = queryset
    else:
        qs_list.append(queryset)
queryset = qs.union(*qs_list, all=True)
...

"""实际执行SQL
(
    SELECT `t_yyy`.`yyy_id`
    FROM `t_yyy`
    WHERE (
            `t_yyy`.`is_deleted` = 0
            AND `t_yyy`.`xxx_id` = '1'
            )
    ORDER BY `t_yyy`.`create_time` DESC 
    LIMIT 3
    )

UNION ALL

(
	SELECT `t_yyy`.`yyy_id`
	FROM `t_yyy`
	WHERE (
			`t_yyy`.`is_deleted` = 0
			AND `t_yyy`.`xxx_id` = '2'
			)
	ORDER BY `t_yyy`.`create_time` DESC 
    LIMIT 3
	)

UNION ALL

(
	SELECT `t_yyy`.`yyy_id`
	FROM `t_yyy`
	WHERE (
			`t_yyy`.`is_deleted` = 0
			AND `t_yyy`.`xxx_id` = '3'
			)
	ORDER BY `t_yyy`.`create_time` DESC 
    LIMIT 3
	)
"""
...

21.不推荐使用only(),如果使用,必须带上主键和关联字段

  • 不推荐使用
  • 主键自动查出,显示指定上语义更清晰
  • 关联查询时,如果不带上关联字段,在执行中会自动循环查询,降低性能
  • 注意:如果getattr()中指定了only()中未指定的字段,会降级为select *
# 错误写法
@cached_property
def udp_parameter_info_list(self) -> List[Dict[str, Any]]:
    """参数信息列表"""
    parameter_list: List[Dict[str, Any]] = []
    queryset: QuerySet["XxxParameter"] = self.rel_parameter_xxx.filter(is_deleted=0). \
        only("name", "value").order_by("sort", "id")
    if queryset:
        for obj in queryset:
            elem: Dict[str, Any] = {
                "name": obj.name,
                "value": obj.value
            }
            parameter_list.append(elem)
    return parameter_list

"""实际执行SQL
SELECT `t_xxx_parameter`.`id`
	,`t_xxx_parameter`.`name`
	,`t_xxx_parameter`.`value`
FROM `t_xxx_parameter`
WHERE (
		`t_xxx_parameter`.`xxx_id` = ?
		AND `t_xxx_parameter`.`is_deleted` = 0
		)
ORDER BY `t_xxx_parameter`.`sort` ASC
	,`t_xxx_parameter`.`id` ASC

# django自动在迭代中查询出关联字段,执行次数取决于queryset中元素的数量
SELECT `t_xxx_parameter`.`id`, `t_xxx_parameter`.`xxx_id` FROM `t_xxx_parameter` WHERE `t_xxx_parameter`.`id` = ?
SELECT `t_xxx_parameter`.`id`, `t_xxx_parameter`.`xxx_id` FROM `t_xxx_parameter` WHERE `t_xxx_parameter`.`id` = ?
...
"""

# 正确写法
@cached_property
def udp_parameter_info_list(self) -> List[Dict[str, Any]]:
    """参数信息列表"""
    parameter_list: List[Dict[str, Any]] = []
    queryset: QuerySet["XxxParameter"] = self.rel_parameter_xxx.filter(is_deleted=0). \
        only("id", "name", "value", "xxx_id").order_by("sort", "id")
    if queryset:
        for obj in queryset:
            elem: Dict[str, Any] = {
                "name": obj.name,
                "value": obj.value
            }
            parameter_list.append(elem)
    return parameter_list

"""实际执行SQL
SELECT `t_xxx_parameter`.`id`
	,`t_xxx_parameter`.`name`
	,`t_xxx_parameter`.`value`
	,`t_xxx_parameter`.`xxx_id`
FROM `t_xxx_parameter`
WHERE (
		`t_xxx_parameter`.`xxx_id` = ?
		AND `t_xxx_parameter`.`is_deleted` = 0
		)
ORDER BY `t_xxx_parameter`.`sort` ASC
	,`t_xxx_parameter`.`id` ASC
"""

23.临时定义的表必须以 udt_ 为前缀,left join 多条件(on)时使用 django.db.models.FilteredRelation

  • udt:User Define Table
from django.db.models import QuerySet, FilteredRelation, Q

# 1.全字段查询
category_rel = FilteredRelation("category", condition=Q(category__is_deleted=0))
queryset = News.objects.annotate(udt_category=category_rel).filter(is_deleted=0). \
    select_related("udt_category")[:10]
list(queryset)

"""实际执行SQL
SELECT `t_news`.`id`
	...
	,udt_category.`id`
	...
FROM `t_news`
LEFT OUTER JOIN `t_category` udt_category ON (
		`t_news`.`category_id` = udt_category.`id`
		AND (udt_category.`is_deleted` = 0)
		)
WHERE `t_news`.`is_deleted` = 0
LIMIT 10
"""

# 2.指定字段查询
category_rel = FilteredRelation("category", condition=Q(category__is_deleted=0))
queryset = News.objects.annotate(udt_category=category_rel).filter(is_deleted=0). \
    values("udt_category__name")[:10]
list(queryset)

"""实际执行SQL
SELECT udt_category.`name`
FROM `t_news`
LEFT OUTER JOIN `t_category` udt_category ON (
		`t_news`.`category_id` = udt_category.`id`
		AND (udt_category.`is_deleted` = 0)
		)
WHERE `t_news`.`is_deleted` = 0
LIMIT 10
"""