180.Consecutive_Numbers

本文介绍了一种SQL查询技巧,用于找出表中连续出现至少三次的数字,并提供了两种实现方法:通过连接查询和使用变量计数。此外,还讨论了如何将此方法推广到查询任意次数N的连续重复数字。

Write a SQL query to find all numbers that appear at least three times consecutively.

#+----+-----+
#| Id | Num |
#+----+-----+
#| 1  |  1  |
#| 2  |  1  |
#| 3  |  1  |
#| 4  |  2  |
#| 5  |  1  |
#| 6  |  2  |
#| 7  |  2  |
#+----+-----+

For example, given the above Logs table, 1 is the only number that appears consecutively for at least three times.

+-----------------+
| ConsecutiveNums |
+-----------------+
| 1               |
+-----------------+

sql脚本

SELECT DISTINCT L1.Num as ConsecutiveNums
FROM Logs L1, Logs L2, Logs L3
WHERE L2.Id = L1.Id + 1
AND L3.Id = L2.Id + 1
AND L1.Num = L2.Num
AND L2.Num = L3.Num

也可以使用JOIN子句完成同样的功能:

SELECT DISTINCT L1.Num 
FROM Logs L1
JOIN Logs L2 ON L1.Id + 1 = L2.Id
JOIN Logs L3 ON L1.Id + 2 = L3.Id
WHERE L1.Num = L2.Num AND L1.Num = L3.Num
ORDER BY L1.Num

上面两种方法可以用于找到至少三次连续出现的数字,如果将连续出现的数字扩展到N个,按照上面思路写出的SQL语句就会比较长。因此可以用下面这种方式来查询:

SELECT DISTINCT Num
FROM (
  SELECT Num, 
    CASE 
      WHEN @prev = Num THEN @count := @count + 1
      WHEN (@prev := Num) IS NOT NULL THEN @count := 1
    END CNT
  FROM Logs, (SELECT @prev := NULL) X
  ORDER BY Id
) AS A
WHERE A.CNT >= 3

将最后一行的3改为N,即可用于查询至少N次连续出现的数字。

import tkinter as tk from tkinter import filedialog, messagebox from itertools import combinations class DataShrinkTool: def __init__(self, root): self.root = root self.root.title("数据缩水工具") # 初始化变量 self.file_path = tk.StringVar() self.number_length = tk.IntVar(value=5) self.consecutive_options = {} self.even_odd_options = {} self.mod_options = {} self.span_options = {} self.distance_options = {} self.bold_numbers = tk.StringVar(value="") self.bold_count = tk.IntVar(value=1) self.create_widgets() def create_widgets(self): frame = tk.Frame(self.root) frame.pack(padx=10, pady=10) # 文件选择 tk.Label(frame, text="选择文件:").grid(row=0, column=0, sticky="w") tk.Entry(frame, textvariable=self.file_path, width=50).grid(row=0, column=1, padx=5) tk.Button(frame, text="浏览", command=self.open_file).grid(row=0, column=2) # 数字长度选择 tk.Label(frame, text="数字长度:").grid(row=1, column=0, sticky="w") tk.Radiobutton(frame, text="5位", variable=self.number_length, value=5).grid(row=1, column=1, sticky="w") tk.Radiobutton(frame, text="6位", variable=self.number_length, value=6).grid(row=1, column=2, sticky="w") # 连号筛选 tk.Label(frame, text="连号筛选:").grid(row=2, column=0, sticky="w") self.consecutive_options["count"] = tk.IntVar(value=0) tk.Spinbox(frame, from_=0, to=5, textvariable=self.consecutive_options["count"], width=5).grid(row=2, column=1, sticky="w") self.consecutive_options["group"] = tk.IntVar(value=0) tk.Spinbox(frame, from_=0, to=5, textvariable=self.consecutive_options["group"], width=5).grid(row=2, column=2, sticky="w") # 奇偶选择 tk.Label(frame, text="奇偶选择:").grid(row=3, column=0, sticky="w") for i in range(6): self.even_odd_options[i] = tk.StringVar(value="any") tk.OptionMenu(frame, self.even_odd_options[i], "any", "odd", "even").grid(row=3, column=i + 1) # 012路筛选 tk.Label(frame, text="012路筛选:").grid(row=4, column=0, sticky="w") for i in range(6): self.mod_options[i] = tk.StringVar(value="any") tk.OptionMenu(frame, self.mod_options[i], "any", "0", "1", "2").grid(row=4, column=i + 1) # 首位跨度 tk.Label(frame, text="首位跨度:").grid(row=5, column=0, sticky="w") self.span_options["min"] = tk.IntVar(value=0) tk.Spinbox(frame, from_=0, to=9, textvariable=self.span_options["min"], width=5).grid(row=5, column=1, sticky="w") self.span_options["max"] = tk.IntVar(value=9) tk.Spinbox(frame, from_=0, to=9, textvariable=self.span_options["max"], width=5).grid(row=5, column=2, sticky="w") # 邻位距离 tk.Label(frame, text="邻位距离:").grid(row=6, column=0, sticky="w") self.distance_options["min"] = tk.IntVar(value=0) tk.Spinbox(frame, from_=0, to=9, textvariable=self.distance_options["min"], width=5).grid(row=6, column=1, sticky="w") self.distance_options["max"] = tk.IntVar(value=9) tk.Spinbox(frame, from_=0, to=9, textvariable=self.distance_options["max"], width=5).grid(row=6, column=2, sticky="w") # 胆码选择 tk.Label(frame, text="胆码选择:").grid(row=7, column=0, sticky="w") tk.Entry(frame, textvariable=self.bold_numbers, width=20).grid(row=7, column=1, sticky="w") tk.Spinbox(frame, from_=0, to=6, textvariable=self.bold_count, width=5).grid(row=7, column=2, sticky="w") # 筛选按钮 tk.Button(frame, text="开始筛选", command=self.filter_numbers).grid(row=8, column=1, pady=10) def open_file(self): file_path = filedialog.askopenfilename(filetypes=[("Text Files", "*.txt")]) if file_path: self.file_path.set(file_path) def filter_numbers(self): file_path = self.file_path.get() number_length = self.number_length.get() consecutive_count = self.consecutive_options["count"].get() consecutive_group = self.consecutive_options["group"].get() even_odd = {i: self.even_odd_options[i].get() for i in range(number_length)} mod = {i: self.mod_options[i].get() for i in range(number_length)} span_min = self.span_options["min"].get() span_max = self.span_options["max"].get() distance_min = self.distance_options["min"].get() distance_max = self.distance_options["max"].get() bold_numbers = set(map(int, self.bold_numbers.get().split(","))) if self.bold_numbers.get() else set() bold_count = self.bold_count.get() try: with open(file_path, 'r') as f: numbers = [line.strip() for line in f if len(line.strip()) == number_length] filtered_numbers = [] for number in numbers: digits = list(map(int, number)) if not self.check_consecutive(digits, consecutive_count, consecutive_group): continue if not self.check_even_odd(digits, even_odd): continue if not self.check_mod(digits, mod): continue if not self.check_span(digits, span_min, span_max): continue if not self.check_distance(digits, distance_min, distance_max): continue if not self.check_bold_numbers(digits, bold_numbers, bold_count): continue filtered_numbers.append(number) result = "\n".join(filtered_numbers) messagebox.showinfo("筛选结果", result) except Exception as e: messagebox.showerror("错误", str(e)) def check_consecutive(self, digits, count, group): groups = [] current_group = [] for i in range(len(digits)): if not current_group or digits[i] == digits[i - 1] + 1: current_group.append(digits[i]) else: if len(current_group) >= 2: groups.append(current_group) current_group = [digits[i]] if len(current_group) >= 2: groups.append(current_group) return len([g for g in groups if len(g) >= count]) >= group def check_even_odd(self, digits, even_odd): for i, digit in enumerate(digits): option = even_odd.get(i, "any") if option != "any": if (digit % 2 == 0 and option != "even") or (digit % 2 != 0 and option != "odd"): return False return True def check_mod(self, digits, mod): for i, digit in enumerate(digits): option = mod.get(i, "any") if option != "any" and str(digit % 3) != option: return False return True def check_span(self, digits, min_span, max_span): return min_span <= abs(digits[0] - digits[-1]) <= max_span def check_distance(self, digits, min_distance, max_distance): for i in range(len(digits) - 1): if not (min_distance <= abs(digits[i] - digits[i + 1]) <= max_distance): return False return True def check_bold_numbers(self, digits, bold_numbers, bold_count): return len(set(digits) & bold_numbers) >= bold_count if __name__ == "__main__": root = tk.Tk() app = DataShrinkTool(root) root.mainloop()对这段代码进行优化
05-22
WITH consecutive_sessions AS ( -- First, get all study sessions with row numbers to identify consecutive sessions SELECT ss.student_id, ss.subject, ss.session_date, ss.hours_studied, ROW_NUMBER() OVER (PARTITION BY ss.student_id ORDER BY ss.session_date) as session_order FROM study_sessions ss ), session_groups AS ( -- Group consecutive sessions by checking date gaps SELECT cs1.*, -- Check if there's a gap > 2 days from previous session CASE WHEN LAG(session_date) OVER (PARTITION BY student_id ORDER BY session_date) IS NULL THEN 1 WHEN DATEDIFF(session_date, LAG(session_date) OVER (PARTITION BY student_id ORDER BY session_date)) <= 2 THEN 0 ELSE 1 END as is_new_group FROM consecutive_sessions cs1 ), grouped_sessions AS ( -- Create group numbers for consecutive session sequences SELECT *, SUM(is_new_group) OVER (PARTITION BY student_id ORDER BY session_date) as group_id FROM session_groups ), pattern_analysis AS ( -- For each group of consecutive sessions, analyze the pattern SELECT student_id, group_id, COUNT(DISTINCT subject) as unique_subjects, COUNT(*) as total_sessions, SUM(hours_studied) as total_hours, -- Create the subject sequence string to check for repeating patterns GROUP_CONCAT(subject ORDER BY session_date SEPARATOR '|') as subject_sequence FROM grouped_sessions GROUP BY student_id, group_id HAVING COUNT(*) >= 6 -- At least 6 sessions for 2 complete cycles of 3 subjects AND COUNT(DISTINCT subject) >= 3 -- At least 3 different subjects ), valid_patterns AS ( -- Check if the pattern actually repeats for at least 2 complete cycles SELECT student_id, group_id, unique_subjects as cycle_length, total_sessions, total_hours, subject_sequence, -- Check if we have at least 2 complete cycles CASE WHEN total_sessions >= (unique_subjects * 2) THEN 1 ELSE 0 END as has_complete_cycles FROM pattern_analysis ), cycle_validation AS ( -- Validate that the pattern actually repeats correctly SELECT gs.student_id, gs.group_id, vp.cycle_length, vp.total_hours, -- Check if the subject sequence repeats correctly COUNT(*) as session_count, -- Get the first cycle pattern GROUP_CONCAT( CASE WHEN gs.session_order <= vp.cycle_length THEN gs.subject ELSE NULL END ORDER BY gs.session_date SEPARATOR '|' ) as first_cycle, -- Check if subsequent sessions follow the same pattern COUNT(CASE WHEN gs.session_order > vp.cycle_length AND gs.subject = ( SELECT subject FROM grouped_sessions gs2 WHERE gs2.student_id = gs.student_id AND gs2.group_id = gs.group_id AND gs2.session_order = ((gs.session_order - 1) % vp.cycle_length) + 1 ) THEN 1 ELSE NULL END) as matching_pattern_count FROM grouped_sessions gs JOIN valid_patterns vp ON gs.student_id = vp.student_id AND gs.group_id = vp.group_id WHERE vp.has_complete_cycles = 1 GROUP BY gs.student_id, gs.group_id, vp.cycle_length, vp.total_hours HAVING session_count >= (cycle_length * 2) AND matching_pattern_count = (session_count - cycle_length) -- All sessions after first cycle match pattern ) -- Final result with student information SELECT s.student_id, s.student_name, s.major, cv.cycle_length, cv.total_hours as total_study_hours FROM cycle_validation cv JOIN students s ON cv.student_id = s.student_id ORDER BY cv.cycle_length DESC, cv.total_hours DESC
最新发布
11-21
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值