aiRouter.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. import os, json, time, asyncio
  2. from base_config import ai_key, path
  3. from fastapi import APIRouter, BackgroundTasks
  4. from pathlib import Path
  5. from pydantic import BaseModel
  6. from git import Repo
  7. from http import HTTPStatus
  8. from dashscope import Application
  9. from models.aiModels import Scan_Tasks
  10. from models.gitModels import Repos
# Router on which this module registers its /scan endpoint.
airouter = APIRouter()
class RequestBody(BaseModel):
    """JSON body accepted by the ``/scan`` endpoint."""

    # Used as the per-user directory name under the configured base path and
    # stored as ``create_user`` on the scan task.
    uuid: str
    # Repository URL; its last path segment provides the repository name.
    repo_url: str
  15. def generate_repo_path(uuid, repo_url):
  16. repo_name = repo_url.split("/")[-1].replace(".git", "")
  17. base_path = os.path.join(path, uuid)
  18. return os.path.join(base_path, repo_name), repo_name
  19. def filter_code_files(prompt):
  20. response = Application.call(
  21. # 若没有配置环境变量,可用百炼API Key将下行替换为:api_key="sk-xxx"。但不建议在生产环境中直接将API Key硬编码到代码中,以减少API Key泄露风险。
  22. api_key=ai_key,
  23. app_id='c1a6dbb6d2314e469bfcbe44c2fe0a5f',
  24. prompt=prompt)
  25. if response.status_code == HTTPStatus.OK:
  26. try:
  27. json_data = json.loads(response.output.text)
  28. print(json_data)
  29. except json.JSONDecodeError:
  30. print("返回内容不是有效的 JSON 格式!")
  31. json_data={"files":[]}
  32. else:
  33. print(f"请求失败: {response.message}")
  34. json_data = {"files": []}
  35. return json_data
  36. def analysis_results(local_path,path):
  37. prompt=""
  38. file_path=os.path.join(local_path,path)
  39. with open(file_path, 'r',encoding="utf8") as f:
  40. for line_num, line in enumerate(f, start=1):
  41. prompt+=f"{line_num}\t{line}"
  42. response = Application.call(
  43. # 若没有配置环境变量,可用百炼API Key将下行替换为:api_key="sk-xxx"。但不建议在生产环境中直接将API Key硬编码到代码中,以减少API Key泄露风险。
  44. api_key=ai_key,
  45. app_id='2f288f146e2d492abb3fe22695e70635', # 替换为实际的应用 ID
  46. prompt=prompt)
  47. if response.status_code == HTTPStatus.OK:
  48. try:
  49. json_data = json.loads(response.output.text)
  50. except json.JSONDecodeError:
  51. print("返回内容不是有效的 JSON 格式!")
  52. print(response.output.text)
  53. json_data={"summary":None}
  54. else:
  55. print(f"请求失败: {response.message}")
  56. json_data = {"summary":None}
  57. json_data["path"]=file_path
  58. return json_data
  59. async def get_filtered_files(folder_path):
  60. base_path = Path(folder_path).resolve()
  61. if not base_path.is_dir():
  62. raise ValueError("无效的目录路径")
  63. file_list = []
  64. for root, dirs, files in os.walk(base_path):
  65. dirs[:] = [d for d in dirs if not d.startswith('.')]
  66. files = [f for f in files if not f.startswith('.')]
  67. for file in files:
  68. abs_path = Path(root) / file
  69. rel_path = abs_path.relative_to(base_path)
  70. file_list.append(str(rel_path))
  71. return file_list
  72. async def process_batch1(batch_files):
  73. """多线程处理单个文件批次的函数"""
  74. try:
  75. js = filter_code_files(str(batch_files))
  76. return js["files"]
  77. except Exception as e:
  78. print(f"处理批次时出错: {e}")
  79. return []
  80. async def get_code_files(path):
  81. file_list = []
  82. files = await get_filtered_files(path)
  83. print(files)
  84. print(f"找到 {len(files)} 个文件")
  85. # 将文件列表分块(每500个一组)
  86. chunks = [files[i * 500: (i + 1) * 500]
  87. for i in range(0, len(files) // 500 + 1)]
  88. # 提交所有批次任务
  89. # futures = [executor.submit(process_batch1, chunk) for chunk in chunks]
  90. tasks = [process_batch1(chunk) for chunk in chunks]
  91. futures = await asyncio.gather(*tasks, return_exceptions=True)
  92. # 实时获取已完成任务的结果
  93. for future in futures[0]:
  94. if isinstance(future, Exception):
  95. print(f"处理出错: {future}")
  96. else:
  97. file_list.append(future)
  98. return file_list
  99. async def process_batch2(local_path,path):
  100. """多线程处理单个文件批次的函数"""
  101. try:
  102. # print(local_path, path)
  103. js = analysis_results(local_path,path)
  104. return js
  105. except Exception as e:
  106. print(11111)
  107. print(f"处理批次时出错: {e}")
  108. return {"summary":None}
  109. async def analysis(local_path, repo_id):
  110. file_list = await get_code_files(local_path)
  111. print(file_list)
  112. results = []
  113. tasks = [process_batch2(local_path, file) for file in file_list] # 假设process_batch2已改为异步函数
  114. batch_results = await asyncio.gather(*tasks, return_exceptions=True)
  115. for result in batch_results:
  116. if isinstance(result, Exception):
  117. print(f"处理出错: {result}")
  118. await write_to_db({"results": results}, repo_id, 3)
  119. else:
  120. results.append(result)
  121. await write_to_db({"results": results}, repo_id, 2)
  122. print("扫描完成")
  123. async def write_to_db(results_dict,repo_id,state):
  124. await Scan_Tasks.filter(repo_id=repo_id).update(state=state, result=results_dict,scan_end_time=int(time.time()))
  125. @airouter.post("/scan")
  126. async def scan(request: RequestBody, background_tasks: BackgroundTasks):
  127. local_path, repo_name = generate_repo_path(request.uuid, request.repo_url)
  128. repo_hash = Repo(local_path).head.commit.hexsha[:7]
  129. repo = await Repos.get(name=repo_name)
  130. repo_id = repo.id
  131. print(f"开始扫描仓库: {repo_name}")
  132. await Scan_Tasks.create(repo_id=repo_id, state=1, create_time=int(time.time()),scan_start_time=int(time.time())
  133. , create_user=request.uuid, repo_hash=repo_hash)
  134. background_tasks.add_task(analysis, local_path, repo_id)
  135. return {"code": 200, "meg": "添加扫描任务成功"}