Browse Source

新增数据库显示克隆结果,新增ai代码分析(未写入数据库)

fulian23 4 months ago
parent
commit
8400b6b0dd
9 changed files with 255 additions and 7 deletions
  1. 133 0
      api/aiRouter.py
  2. 17 4
      api/gitRouter.py
  3. 1 2
      api/testapi.py
  4. 3 0
      demo.py
  5. 3 0
      models/AIModels.py
  6. 7 1
      models/gitModels.py
  7. 88 0
      test/file.py
  8. 1 0
      test/output.txt
  9. 2 0
      test/tmp.py

+ 133 - 0
api/aiRouter.py

@@ -0,0 +1,133 @@
+import os, json
+from base_config import ai_key, path
+from fastapi import APIRouter, BackgroundTasks
+from pathlib import Path
+from pydantic import BaseModel
+from models.gitModels import Users
+from concurrent.futures import ThreadPoolExecutor
+from http import HTTPStatus
+from dashscope import Application
+
+airouter = APIRouter()
class RequestBody(BaseModel):
    """Payload for POST /ai/scan: identifies a user's cloned repository."""
    uuid: str      # per-user directory name under the configured base path
    repo_url: str  # git URL whose last segment names the repo directory
+
def generate_repo_path(uuid, repo_url):
    """Derive the local checkout path for a user's repository.

    Args:
        uuid: per-user directory name under the configured base ``path``.
        repo_url: git URL; its last path segment (minus ".git") becomes
            the repository directory name.

    Returns:
        Tuple ``(local_repo_path, repo_name)``.
    """
    # rstrip guards against trailing-slash URLs ("…/repo.git/"), which
    # would otherwise yield an empty repo name and a bogus path.
    repo_name = repo_url.rstrip("/").split("/")[-1].replace(".git", "")
    base_path = os.path.join(path, uuid)
    return os.path.join(base_path, repo_name), repo_name
def filter_code_files(prompt, app_id='c1a6dbb6d2314e469bfcbe44c2fe0a5f'):
    """Ask the Bailian app to pick the code files out of a file listing.

    Args:
        prompt: stringified list of candidate file paths.
        app_id: DashScope application id (default: the file-filter app,
            kept as a default so existing callers are unchanged).

    Returns:
        dict parsed from the model response; ``{"files": []}`` on HTTP
        failure or non-JSON output.
    """
    # The API key comes from config (base_config.ai_key) — never hard-code it.
    response = Application.call(
        api_key=ai_key,
        app_id=app_id,
        prompt=prompt)
    json_data = {"files": []}  # single fallback shared by every failure path
    if response.status_code == HTTPStatus.OK:
        try:
            json_data = json.loads(response.output.text)
            print(json_data)
        except json.JSONDecodeError:
            print("返回内容不是有效的 JSON 格式!")
    else:
        print(f"请求失败: {response.message}")
    return json_data
+
def analysis_results(local_path, path):
    """Send one source file (with line numbers) to the AI analysis app.

    Args:
        local_path: repository root on disk.
        path: file path relative to *local_path*.

    Returns:
        dict from the model (expected to carry "summary"), always with
        the absolute file path added under "path"; ``{"summary": None,
        "path": ...}`` on failure.  May raise OSError/UnicodeDecodeError
        when the file is unreadable — callers (process_batch2) catch that.
    """
    file_path = os.path.join(local_path, path)
    # Build a "lineno<TAB>line" prompt; join() avoids the quadratic
    # cost of repeated string += on large files.
    with open(file_path, 'r', encoding="utf8") as f:
        prompt = "".join(f"{line_num}\t{line}"
                         for line_num, line in enumerate(f, start=1))
    # The API key comes from config (base_config.ai_key) — never hard-code it.
    response = Application.call(
        api_key=ai_key,
        app_id='2f288f146e2d492abb3fe22695e70635',  # per-file analysis app id
        prompt=prompt)
    if response.status_code == HTTPStatus.OK:
        try:
            json_data = json.loads(response.output.text)
        except json.JSONDecodeError:
            print("返回内容不是有效的 JSON 格式!")
            print(response.output.text)
            json_data = {"summary": None}
    else:
        print(f"请求失败: {response.message}")
        json_data = {"summary": None}
    json_data["path"] = file_path
    print(json_data)
    return json_data
+
+
def get_filtered_files(folder_path):
    """Walk *folder_path* and list every non-hidden file as a relative path.

    Entries whose names start with '.' are skipped, and hidden directories
    are never descended into.

    Raises:
        ValueError: if *folder_path* is not an existing directory.
    """
    root_dir = Path(folder_path).resolve()
    if not root_dir.is_dir():
        raise ValueError("无效的目录路径")
    collected = []
    for current, subdirs, names in os.walk(root_dir):
        # Prune hidden directories in place so os.walk skips them entirely.
        subdirs[:] = [sub for sub in subdirs if not sub.startswith('.')]
        collected.extend(
            str((Path(current) / name).relative_to(root_dir))
            for name in names if not name.startswith('.')
        )
    return collected
def process_batch1(batch_files):
    """Run the AI file filter over one batch of paths; never raises.

    Returns the "files" list from the model response, or [] on any error.
    """
    try:
        return filter_code_files(str(batch_files)).get("files", [])
    except Exception as e:
        print(f"处理批次时出错: {e}")
        return []
def get_code_files(path):
    """Filter a repository's file listing down to code files via the AI app.

    The listing is split into batches of up to 500 paths, and each batch
    is classified concurrently by ``filter_code_files``.

    Returns:
        Flat list of code-file paths (relative to *path*).
    """
    files = get_filtered_files(path)
    print(files)
    print(f"找到 {len(files)} 个文件")
    # Step-slicing replaces the old `len(files) // 500 + 1` range, which
    # produced an empty trailing batch whenever the count was a multiple
    # of 500 (and a pointless AI call for an empty repository).
    chunks = [files[i:i + 500] for i in range(0, len(files), 500)]
    file_list = []
    if chunks:  # ThreadPoolExecutor rejects max_workers=0
        with ThreadPoolExecutor(max_workers=min(5, len(chunks))) as executor:
            futures = [executor.submit(process_batch1, chunk) for chunk in chunks]
            for future in futures:
                try:
                    file_list.extend(future.result())
                except Exception as e:
                    print(f"获取结果时出错: {e}")
    print(f"最终合并文件数: {len(file_list)}")
    return file_list
def process_batch2(local_path, path):
    """Run the AI analysis on one file; never raises.

    Returns the analysis dict, or {"summary": None} on any error.
    """
    try:
        return analysis_results(local_path, path)
    except Exception as e:
        print(f"处理批次时出错: {e}")
        return {"summary": None}
def analysis(local_path):
    """Run AI analysis over every AI-selected code file under *local_path*.

    Files are analyzed concurrently (5 workers).

    Returns:
        List of per-file analysis dicts.  The original code extend()-ed
        each result dict onto ``file_list`` — the list still driving the
        futures — which only appended the dicts' keys and discarded every
        summary; results are now collected into their own list.
    """
    file_list = get_code_files(local_path)
    print(file_list)
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(process_batch2, local_path, file) for file in file_list]
        for future in futures:
            try:
                # append, not extend: each result is a single dict.
                results.append(future.result())
            except Exception as e:
                print(f"获取结果时出错: {e}")
    return results
+
@airouter.post("/scan")
async def ai(request: RequestBody, background_tasks: BackgroundTasks):
    """Queue a background AI scan of the user's cloned repository."""
    local_path, _ = generate_repo_path(request.uuid, request.repo_url)
    background_tasks.add_task(analysis, local_path)
    # Key renamed "meg" -> "msg" for consistency with the git endpoints'
    # responses; verify no client still reads "meg" before deploying.
    return {"code": 200, "msg": "添加扫描任务成功"}
+

+ 17 - 4
api/gitRouter.py

@@ -1,12 +1,16 @@
-import os, json,hashlib,re
+import asyncio
+import os, json,hashlib,re,shutil,time
 from fastapi import APIRouter, BackgroundTasks
+
+
 from base_config import path, avatar_url
 
 from git import Repo, GitCommandError
 
 from pydantic import BaseModel
 
-from models.gitModels import Users
+from models.gitModels import Repos
+
 
 
 class RequestBody(BaseModel):
@@ -43,11 +47,20 @@ def git_stats_to_json(text):
             result["insertions"] = int(item[1])
         if item[2]:
             result["deletions"] = int(item[2])
-
     return result
 
 gitrouter = APIRouter()
 
async def clone_task(repo_url, local_path, uuid, repo_name):
    """Clone *repo_url* into *local_path* off the event loop, then record
    the outcome on the matching Repos row (state 1 = cloned, 0 = failed).

    On failure the partially-cloned directory is removed best-effort.
    """
    current_time = int(time.time())
    print(f"开始克隆仓库: {repo_url}")
    try:
        # Repo.clone_from is blocking; run it in the default executor so
        # the event loop stays responsive during the clone.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, Repo.clone_from, repo_url, local_path)
        await Repos.filter(create_user=uuid, name=repo_name).update(path=local_path, state=1, update_time=current_time)
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and task cancellation.
        await Repos.filter(create_user=uuid, name=repo_name).update(path=local_path, state=0, update_time=current_time)
        # ignore_errors: the clone may have failed before the directory
        # was ever created, and rmtree must not mask the original error.
        shutil.rmtree(local_path, ignore_errors=True)
 
 @gitrouter.post("/clone")
 async def clone(request: RequestBody, background_tasks: BackgroundTasks):
@@ -56,7 +69,7 @@ async def clone(request: RequestBody, background_tasks: BackgroundTasks):
         return {"status": "400", "msg": "仓库已存在", "uuid": request.uuid, "repo_url": request.repo_url,
                 "path": local_path}
     else:
-        background_tasks.add_task(Repo.clone_from, request.repo_url, local_path)
+        background_tasks.add_task(clone_task, request.repo_url, local_path, request.uuid, repo_name)
         response = {"status": "200", "msg": "成功创建克隆任务", "uuid": request.uuid, "repo_name": repo_name,
                     "local_path": local_path}
         return response

+ 1 - 2
api/testapi.py

@@ -136,8 +136,7 @@ async def log(request: RequestBody):
         log.append(current_commit)
 
     # 按时间倒序排列(git log默认最新在前)
-    log = log[::-1]
-
+    print(log)
     return {
         "status": "200",
         "msg": "成功获取日志",

+ 3 - 0
demo.py

@@ -5,6 +5,7 @@ from models.gitModels import *
 
 from api.gitRouter import gitrouter
 from api.testapi import testapi
+from api.aiRouter import airouter
 
 from tortoise.contrib.fastapi import register_tortoise
 from db_config import TORTOISE_ORM
@@ -21,5 +22,7 @@ async def test(id: int):
 app.include_router(gitrouter,prefix="/git")
 app.include_router(testapi,prefix="/test")
 
+app.include_router(airouter,prefix="/ai")
+
 if __name__ == "__main__":
     run(app,host="127.0.0.1",port=8000)

+ 3 - 0
models/AIModels.py

@@ -1,2 +1,5 @@
 from tortoise.models import Model
 from tortoise import fields
+
class File_Summary_Tasks(Model):
    # Placeholder table for per-file AI summary tasks; no fields defined
    # yet (commit note says AI results are not written to the DB yet).
    pass

+ 7 - 1
models/gitModels.py

@@ -11,4 +11,10 @@ class Users(Model):
     git_info = fields.JSONField()
     avatar = fields.CharField(max_length=500, null=True)
     email = fields.CharField(max_length=50, null=True)
-    registTime = fields.BigIntField()
+    registTime = fields.BigIntField()
class Repos(Model):
    # A git repository cloned on behalf of a user.
    name = fields.CharField(max_length=50)         # repo directory name (from the URL)
    state = fields.IntField()                      # 1 = clone succeeded, 0 = failed (set by clone_task)
    create_user = fields.CharField(max_length=36)  # owner uuid
    path = fields.CharField(max_length=500)        # local checkout path
    update_time = fields.BigIntField()             # epoch seconds of last state change

+ 88 - 0
test/file.py

@@ -0,0 +1,88 @@
+import os
+from pathlib import Path
+
+import json
+from http import HTTPStatus
+from dashscope import Application
+
# Ad-hoc test fixtures: absolute paths to repositories previously cloned
# on the developer's machine (hard-coded; adjust before running locally).
a=r"C:\www\gitnexus\9992cddb-b7d1-99ec-1bd2-35fdc177e623\electron"
b=r"C:\www\gitnexus\9992cddb-b7d1-99ec-1bd2-35fdc177e623\Gitnexus-python"
c=r"C:\www\gitnexus\9992cddb-b7d1-99ec-1bd2-35fdc177e623\GitNexus-backend"
d=r"C:\www\gitnexus\9992cddb-b7d1-99ec-1bd2-35fdc177e623\U-Clean-Reserve"
e=r"C:\www\gitnexus\9992cddb-b7d1-99ec-1bd2-35fdc177e623\nsrhwebNew"
def filter_code_files(prompt):
    """Ask the Bailian filtering app which of the listed files are code.

    Returns:
        dict parsed from the model response, or ``{"files": []}`` on HTTP
        failure or non-JSON output.  (Previously an HTTP failure left
        ``json_data`` unassigned and raised UnboundLocalError on return,
        and the JSON fallback used key "dirs" while callers read "files".)
    """
    response = Application.call(
        # FIXME(security): this API key is committed to the repository and
        # therefore leaked — rotate it and load from config/env (see
        # base_config.ai_key used by api/aiRouter.py) instead.
        api_key="sk-0164613e1a2143fc808fc4cc2451bef0",
        app_id='c1a6dbb6d2314e469bfcbe44c2fe0a5f',  # file-filter app id
        prompt=prompt)
    json_data = {"files": []}  # single fallback for every failure path
    if response.status_code == HTTPStatus.OK:
        try:
            json_data = json.loads(response.output.text)
            print(json_data)
        except json.JSONDecodeError:
            print("返回内容不是有效的 JSON 格式!")
            print(response.output.text)
    else:
        print(f"请求失败: {response.message}")
    return json_data
def get_filtered_files(folder_path):
    """Return relative paths of all non-hidden files under *folder_path*.

    Hidden entries (leading '.') are skipped and hidden directories are
    not descended into.

    Raises:
        ValueError: when *folder_path* is not an existing directory.
    """
    top = Path(folder_path).resolve()
    if not top.is_dir():
        raise ValueError("无效的目录路径")

    found = []
    for cur_root, cur_dirs, cur_files in os.walk(top):
        # In-place filter: os.walk honors mutations of the dirs list.
        cur_dirs[:] = [entry for entry in cur_dirs if not entry.startswith('.')]
        for fname in cur_files:
            if fname.startswith('.'):
                continue
            found.append(str((Path(cur_root) / fname).relative_to(top)))
    return found
+
+
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+
def process_batch(batch_files):
    """Classify one batch of paths via the AI filter; never raises.

    Returns the "files" list from the response, or [] on any error.
    """
    try:
        return filter_code_files(str(batch_files)).get("files", [])
    except Exception as e:
        print(f"处理批次时出错: {e}")
        return []
+
+
def get_code_files(path):
    """Filter the files under *path* down to code files via the AI app.

    Batches of up to 500 paths are classified concurrently (max 5 workers).

    Returns:
        ``{"files": [...]}`` with the merged code-file list.
    """
    files = get_filtered_files(path)
    print(f"找到 {len(files)} 个文件")
    # Step-slicing replaces the old `len(files) // 500 + 1` range, which
    # produced an empty trailing batch whenever the count was a multiple
    # of 500 (and a pointless AI call for an empty directory).
    chunks = [files[i:i + 500] for i in range(0, len(files), 500)]
    file_list = []
    if chunks:  # ThreadPoolExecutor rejects max_workers=0
        with ThreadPoolExecutor(max_workers=min(5, len(chunks))) as executor:
            futures = [executor.submit(process_batch, chunk) for chunk in chunks]
            for future in futures:
                try:
                    file_list.extend(future.result())
                except Exception as e:
                    print(f"获取结果时出错: {e}")
    print(f"最终合并文件数: {len(file_list)}")
    return {"files": file_list}


# Ad-hoc run against the electron checkout (see fixture paths above).
get_code_files(a)

+ 1 - 0
test/output.txt

@@ -1,3 +1,4 @@
+C:\Users\32965\PycharmProjects\GitNexus\test\output.txt
 1	import os, json,hashlib
 2	from fastapi import APIRouter, BackgroundTasks
 3	

+ 2 - 0
test/tmp.py

@@ -0,0 +1,2 @@
+import time
+print(int(time.time()))