#include "pch.h"
#include "TCalcFuncSets.h"
#include <windows.h>
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// Process-wide caches of already computed results.
// Outer key: the cache-key string built from the call parameters
// (see GenerateColumnCacheKey / GenerateReturnsCacheKey).
// Inner map: date string -> quantile value computed for that date.
static std::unordered_map<std::string, std::map<std::string, float>> g_columnQuantileCache;
static std::unordered_map<std::string, std::map<std::string, float>> g_returnsQuantileCache;
// Locked by the plugin entry points around every access to the two caches above.
static std::mutex g_cacheMutex;
// Returns a copy of `str` with every whitespace character (as classified by
// std::isspace) removed; all other characters keep their relative order.
std::string CleanString(const std::string& str) {
    std::string stripped;
    stripped.reserve(str.size());
    for (const char ch : str) {
        // Cast first: passing a negative char to std::isspace is undefined.
        if (std::isspace(static_cast<unsigned char>(ch))) {
            continue;
        }
        stripped += ch;
    }
    return stripped;
}
// Returns a copy of `path` without whitespace characters other than the plain
// space: tabs, carriage returns, newlines etc. are dropped, ' ' is kept.
std::string CleanPath(const std::string& path) {
    std::string cleaned;
    cleaned.reserve(path.size());
    for (const char ch : path) {
        const bool isDroppedWhitespace =
            ch != ' ' && std::isspace(static_cast<unsigned char>(ch));
        if (!isDroppedWhitespace) {
            cleaned += ch;
        }
    }
    return cleaned;
}
// Converts a TDX-encoded date (yyyymmdd - 19000000, carried in a float) into
// an 8-character "yyyymmdd" string.
//
// Returns an empty string when the truncated value falls outside
// 1..2000000 (i.e. outside 19000001..21000000 after adding the offset),
// which also covers NaN and infinities.
std::string ConvertTDXDateToYYYYMMDD(float tdxDate) {
    // Validate on the float itself: converting NaN or an out-of-range float to
    // int is undefined behaviour. The negated form makes NaN fail the test too.
    if (!(tdxDate >= 1.0f && tdxDate < 2000001.0f)) {
        return "";
    }
    const int dateInt = static_cast<int>(tdxDate) + 19000000;
    return std::to_string(dateInt);
}
// Normalises a date string to "yyyymmdd".
//
// Accepted inputs (after removing at most one leading and one trailing
// double quote):
//   - "yyyy-mm-dd" / "yyyy/mm/dd" (fields need not be zero padded; anything
//     after the day's digits, such as a time-of-day suffix, is ignored)
//   - an 8-character "yyyymmdd" string
//
// Returns an empty string unless 1900 < year < 2100, 1 <= month <= 12 and
// 1 <= day <= 31. The same range check applies to both input forms, so a
// compact value such as "20231345" is rejected.
std::string ParseDateString(const std::string& dateStr) {
    // Strip the optional surrounding quotes without intermediate copies.
    size_t first = 0;
    size_t last = dateStr.size();
    if (first < last && dateStr[first] == '"') {
        ++first;
    }
    if (first < last && dateStr[last - 1] == '"') {
        --last;
    }
    const std::string cleaned = dateStr.substr(first, last - first);

    // strtol rather than atoi: an over-long digit run is clamped instead of
    // being undefined behaviour, and the clamped value fails the range check.
    const auto toNumber = [&cleaned](size_t pos, size_t len) -> long {
        return std::strtol(cleaned.substr(pos, len).c_str(), nullptr, 10);
    };

    long year = 0;
    long month = 0;
    long day = 0;
    const size_t firstDelim = cleaned.find_first_of("-/");
    if (firstDelim != std::string::npos) {
        const size_t secondDelim = cleaned.find_first_of("-/", firstDelim + 1);
        if (secondDelim != std::string::npos) {
            year = toNumber(0, firstDelim);
            month = toNumber(firstDelim + 1, secondDelim - firstDelim - 1);
            day = toNumber(secondDelim + 1, std::string::npos);
        }
    }
    else if (cleaned.length() == 8) {
        // Compact form: fixed-width fields.
        year = toNumber(0, 4);
        month = toNumber(4, 2);
        day = toNumber(6, 2);
    }

    const bool valid = year > 1900 && year < 2100
        && month >= 1 && month <= 12
        && day >= 1 && day <= 31;
    if (!valid) {
        return "";
    }
    // With a four-digit year the packed number always prints as 8 digits.
    return std::to_string(year * 10000 + month * 100 + day);
}
// Reads one CSV file and, for every data row, appends the numeric value found
// in column `columnIdx` to dateColumnMap under the row's normalised date.
//
// filePath       path of the CSV file; the first line is treated as a header
//                and skipped.
// dateColumnMap  output: "yyyymmdd" -> values collected for that date
//                (existing content is kept and extended).
// columnIdx      zero-based column of the value. Column 0 is the date column,
//                so an index <= 0 never yields a value.
//
// A row contributes only when its date parses (ParseDateString) and the value
// field starts with a finite number. Fields with no numeric prefix are skipped
// instead of being recorded as 0.0, and NaN/inf are rejected so they cannot
// reach the ordering algorithms applied to the collected values later.
//
// Returns false when the file cannot be opened or has no header line,
// true otherwise.
bool ProcessSingleCSVFileForColumn(const std::string& filePath,
    std::map<std::string, std::vector<double> >& dateColumnMap,
    int columnIdx) {
    std::ifstream inFile(filePath.c_str());
    if (!inFile.is_open()) {
        return false;
    }
    std::string line;
    if (!std::getline(inFile, line)) { // header line
        return false;
    }
    if (columnIdx <= 0) {
        // No row can supply a value, so there is nothing to read.
        return true;
    }

    // Returns the index-th comma-separated field of `row`, or an empty string
    // when the row has fewer fields. Only the needed fields are ever copied.
    const auto fieldAt = [](const std::string& row, int index) -> std::string {
        size_t begin = 0;
        for (int skipped = 0; skipped < index; ++skipped) {
            const size_t comma = row.find(',', begin);
            if (comma == std::string::npos) {
                return std::string();
            }
            begin = comma + 1;
        }
        const size_t end = row.find(',', begin);
        return row.substr(begin, end == std::string::npos ? std::string::npos : end - begin);
    };

    while (std::getline(inFile, line)) {
        const std::string dateStr = ParseDateString(CleanString(fieldAt(line, 0)));
        if (dateStr.empty()) {
            continue;
        }
        const std::string valueText = CleanString(fieldAt(line, columnIdx));
        if (valueText.empty()) {
            continue;
        }
        char* parseEnd = nullptr;
        const double value = std::strtod(valueText.c_str(), &parseEnd);
        if (parseEnd == valueText.c_str() || !std::isfinite(value)) {
            continue; // not a number at all, or NaN/inf
        }
        dateColumnMap[dateStr].push_back(value);
    }
    return true; // the stream closes itself when it goes out of scope
}
// Reads one CSV file and appends the N-row percentage change of the close
// price to dateReturnsMap, keyed by the date of the later row.
//
// filePath        path of the CSV file; the first line is treated as a header
//                 and skipped.
// dateReturnsMap  output: "yyyymmdd" -> returns (in percent) collected for
//                 that date (existing content is kept and extended).
// N               look-back distance in rows. A negative N produces no
//                 output.
//
// Column 0 is the date and column 5 is the close price. Every data row is
// kept, even with an unparsable date or price, so that "N rows back" always
// refers to file rows. A return is emitted only when the row's date parsed,
// both prices are > 0 and the resulting value is finite.
//
// Returns false when the file cannot be opened or has no header line,
// true otherwise.
bool ProcessSingleCSVFileForReturns(const std::string& filePath,
    std::map<std::string, std::vector<double> >& dateReturnsMap,
    int N) {
    const int kCloseColumn = 5;

    std::ifstream inFile(filePath.c_str());
    if (!inFile.is_open()) {
        return false;
    }
    std::string line;
    if (!std::getline(inFile, line)) { // header line
        return false;
    }

    // Returns the index-th comma-separated field of `row`, or an empty string
    // when the row has fewer fields. Only the needed fields are ever copied.
    const auto fieldAt = [](const std::string& row, int index) -> std::string {
        size_t begin = 0;
        for (int skipped = 0; skipped < index; ++skipped) {
            const size_t comma = row.find(',', begin);
            if (comma == std::string::npos) {
                return std::string();
            }
            begin = comma + 1;
        }
        const size_t end = row.find(',', begin);
        return row.substr(begin, end == std::string::npos ? std::string::npos : end - begin);
    };

    std::vector<std::string> dates;
    std::vector<double> closePrices;
    dates.reserve(1000); // initial capacity guess; the vectors grow as needed
    closePrices.reserve(1000);

    while (std::getline(inFile, line)) {
        dates.push_back(ParseDateString(CleanString(fieldAt(line, 0))));
        const std::string closeText = CleanString(fieldAt(line, kCloseColumn));
        // A missing or non-numeric field yields 0.0, which the "> 0" filter
        // below excludes.
        closePrices.push_back(std::strtod(closeText.c_str(), nullptr));
    }

    if (N < 0) {
        return true; // explicit instead of relying on unsigned wrap-around
    }
    const size_t lag = static_cast<size_t>(N);
    for (size_t i = lag; i < closePrices.size(); ++i) {
        const double current = closePrices[i];
        const double base = closePrices[i - lag];
        // Written with negations so that NaN prices are rejected as well.
        if (dates[i].empty() || !(current > 0) || !(base > 0)) {
            continue;
        }
        const double returnRate = (current - base) / base * 100.0;
        if (!std::isfinite(returnRate)) {
            continue; // keep inf/NaN away from the quantile calculation
        }
        dateReturnsMap[dates[i]].push_back(returnRate);
    }
    return true;
}
// 优化的文件处理函数 - 用于计算指定列的分位数
void ReadCSVAndCalculateColumnData(const std::string& directory,
std::map<std::string, std::vector<double> >& dateColumnMap,
int columnIdx) {
WIN32_FIND_DATAA findData;
std::string cleanedDirectory = CleanPath(directory);
std::string searchPath = cleanedDirectory + "\\*.csv";
HANDLE hFind = FindFirstFileA(searchPath.c_str(), &findData);
if (hFind == INVALID_HANDLE_VALUE) {
return;
}
do {
if (!(findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
std::string fileName(findData.cFileName);
std::string filePath = cleanedDirectory + "\\" + fileName;
filePath = CleanPath(filePath);
ProcessSingleCSVFileForColumn(filePath, dateColumnMap, columnIdx);
}
} while (FindNextFileA(hFind, &findData) != 0);
FindClose(hFind);
}
// 优化的文件处理函数 - 用于计算N日涨跌幅
void ReadCSVAndCalculateReturns(const std::string& directory,
std::map<std::string, std::vector<double> >& dateReturnsMap,
int N) {
WIN32_FIND_DATAA findData;
std::string cleanedDirectory = CleanPath(directory);
std::string searchPath = cleanedDirectory + "\\*.csv";
HANDLE hFind = FindFirstFileA(searchPath.c_str(), &findData);
if (hFind == INVALID_HANDLE_VALUE) {
return;
}
do {
if (!(findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
std::string fileName(findData.cFileName);
std::string filePath = cleanedDirectory + "\\" + fileName;
filePath = CleanPath(filePath);
ProcessSingleCSVFileForReturns(filePath, dateReturnsMap, N);
}
} while (FindNextFileA(hFind, &findData) != 0);
FindClose(hFind);
}
// Returns the `quantile`-quantile of `data` using linear interpolation
// between the two neighbouring order statistics (position = q * (n - 1)).
//
// data      sample values; not modified (a working copy is partitioned).
// quantile  requested quantile; values outside [0, 1] are clamped.
//
// Returns 0.0f for empty input or a NaN quantile. For quantile == 0.5 the
// classic median is returned (mean of the two middle values for even n).
//
// Only the order statistics actually needed are selected with
// std::nth_element, so the cost is linear on average instead of a full sort.
float CalculateQuantile(const std::vector<double>& data, float quantile) {
    // A NaN quantile would otherwise slip through the clamps below and be
    // converted to an integer index, which is undefined behaviour.
    if (data.empty() || std::isnan(quantile)) {
        return 0.0f;
    }
    if (quantile < 0.0f) quantile = 0.0f;
    if (quantile > 1.0f) quantile = 1.0f;

    std::vector<double> work(data);
    const size_t n = work.size();

    if (quantile == 0.5f) {
        const size_t mid = n / 2;
        std::nth_element(work.begin(), work.begin() + mid, work.end());
        const double upperMid = work[mid];
        if (n % 2 != 0) {
            return static_cast<float>(upperMid);
        }
        // Even count: after partitioning, the lower middle value is the
        // largest element left of `mid` (n >= 2 here, so the range is non-empty).
        const double lowerMid = *std::max_element(work.begin(), work.begin() + mid);
        return static_cast<float>((lowerMid + upperMid) / 2.0);
    }

    // The position is computed in single precision, as before, so results
    // stay identical to the previous implementation.
    const double pos = quantile * static_cast<float>(n - 1);
    size_t lowerIndex = static_cast<size_t>(pos);
    if (lowerIndex > n - 1) {
        lowerIndex = n - 1; // guard against rounding for very large n
    }
    std::nth_element(work.begin(), work.begin() + lowerIndex, work.end());
    const double lower = work[lowerIndex];
    if (lowerIndex + 1 >= n) {
        return static_cast<float>(lower);
    }
    // Everything right of lowerIndex is >= lower; its minimum is the next
    // order statistic.
    const double upper = *std::min_element(work.begin() + lowerIndex + 1, work.end());
    const double fractionalPart = pos - static_cast<double>(lowerIndex);
    return static_cast<float>(lower + fractionalPart * (upper - lower));
}
// Builds the cache key for a (column index, quantile) request.
// The quantile is written with 9 significant digits, enough to tell any two
// different float values apart. Rounding it to two decimals made e.g. 0.251
// and 0.254 share one cache entry and return each other's results.
std::string GenerateColumnCacheKey(int columnIdx, float quantile) {
    std::ostringstream key;
    key.precision(9);
    key << "column_" << columnIdx << '_' << quantile;
    return key.str();
}
// Builds the cache key for an (N-day look-back, quantile) request.
// The quantile is written with 9 significant digits, enough to tell any two
// different float values apart. Rounding it to two decimals made e.g. 0.251
// and 0.254 share one cache entry and return each other's results.
std::string GenerateReturnsCacheKey(int N, float quantile) {
    std::ostringstream key;
    key.precision(9);
    key << "returns_" << N << '_' << quantile;
    return key.str();
}
// Plugin function 2: per-date quantile of one CSV column across all files in
// the data directory.
//
// DataLen  number of elements in pfOUT / pfINa.
// pfOUT    output; element i receives the quantile for the date in pfINa[i],
//          or 0 when the date is invalid or no data exists for it.
// pfINa    dates in TDX encoding (see ConvertTDXDateToYYYYMMDD).
// pfINb    pfINb[0] is the CSV column index (rounded to the nearest int).
// pfINc    pfINc[0] is the quantile, passed on to CalculateQuantile.
//
// The per-date result for a (column, quantile) pair is computed once by
// scanning the CSV directory and is then served from g_columnQuantileCache.
void CalculateColumnQuantileReturns(int DataLen, float* pfOUT, float* pfINa, float* pfINb, float* pfINc)
{
    if (DataLen <= 0 || pfOUT == nullptr) {
        return; // nothing to write; also avoids reading pfINb[0] / pfINc[0] of empty arrays
    }
    std::fill(pfOUT, pfOUT + DataLen, 0.0f);
    if (pfINa == nullptr || pfINb == nullptr || pfINc == nullptr) {
        return;
    }

    const int columnIdx = (int)(pfINb[0] + 0.5f);
    const float quantile = pfINc[0];
    const std::string cacheKey = GenerateColumnCacheKey(columnIdx, quantile);

    // Work on the cached map through a pointer instead of deep-copying it on
    // every call. This is safe because this function only ever inserts into
    // g_columnQuantileCache: unordered_map keeps references to its elements
    // valid across insertions and rehashing, and an inserted map is never
    // modified afterwards.
    // NOTE(review): if code elsewhere starts erasing or overwriting cache
    // entries, the lookups below must move under g_cacheMutex.
    const std::map<std::string, float>* dateQuantiles = nullptr;
    {
        std::lock_guard<std::mutex> lock(g_cacheMutex);
        const auto cached = g_columnQuantileCache.find(cacheKey);
        if (cached != g_columnQuantileCache.end()) {
            dateQuantiles = &cached->second;
        }
    }

    if (dateQuantiles == nullptr) {
        std::map<std::string, float> computed;
        {
            const std::string csvDirectory = "F:\\His_STOCKDATA";
            std::map<std::string, std::vector<double> > dateColumnMap;
            ReadCSVAndCalculateColumnData(csvDirectory, dateColumnMap, columnIdx);
            for (const auto& entry : dateColumnMap) {
                // Keys arrive in ascending order, so end() is the right hint.
                computed.emplace_hint(computed.end(), entry.first,
                    CalculateQuantile(entry.second, quantile));
            }
        } // the raw per-date samples are released here

        std::lock_guard<std::mutex> lock(g_cacheMutex);
        // emplace keeps an entry another thread may have stored meanwhile, so
        // a published map is never replaced while someone is reading it.
        dateQuantiles = &g_columnQuantileCache.emplace(cacheKey, std::move(computed)).first->second;
    }

    // Output is already zero-filled; only matches need to be written.
    for (int i = 0; i < DataLen; i++) {
        const std::string dateStr = ConvertTDXDateToYYYYMMDD(pfINa[i]);
        if (dateStr.length() != 8) {
            continue;
        }
        const auto hit = dateQuantiles->find(dateStr);
        if (hit != dateQuantiles->end()) {
            pfOUT[i] = hit->second;
        }
    }
}
// 函数3 - 快速计算分位数收益
void CalculateQuantileReturns(int DataLen, float* pfOUT, float* pfINa, float* pfINb, float* pfINc)
{
// 初始化输出数组为0
for (int i = 0; i < DataLen; i++) {
pfOUT[i] = 0.0f;
}
// 获取N和M的值
int N = (int)(pfINb[0] + 0.5f);
float M = pfINc[0];
// 生成缓存键值
std::string cacheKey = GenerateReturnsCacheKey(N, M);
// 检查缓存中是否已有计算结果
std::unique_lock<std::mutex> lock(g_cacheMutex);
auto cacheIt = g_returnsQuantileCache.find(cacheKey);
bool hasCache = (cacheIt != g_returnsQuantileCache.end());
std::map<std::string, float> dateQuantileMap;
if (hasCache) {
dateQuantileMap = cacheIt->second;
}
lock.unlock();
// 如果缓存中没有,则进行计算
if (!hasCache) {
// 设置CSV文件的目录
const std::string csvDirectory = "F:\\His_STOCKDATA";
// 存储日期对应的涨跌幅数据
std::map<std::string, std::vector<double> > dateReturnsMap;
// 读取CSV文件并计算N日涨跌幅
ReadCSVAndCalculateReturns(csvDirectory, dateReturnsMap, N);
// 计算每个日期的分位数
std::map<std::string, std::vector<double> >::const_iterator it;
for (it = dateReturnsMap.begin(); it != dateReturnsMap.end(); ++it) {
dateQuantileMap[it->first] = CalculateQuantile(it->second, M);
}
// 清理dateReturnsMap以释放内存
dateReturnsMap.clear();
// 将结果存入缓存
lock.lock();
g_returnsQuantileCache[cacheKey] = dateQuantileMap;
lock.unlock();
}
// 将结果匹配到输出数组
for (int i = 0; i < DataLen; i++) {
std::string dateStr = ConvertTDXDateToYYYYMMDD(pfINa[i]);
if (dateStr.empty() || dateStr.length() != 8) {
pfOUT[i] = 0.0f;
continue;
}
std::map<std::string, float>::const_iterator iter = dateQuantileMap.find(dateStr);
if (iter != dateQuantileMap.end()) {
pfOUT[i] = iter->second;
}
else {
pfOUT[i] = 0.0f;
}
}
}
// Function registration table handed out by RegisterTdxFunc. Each entry pairs
// a function number with the routine implementing it.
PluginTCalcFuncInfo g_CalcFuncSets[] = {
{2, (pPluginFUNC)CalculateColumnQuantileReturns}, // number 2: quantile of a CSV column per date
{3, (pPluginFUNC)CalculateQuantileReturns}, // number 3: quantile of the N-row return per date
{0, NULL}, // presumably the end-of-table marker expected by the host — confirm against the plugin SDK
};
// Publishes the plugin's function table to the caller.
//
// pFun  in/out: address of the caller's table pointer. When it currently holds
//       NULL it is set to g_CalcFuncSets.
//
// Returns TRUE when the table pointer was set, FALSE when pFun is NULL or the
// caller's pointer was already populated (it is left untouched then).
BOOL RegisterTdxFunc(PluginTCalcFuncInfo** pFun)
{
    if (pFun == NULL || *pFun != NULL)
    {
        return FALSE;
    }
    *pFun = g_CalcFuncSets;
    return TRUE;
}