C:\Users\gkxk\AppData\Roaming\Python\Python313\site-packages\xtquant\__init__.py:8: UserWarning:
pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
1 初始化
2 拿到数据
***** xtdata连接成功 2025-10-04 02:44:20*****
服务信息: {'tag': 'sp3', 'version': '1.0'}
服务地址: 127.0.0.1:58610
数据路径: C:\storage\software\国金QMT交易端模拟\bin.x64/../userdata_mini/datadir
设置xtdata.enable_hello = False可隐藏此消息
3 转化数据
3.1 转化为multiindex
def convert_to_multiindex(data, tz='Asia/Shanghai'):
    """Convert xtquant market data into a (symbol, date) MultiIndex DataFrame.

    Args:
        data: dict mapping a field name to a DataFrame whose index holds the
            symbols. ``data['time']`` holds epoch timestamps in milliseconds;
            every other key becomes a column of the result.
        tz: Time zone used to turn the timestamps into calendar dates.
            Epoch timestamps are UTC-based, so formatting them without a
            zone conversion yields the previous calendar day for bars
            stamped at local midnight east of UTC (e.g. Monday bars showing
            up as Sunday). Defaults to the China exchange time zone.

    Returns:
        pd.DataFrame: one row per (symbol, date), sorted by index, with the
        index levels named ``symbol`` and ``date`` (``YYYYMMDD`` strings).
        An empty frame with the same index/column layout is returned when
        ``data['time']`` contains no symbols.
    """
    # Every field except 'time' becomes a value column.
    fields = [key for key in data if key != 'time']
    symbols = data['time'].index.tolist()

    # pd.concat raises on an empty list, so handle "no symbols" explicitly.
    if not symbols:
        empty_index = pd.MultiIndex.from_arrays(
            [[], []], names=['symbol', 'date']
        )
        return pd.DataFrame(columns=fields, index=empty_index)

    frames = []
    for symbol in symbols:
        timestamps = data['time'].loc[symbol].values
        # Interpret the epoch values as UTC, then shift to the exchange's
        # local zone before formatting so the date matches the trading day.
        dates = (
            pd.to_datetime(timestamps, unit='ms', utc=True)
            .tz_convert(tz)
            .strftime('%Y%m%d')
        )
        columns = {field: data[field].loc[symbol].values for field in fields}
        frames.append(pd.DataFrame(columns, index=dates))

    # `keys` adds the symbol as the outer index level; `names` labels both.
    result = pd.concat(frames, keys=symbols, names=['symbol', 'date'])
    return result.sort_index()
# Reshape the raw per-field frames in `data` into a single frame indexed by
# (symbol, date). `data` is produced by an earlier cell not shown here.
df = convert_to_multiindex(data)
# Bare expression: rendered as the cell output when run in a notebook.
df
open | high | low | close | volume | amount | settelementPrice | openInterest | preClose | suspendFlag | ||
---|---|---|---|---|---|---|---|---|---|---|---|
symbol | date | ||||||||||
513300.SH | 20240529 | 1.722 | 1.723 | 1.715 | 1.717 | 1147389 | 197178032.0 | 0.0 | 15 | 1.738 | 0 |
20240530 | 1.701 | 1.713 | 1.699 | 1.713 | 1180885 | 201524147.0 | 0.0 | 15 | 1.717 | 0 | |
20240602 | 1.720 | 1.723 | 1.715 | 1.720 | 1921837 | 330476180.0 | 0.0 | 15 | 1.713 | 0 | |
20240603 | 1.720 | 1.721 | 1.714 | 1.716 | 620769 | 106668071.0 | 0.0 | 15 | 1.720 | 0 | |
20240604 | 1.725 | 1.730 | 1.723 | 1.729 | 913487 | 157834741.0 | 0.0 | 15 | 1.716 | 0 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
20250903 | 2.099 | 2.099 | 2.092 | 2.094 | 2363893 | 495212539.0 | 0.0 | 15 | 2.086 | 0 | |
20250904 | 2.113 | 2.118 | 2.113 | 2.117 | 2376351 | 502701515.0 | 0.0 | 15 | 2.094 | 0 | |
20250907 | 2.115 | 2.116 | 2.112 | 2.113 | 1650252 | 348847553.0 | 0.0 | 15 | 2.117 | 0 | |
20250908 | 2.115 | 2.120 | 2.113 | 2.119 | 2108937 | 446304333.0 | 0.0 | 15 | 2.113 | 0 | |
20250909 | 2.123 | 2.129 | 2.122 | 2.128 | 1738874 | 369720487.0 | 0.0 | 15 | 2.119 | 0 |
314 rows × 10 columns
3.2 转化为标准列名和类型
def convert_to_standard(df):
    """Return a copy of *df* with lowercase column names and numeric floats.

    The input frame is left untouched; previously the columns were renamed
    on the caller's object in place, which silently changed the frame the
    caller still held.

    Args:
        df: pd.DataFrame indexed by (symbol, date).

    Returns:
        pd.DataFrame: a new frame whose string column labels are lowercased
        and whose float columns have been passed through ``pd.to_numeric``.
    """
    # Work on a copy so the caller's frame keeps its original column names.
    result = df.copy()

    # Lowercase string labels only; non-string labels are kept as they are
    # instead of raising AttributeError.
    result.columns = [
        col.lower() if isinstance(col, str) else col
        for col in result.columns
    ]

    # NOTE(review): the selected columns are already float, so this pass
    # looks like a no-op; confirm whether object columns were the intended
    # target before widening the selection.
    for col in result.select_dtypes(include=['float']).columns:
        result[col] = pd.to_numeric(result[col], errors='coerce')

    return result
# standard_df = convert_to_standard(df)
# standard_df