我编写了下面的代码,用来计算每个三人组同时在线(连接时间重叠)的时长:
import pandas as pd
from itertools import combinations
# Sample connection log: each position across the three lists describes one
# session (who connected, when it started, when it ended). Timestamps are
# day/month/year strings; they are parsed later with '%d/%m/%Y %H:%M:%S'.
data = dict(
    User=['Esther', 'Jonh', 'Ann', 'Alex', 'Jonh', 'Alex', 'Ann', 'Beatrix'],
    InitialTime=[
        '01/01/2023 00:00:00',
        '01/01/2023 00:00:00',
        '01/01/2023 00:00:05',
        '01/01/2023 00:00:07',
        '01/01/2023 00:00:12',
        '01/01/2023 00:00:14',
        '01/01/2023 00:00:15',
        '01/01/2023 00:00:16',
    ],
    FinalTime=[
        '01/01/2023 00:10:00',
        '01/01/2023 00:00:10',
        '01/01/2023 00:00:12',
        '01/01/2023 00:00:12',
        '01/01/2023 00:00:16',
        '01/01/2023 00:00:16',
        '01/01/2023 00:00:17',
        '01/01/2023 00:00:17',
    ],
)
# One DataFrame row per session, columns in the order declared above.
df = pd.DataFrame.from_dict(data)
def calculate_overlapped_time(df, group_size=3):
    """Sum the simultaneous-connection time for every ordered group of sessions.

    Every ordered selection of ``group_size`` distinct rows of ``df`` is
    examined. The overlap of a selection is the time between the latest
    ``InitialTime`` and the earliest ``FinalTime`` among its rows, floored at
    zero. Overlaps are accumulated under a key made of the rows' ``User``
    values joined with ``-``, so several selections that spell the same key
    (e.g. a user with more than one session) add up.

    Args:
        df: DataFrame with ``User``, ``InitialTime`` and ``FinalTime``
            columns. The time columns must be parseable with the format
            ``'%d/%m/%Y %H:%M:%S'``. ``df`` is not modified.
        group_size: Number of rows per group. Defaults to 3, which
            reproduces the original three-nested-loop behaviour.

    Returns:
        DataFrame with columns ``Group`` (the ``-``-joined key) and
        ``OverlappingTime`` (accumulated seconds, truncated to int), in
        first-seen key order.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """
    # The file's import block only provides `combinations`; import locally so
    # this function is self-contained.
    from itertools import permutations

    if group_size < 1:
        raise ValueError("group_size must be at least 1")

    time_format = '%d/%m/%Y %H:%M:%S'
    # Parse into local lists instead of assigning back to df, so the caller's
    # DataFrame keeps its original columns.
    starts = pd.to_datetime(df['InitialTime'], format=time_format).tolist()
    ends = pd.to_datetime(df['FinalTime'], format=time_format).tolist()
    users = df['User'].tolist()

    overlapped_time = {}
    # permutations() yields ordered tuples of distinct row positions in
    # lexicographic order, i.e. the same sequence the nested loops visited,
    # but for any group size with a single loop. The number of tuples still
    # grows as n! / (n - group_size)!.
    for members in permutations(range(len(users)), group_size):
        initial_time = max(starts[m] for m in members)
        final_time = min(ends[m] for m in members)
        # A negative difference means the sessions never all overlap.
        overlap = max(0, (final_time - initial_time).total_seconds())
        key = "-".join(str(users[m]) for m in members)
        overlapped_time[key] = overlapped_time.get(key, 0) + overlap

    results = pd.DataFrame(
        list(overlapped_time.items()),
        columns=['Group', 'OverlappingTime'],
    )
    results['OverlappingTime'] = results['OverlappingTime'].astype(int)
    return results
# Run the overlap computation on the sample sessions; the result has one row
# per distinct "User-User-User" key with its accumulated overlap in seconds.
results_df = calculate_overlapped_time(df)
字符串
我想计算大约10人一组的重叠时间,但按照这种写法就需要嵌套同样多层的 for 循环,代码变得不切实际。
有没有人能告诉我,是否有一种替代方案,可以让这段代码更具可扩展性,在不嵌套多层 for 循环的情况下处理更大规模的组?
1条答案
按热度按时间eimct9ow1#
看起来你只是在从同一个 DataFrame 中取行的组合。在这种情况下,可以直接使用 `itertools.combinations`,这样只需要一个循环: