544 eff.c:1761处loop vect 分析
2.6 带有mask的向量数学函数
gcc 支持的svml向量数学函数
摘自 GCC 文档(原文第 32652–32662 行):GCC currently emits calls to @code{vmldExp2}, @code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2}, @code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2}, @code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2}, @code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2}, @code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4}, @code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4}, @code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4}, @code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding function type when @option{-mveclibabi=svml} is used.
oneapi的IR:
%3970 = call fast cc104 <4 x double> @__svml_log4_mask(<4 x double> %3968, <4 x i64> %3969)
gcc的IR : _799 = _ZGVdN4v_logD.6143 (_800);
<__svml_log4_mask_e9>汇编代码的函数原名。
从如何调用不带mask的svml向量数学函数的流程出发,找出调用带有mask的方法。
设计方案:
以下面的语句为例:vect__ifc__1252.1526_717 = VEC_COND_EXPR <mask__1460.1449_910, vect__1761.1465_870, { 0.0, 0.0 }>; 先找到一个VEC_COND_EXPR,然后在同一个基本块中,根据第二个或者第三个参数所涉及到的运算(建立一个栈暂存每次找到的结果),顺着运算的定义关系一步步往上找,直到找到需要进行mask的数学函数。如果是在第二个参数的关系链中找到的,VEC_COND_EXPR中的第一个参数mask就是该数学函数所需的mask;如果是在第三个参数的关系链中找到的,其所需的mask就是VEC_COND_EXPR中的mask的取反。最后将数学函数和mask一起生成带有mask的数学函数的IR,替换掉原来不带mask的调用。(待定:这一步是在生成cond_expr之后直接做,还是在loop vect pass之后另外新建一个pass来做?)
/* GIMPLE pass "mask_vecmath_func".

   After loop vectorization, look for

     lhs = VEC_COND_EXPR <mask, true_val, false_val>;

   and, when TRUE_VAL is computed (possibly through intermediate
   assignments in the same basic block) from a call to the SVML vector
   math routine named by SVML_MASKED_LOG_NAME, rebuild that call with MASK
   appended as an extra trailing argument.

   Only the "true" operand of the VEC_COND_EXPR is searched.  Searching
   the "false" operand and passing the inverted mask is not implemented.

   NOTE(review): the call is masked even if its result has other users
   besides the VEC_COND_EXPR that supplied the mask; confirm that this
   cannot happen for the loops this pass targets.
   NOTE(review): the mask is passed with whatever vector type the
   VEC_COND_EXPR uses; confirm that this matches what the SVML entry
   point expects.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"
#include "tree-ssa-sccvn.h"
#include "gimple-expr.h"
#include <cstdio>

namespace
{

/* Name of the SVML routine this pass both searches for and emits.  The
   call found and the call built deliberately use the same name; only the
   argument list differs (the mask is appended).  Kept in one place so the
   search and the rewrite cannot drift apart.  */
const char *const svml_masked_log_name = "__svml_log4_mask_e9";

const pass_data pass_data_mask_vecmath_func = {
  GIMPLE_PASS,			  /* type */
  "mask_vecmath_func",		  /* name */
  OPTGROUP_NONE,		  /* optinfo_flags */
  TV_TREE_VECT_MASK_VECMATH_FUNC, /* tv_id */
  (PROP_cfg | PROP_ssa),	  /* properties_required */
  0,				  /* properties_provided */
  0,				  /* properties_destroyed */
  0,				  /* todo_flags_start */
  0,				  /* todo_flags_finish */
};

class pass_mask_vecmath_func : public gimple_opt_pass
{
public:
  pass_mask_vecmath_func (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_mask_vecmath_func, ctxt)
  {}

  /* The pass runs only when its command-line flag is set.  */
  bool gate (function *) override { return flag_tree_mask_vecmath_func; }

  unsigned int execute (function *) override;
};

/* Starting from STMT, the statement defining the SSA name OPERAND, walk
   the operands of assignments upwards and return the first call to
   SVML_MASKED_LOG_NAME that is reached, or NULL if there is none.

   The walk never leaves basic block BB: a call in another block does not
   execute under the condition the VEC_COND_EXPR selects on.  VISITED
   records statements already examined so that shared sub-expressions are
   walked once.  */

static gimple *
find_relate_operand (tree operand, gimple *stmt, basic_block bb,
		     hash_set<gimple *> &visited)
{
  if (!stmt || TREE_CODE (operand) != SSA_NAME)
    return NULL;

  /* Default definitions (GIMPLE_NOP) have no block and stop the walk
     here as well.  */
  if (gimple_bb (stmt) != bb)
    return NULL;

  if (visited.add (stmt))
    return NULL;

  if (is_gimple_call (stmt))
    {
      tree fndecl = gimple_call_fndecl (stmt);
      if (fndecl
	  && DECL_NAME (fndecl)
	  && strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),
		     svml_masked_log_name) == 0)
	return stmt;
      return NULL;
    }

  if (!is_gimple_assign (stmt))
    return NULL;

  /* Operand 0 is the LHS; the right-hand side operands start at 1.  */
  for (unsigned i = 1; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      if (!op || TREE_CODE (op) != SSA_NAME)
	continue;

      gimple *found
	= find_relate_operand (op, SSA_NAME_DEF_STMT (op), bb, visited);
      if (found)
	return found;
    }

  return NULL;
}

/* Return true if MASK may be added as an operand of CALL without breaking
   SSA form.  CALL must be in the same basic block as the statement MASK
   was taken from; then a definition of MASK in another block already
   dominates CALL, and only a definition in CALL's own block needs an
   ordering check.  */

static bool
mask_usable_at_call_p (tree mask, gimple *call)
{
  /* An embedded comparison cannot be passed as a call argument.  */
  if (!is_gimple_val (mask))
    return false;

  /* Constants are usable anywhere.  */
  if (TREE_CODE (mask) != SSA_NAME)
    return true;

  gimple *def = SSA_NAME_DEF_STMT (mask);
  if (gimple_nop_p (def)
      || gimple_bb (def) != gimple_bb (call)
      || gimple_code (def) == GIMPLE_PHI)
    return true;

  /* Same block: the definition must come strictly before the call.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (def);
  for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == call)
      return true;

  return false;
}

/* Replace call STMT by a call to SVML_MASKED_LOG_NAME that takes the
   original arguments followed by NEW_ARG.  Return the new call, or NULL
   if STMT is not a direct call and was left untouched.  */

static gimple *
add_mask_to_call (gimple *stmt, tree new_arg)
{
  if (!is_gimple_call (stmt))
    return NULL;

  tree old_fndecl = gimple_call_fndecl (stmt);
  if (!old_fndecl)
    return NULL;

  /* Collect the original arguments plus the mask, and their types.  */
  unsigned num_args = gimple_call_num_args (stmt);
  vec<tree> vargs = vNULL;
  vargs.create (num_args + 1);
  auto_vec<tree> argtypes (num_args + 1);
  for (unsigned i = 0; i < num_args; ++i)
    {
      tree arg = gimple_call_arg (stmt, i);
      vargs.quick_push (arg);
      argtypes.quick_push (TREE_TYPE (arg));
    }
  vargs.quick_push (new_arg);
  argtypes.quick_push (TREE_TYPE (new_arg));

  /* Give the new declaration a type that includes the mask parameter
     instead of reusing the type of the unmasked declaration.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (old_fndecl));
  tree fntype = build_function_type_array (ret_type, argtypes.length (),
					   argtypes.address ());
  tree new_fndecl
    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		  get_identifier (svml_masked_log_name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  gimple *new_call = gimple_build_call_vec (new_fndecl, vargs);
  gimple_call_set_lhs (new_call, gimple_call_lhs (stmt));
  gimple_set_location (new_call, gimple_location (stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_replace (&gsi, new_call, true);

  vargs.release ();
  return new_call;
}

/* Scan every VEC_COND_EXPR in FUN and mask the SVML call feeding its
   "true" operand, if any.  Always returns 0 (no extra TODO flags).  */

unsigned int
pass_mask_vecmath_func::execute (function *fun)
{
  /* Calls this pass has already rewritten.  The rewritten call keeps the
     name that find_relate_operand looks for, so without this a call
     reached from two VEC_COND_EXPRs would get a second mask appended.  */
  hash_set<gimple *> masked_calls;

  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gassign *assign = dyn_cast <gassign *> (gsi_stmt (gsi));
	  if (!assign || gimple_assign_rhs_code (assign) != VEC_COND_EXPR)
	    continue;

	  tree mask = gimple_assign_rhs1 (assign);
	  tree true_val = gimple_assign_rhs2 (assign);
	  if (TREE_CODE (true_val) != SSA_NAME)
	    continue;

	  hash_set<gimple *> visited;
	  gimple *call
	    = find_relate_operand (true_val, SSA_NAME_DEF_STMT (true_val),
				   bb, visited);
	  if (!call
	      || masked_calls.contains (call)
	      || !mask_usable_at_call_p (mask, call))
	    continue;

	  gimple *masked = add_mask_to_call (call, mask);
	  if (!masked)
	    continue;
	  masked_calls.add (masked);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "mask_vecmath_func: masked call: ");
	      print_gimple_stmt (dump_file, masked, 0, TDF_SLIM);
	    }
	}
    }

  return 0;
}

} // anonymous namespace

/* Factory used by the pass manager.  */

gimple_opt_pass *
make_pass_mask_vecmath_func (gcc::context *ctxt)
{
  return new pass_mask_vecmath_func (ctxt);
}
生成了正确的IR之后,使用builtin的方式调用svml中的带有mask的数学函数。
gcc调用svml函数在gimple阶段的过程:
1:examining statement:
vect_analyze_stmt函数中检查stmt, 在vectorizable_xxx函数里面判断操作数的类型。vect_is_simple_use: 计算向量化的cost, vect_model_simple_cost,先不进行transform。
调用svml需要使用target-specific built-in function,使用此函数targetm.vectorize.builtin_vectorized_function,根据优化选项(config/i386/i386-options.cc:2567)定位到(ix86_veclib_handler = &ix86_veclibabi_svml)后端ix86_veclibabi_svml函数处,返回向量svml函数fndecl。
2:vectorizing statement:
vect_transform_loop_stmt函数中,进行transform,同样也会调用vectorizable_xxx函数进行此转化。gimple_build_call_vec (fndecl, vargs):根据获取到的fndecl以及对参数的向量化,构建一个新的gimple vec call。
loop vec pass的调用栈
vect_analyze_loop_2:
Apply a set of analyses on LOOP, and create a loop_vec_info struct for it. The different analyses will record information in the loop_vec_info struct
loop_vec_info 里面放的是对loop 分析完成后的整个loop的信息
vect_analyze_loop_operations:
Scan the loop stmts and make sure they are all vectorizable.
vect_analyze_stmt:
Make sure the statement is vectorizable.
ziyuan 2.3 和 2.4修改对于其他课题的影响 aggressive_if_conv && use_gather_2parts result.xlsx 采用HygonGCC 1.3.2编译器最新版本 和最新配置文件Hygon7490-2p-HygonGCC1.3.2.202403-hgalloc-znver1-base.cfg
跑1copy的时候整个node最好不要跑其他程序,不然性能数据会波动较大。会抢占node的内存等资源。
可能优化的方向:
1. gcc调用svml向量数学库的接口函数只能支持128bit的输入。可以修改接口,使其调用256bit输入的版本。
2. -mtune-ctrl=^avx256_split_regs,^avx128_optimal,256_unaligned_store_optimal可以使程序使用256bit的ymm寄存器,提高循环向量化的vf,对性能有提高:2069处提高4%,1761处提高8%。
3. oneapi将条件和条件里面的计算分别放在不同的bb块中,通过控制流来选择需要执行哪些分支,可以减少冗余运算。gcc向量化只能在同一个bb块中进行,无法控制每个分支,只支持在log函数上进行mask操作,以及在最终运算的结果上进行选择;其他操作(+、-、* 等)的mask只能在支持avx512的机器上使用。只能想办法在gcc上也将不同分支分为不同bb块,模仿oneapi。
4. gcc上的vf是8,使用两次log4;oneapi的vf为4,使用一次log4。尝试使用相似的方法,通过将i32扩展为i64并使用256bit ymm,将gcc变为vf4、只使用一次log4,未能成功。并且怀疑第3点才是性能的主要点,此操作应该不是性能的主要点。
5. gcc循环向量化无法处理跨bb的问题,如果向量化后拆分成不同bb,后续的pass可能无法处理,会对拆分的bb做一些未知的操作,不建议使用此方法。可以在原有的bb里面插入一些根据mask进行选择的指令,来模拟分支选择的操作。
void calc(double *src1,double *src2,double *src3)5 {6 int i;7 for(i=0;i<100;i++)8 {9 if(src3[i] > 10.0)10 {11 src1[i] = exp(src2[i]);12 }13 else if(src3[i] > 5.0)14 {15 src1[i] = log(src2[i]);16 }17 else if(src3[i] > 2.5)18 {19 src1[i] = sin(src2[i]);20 }21 }22 } |
对于有mask store的操作,会将if-conversion操作进行回退。optimize_mask_stores
1:新建一个对mask进行判断是否全为0的GIMPLE_COND。
2:新建一个then bb块,并且维护其边。
3:在mask store后分割一个新的bb,并且把stmt全部移到bb里面,新建一个边。
create_basic_block_1 (void *head, void *end, basic_block after):
int vf为4,double vf 为2.
test_mask_vecmath.c:13:18: note: === vect_determine_vectorization_factor ===681 test_mask_vecmath.c:13:18: note: ==> examining phi: i_114 = PHI <i_85(20), 0(35)>682 test_mask_vecmath.c:13:18: note: ==> examining phi: sumi1_115 = PHI <_136(20), 0.0(35)>683 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double684 test_mask_vecmath.c:13:18: note: vectype: vector(2) double685 test_mask_vecmath.c:13:18: note: nunits = 2686 test_mask_vecmath.c:13:18: note: ==> examining phi: sumi2_117 = PHI <_138(20), 0.0(35)>687 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double688 test_mask_vecmath.c:13:18: note: vectype: vector(2) double689 test_mask_vecmath.c:13:18: note: nunits = 2690 test_mask_vecmath.c:13:18: note: ==> examining phi: sumi3_119 = PHI <_140(20), 0.0(35)>691 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double692 test_mask_vecmath.c:13:18: note: vectype: vector(2) double693 test_mask_vecmath.c:13:18: note: nunits = 2694 test_mask_vecmath.c:13:18: note: ==> examining phi: ivtmp_106 = PHI <ivtmp_101(20), 100(35)>695 test_mask_vecmath.c:13:18: note: ==> examining statement: _62 = (long unsigned int) i_114;696 test_mask_vecmath.c:13:18: note: skip.697 test_mask_vecmath.c:13:18: note: ==> examining statement: _63 = _62 * 4;698 test_mask_vecmath.c:13:18: note: skip.699 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_151 = i_114 w* 4;700 test_mask_vecmath.c:13:18: note: skip.701 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_152 = (long unsigned int) patt_151;702 test_mask_vecmath.c:13:18: note: skip.703 test_mask_vecmath.c:13:18: note: ==> examining statement: _64 = &src3 + _63;704 test_mask_vecmath.c:13:18: note: skip.705 test_mask_vecmath.c:13:18: note: ==> examining statement: j_65 = *_64;706 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(4) int707 test_mask_vecmath.c:13:18: note: nunits = 4708 test_mask_vecmath.c:13:18: note: ==> examining statement: _66 = (long 
unsigned int) j_65;709 test_mask_vecmath.c:13:18: note: skip.710 test_mask_vecmath.c:13:18: note: ==> examining statement: _67 = _66 * 8;711 test_mask_vecmath.c:13:18: note: skip.712 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_153 = j_65 w* 8;713 test_mask_vecmath.c:13:18: note: skip.714 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_154 = (long unsigned int) patt_153;715 test_mask_vecmath.c:13:18: note: skip.716 test_mask_vecmath.c:13:18: note: ==> examining statement: _142 = _141 + _67;717 test_mask_vecmath.c:13:18: note: skip. test_mask_vecmath.c:13:18: note: ==> examining statement: _68 = (double *) _142;719 test_mask_vecmath.c:13:18: note: skip.720 test_mask_vecmath.c:13:18: note: ==> examining statement: _143 = j_65 > 10;721 test_mask_vecmath.c:13:18: note: vectype: vector(4) <signed-boolean:32>722 test_mask_vecmath.c:13:18: note: nunits = 4723 test_mask_vecmath.c:13:18: note: ==> examining statement: _69 = .MASK_LOAD (_68, 64B, _143);724 test_mask_vecmath.c:13:18: note: skip.725 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_155 = (<signed-boolean:64>) _143;726 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>727 test_mask_vecmath.c:13:18: note: nunits = 2728 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_156 = .MASK_LOAD (_68, 64B, patt_155);729 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double730 test_mask_vecmath.c:13:18: note: nunits = 2731 test_mask_vecmath.c:13:18: note: ==> examining statement: _70 = log (_69);732 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double733 test_mask_vecmath.c:13:18: note: vectype: vector(2) double734 test_mask_vecmath.c:13:18: note: nunits = 2735 test_mask_vecmath.c:13:18: note: ==> examining statement: _89 = (unsigned int) j_65;736 test_mask_vecmath.c:13:18: note: get vectype for scalar type: unsigned int737 test_mask_vecmath.c:13:18: note: vectype: 
vector(4) unsigned int738 test_mask_vecmath.c:13:18: note: nunits = 4739 test_mask_vecmath.c:13:18: note: ==> examining statement: _87 = _89 + 4294967288;740 test_mask_vecmath.c:13:18: note: get vectype for scalar type: unsigned int741 test_mask_vecmath.c:13:18: note: vectype: vector(4) unsigned int742 test_mask_vecmath.c:13:18: note: nunits = 4743 test_mask_vecmath.c:13:18: note: ==> examining statement: _73 = _62 * 8;744 test_mask_vecmath.c:13:18: note: skip.745 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_157 = i_114 w* 8;746 test_mask_vecmath.c:13:18: note: skip.747 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_158 = (long unsigned int) patt_157;748 test_mask_vecmath.c:13:18: note: skip.749 test_mask_vecmath.c:13:18: note: ==> examining statement: _145 = _73 + _141;750 test_mask_vecmath.c:13:18: note: skip.751 test_mask_vecmath.c:13:18: note: ==> examining statement: _74 = (double *) _145;752 test_mask_vecmath.c:13:18: note: skip.753 test_mask_vecmath.c:13:18: note: ==> examining statement: _146 = _87 <= 2;754 test_mask_vecmath.c:13:18: note: vectype: vector(4) <signed-boolean:32>755 test_mask_vecmath.c:13:18: note: nunits = 4756 test_mask_vecmath.c:13:18: note: ==> examining statement: _75 = .MASK_LOAD (_74, 64B, _146);757 test_mask_vecmath.c:13:18: note: skip.758 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_159 = (<signed-boolean:64>) _146;759 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>760 test_mask_vecmath.c:13:18: note: nunits = 2 761 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_160 = .MASK_LOAD (_74, 64B, patt_159);762 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double763 test_mask_vecmath.c:13:18: note: nunits = 2764 test_mask_vecmath.c:13:18: note: ==> examining statement: _76 = log (_75);765 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double766 test_mask_vecmath.c:13:18: note: 
vectype: vector(2) double767 test_mask_vecmath.c:13:18: note: nunits = 2768 test_mask_vecmath.c:13:18: note: ==> examining statement: _148 = _73 + _147;769 test_mask_vecmath.c:13:18: note: skip.770 test_mask_vecmath.c:13:18: note: ==> examining statement: _80 = (double *) _148;771 test_mask_vecmath.c:13:18: note: skip.772 test_mask_vecmath.c:13:18: note: ==> examining statement: _149 = j_65 == 7;773 test_mask_vecmath.c:13:18: note: vectype: vector(4) <signed-boolean:32>774 test_mask_vecmath.c:13:18: note: nunits = 4775 test_mask_vecmath.c:13:18: note: ==> examining statement: _81 = .MASK_LOAD (_80, 64B, _149);776 test_mask_vecmath.c:13:18: note: skip.777 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_161 = (<signed-boolean:64>) _149;778 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>779 test_mask_vecmath.c:13:18: note: nunits = 2780 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_162 = .MASK_LOAD (_80, 64B, patt_161);781 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double782 test_mask_vecmath.c:13:18: note: nunits = 2783 test_mask_vecmath.c:13:18: note: ==> examining statement: _82 = log (_81);784 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double785 test_mask_vecmath.c:13:18: note: vectype: vector(2) double786 test_mask_vecmath.c:13:18: note: nunits = 2787 test_mask_vecmath.c:13:18: note: ==> examining statement: _ifc__135 = j_65 > 10 ? 
_70 : 0.0;788 test_mask_vecmath.c:13:18: note: skip.789 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_163 = j_65 > 10;790 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(4) <signed-boolean:32>791 test_mask_vecmath.c:13:18: note: nunits = 4792 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_164 = (<signed-boolean:64>) patt_163;793 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>794 test_mask_vecmath.c:13:18: note: nunits = 2795 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_165 = patt_164 ? _70 : 0.0; 796 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double797 test_mask_vecmath.c:13:18: note: nunits = 2798 test_mask_vecmath.c:13:18: note: ==> examining statement: _136 = sumi1_115 + _ifc__135;799 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double800 test_mask_vecmath.c:13:18: note: vectype: vector(2) double801 test_mask_vecmath.c:13:18: note: nunits = 2802 test_mask_vecmath.c:13:18: note: ==> examining statement: _ifc__137 = _87 <= 2 ? _76 : 0.0;803 test_mask_vecmath.c:13:18: note: skip.804 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_166 = _87 <= 2;805 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(4) <signed-boolean:32>806 test_mask_vecmath.c:13:18: note: nunits = 4807 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_167 = (<signed-boolean:64>) patt_166;808 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>809 test_mask_vecmath.c:13:18: note: nunits = 2810 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_168 = patt_167 ? 
_76 : 0.0;811 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double812 test_mask_vecmath.c:13:18: note: nunits = 2813 test_mask_vecmath.c:13:18: note: ==> examining statement: _138 = sumi2_117 + _ifc__137;814 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double815 test_mask_vecmath.c:13:18: note: vectype: vector(2) double816 test_mask_vecmath.c:13:18: note: nunits = 2817 test_mask_vecmath.c:13:18: note: ==> examining statement: _ifc__139 = j_65 == 7 ? _82 : 0.0;818 test_mask_vecmath.c:13:18: note: skip.819 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_169 = j_65 == 7;820 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(4) <signed-boolean:32>821 test_mask_vecmath.c:13:18: note: nunits = 4822 test_mask_vecmath.c:13:18: note: ==> examining pattern def stmt: patt_170 = (<signed-boolean:64>) patt_169;823 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) <signed-boolean:64>824 test_mask_vecmath.c:13:18: note: nunits = 2825 test_mask_vecmath.c:13:18: note: ==> examining pattern statement: patt_171 = patt_170 ? _82 : 0.0;826 test_mask_vecmath.c:13:18: note: precomputed vectype: vector(2) double827 test_mask_vecmath.c:13:18: note: nunits = 2828 test_mask_vecmath.c:13:18: note: ==> examining statement: _140 = sumi3_119 + _ifc__139;829 test_mask_vecmath.c:13:18: note: get vectype for scalar type: double830 test_mask_vecmath.c:13:18: note: vectype: vector(2) double831 test_mask_vecmath.c:13:18: note: nunits = 2832 test_mask_vecmath.c:13:18: note: ==> examining statement: i_85 = i_114 + 1;833 test_mask_vecmath.c:13:18: note: skip.834 test_mask_vecmath.c:13:18: note: ==> examining statement: ivtmp_101 = ivtmp_106 - 1;835 test_mask_vecmath.c:13:18: note: skip.836 test_mask_vecmath.c:13:18: note: ==> examining statement: if (ivtmp_101 != 0)837 test_mask_vecmath.c:13:18: note: skip.838 test_mask_vecmath.c:13:18: note: vectorization factor = 4 |
既有int 也有double的loop
#include<stdio.h>2 #include<math.h>3 #include<stdlib.h>4 void calc(double *src1,double *src2,int *src3)5 {6 int i;7 int j;8 double sumi = 0;9 double sumi1 = 0;10 double sumi2 = 0;11 double sumi3 = 0;12 double sumi_temp[100];13 for(i=0;i<100;i++)14 {15 j = src3[i];16 if(src3[i] > 10)17 {18 // src1[i] = exp(src2[j]);19 sumi1 += log(src2[j]);20 // sumi = exp(src3[i]);21 // sumi += 2;22 }23 else if(src3[i] > 7)24 {25 // src1[i] = log(src2[j]);26 // sumi = log(src2[j]);27 sumi2 += log(src2[i]);28 // sumi += 3;29 }30 31 else if(src3[i] > 6)32 {33 // src1[i] = sin(src2[j]);34 sumi3 += log(src1[i]);35 // sumi += 2;36 }37 }38 /* for(int i=0;i<100;i++) {39 sumi+=src1[i];40 }*/41 sumi = sumi1 + sumi2 + sumi3;42 printf("sumi is %lf\n",sumi);43 44 }46 int main()47 {48 srand(12);49 double src1[100];50 double src2[100];51 // double src3[100];52 int src3[100];53 double rand_double_min2 = 5.0;54 double rand_double_max2 = 15.0;55 56 int rand_int_min2 = 5;57 int rand_int_max2 = 15;58 59 for(int k = 0;k<100;k++) {60 src1[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );61 src2[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );62 // src3[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );63 }64 for(int k = 0;k<100;k++) {65 src3[k] = rand_int_min2+ rand() % ( rand_int_max2 - rand_int_min2 );66 }67 68 for(int k = 0;k<100;k++) {69 printf("src1 is %lf ",src1[k]);70 }71 calc(src1,src2,src3);72 double res= 0;73 for(int m = 0;m<100;m++) {74 res += src1[m];75 }76 printf("res is %lf\n",res);77 return 0;78 } |
bb分块
COUNT:1604735257<bb 78>: # # RANGE [0, 2147483647] NONZERO 2147483647 k_3019 = PHI <k_1827(216), 0(301)> # temp0_1543 = PHI <_1251(216), 0.0(301)> # temp1_2883 = PHI <_1249(216), 0.0(301)> # temp2_224 = PHI <_1247(216), 0.0(301)> # temp3_2699 = PHI <_1245(216), 0.0(301)> # temp4_1545 = PHI <_1243(216), 0.0(301)> # vect_temp0_1543.1410_1003 = PHI <vect__1251.1527_708(216), { 0.0, 0.0, 0.0, 0.0 }(301)> # vect_temp1_2883.1411_1002 = PHI <vect__1249.1530_701(216), { 0.0, 0.0, 0.0, 0.0 }(301)> # vect_temp2_224.1412_1001 = PHI <vect__1247.1533_694(216), { 0.0, 0.0, 0.0, 0.0 }(301)> # vect_temp3_2699.1413_1000 = PHI <vect__1245.1536_687(216), { 0.0, 0.0, 0.0, 0.0 }(301)> # vect_temp4_1545.1414_999 = PHI <vect__1243.1539_670(216), { 0.0, 0.0, 0.0, 0.0 }(301)> # # PT = nonlocal escaped null # ALIGN = 4, MISALIGN = 0 vectp.1415_998 = PHI <vectp.1415_997(216), _1703(301)> # ivtmp_667 = PHI <ivtmp_666(216), 0(301)> # DEBUG temp4D.7772 => NULL # DEBUG temp3D.7771 => NULL # DEBUG temp2D.7770 => NULL # DEBUG temp1D.7769 => NULL # DEBUG temp0D.7768 => NULL # DEBUG kD.7615 => NULL # DEBUG BEGIN_STMT # DEBUG BEGIN_STMT # RANGE [0, 2147483646] NONZERO 2147483647 _1705 = (long unsigned intD.10) k_3019; # RANGE [0, 8589934584] NONZERO 8589934588 _1706 = _1705 * 4; # PT = nonlocal escaped null _1707 = _1703 + _1706; # VUSE <.MEM_2600> vect_j_1708.1417_996 = MEM <vector(8) intD.6> [(INT_TD.3736 *)vectp.1415_998]; # VUSE <.MEM_2600> j_1708 = *_1707; # DEBUG jD.7613 => NULL # DEBUG BEGIN_STMT vect__1709.1418_994 = vect_j_1708.1417_996 * { 3, 3, 3, 3, 3, 3, 3, 3 }; _1709 = j_1708 * 3; # RANGE ~[2147483648, 18446744071562067967] _1710 = (long unsigned intD.10) _1709; # RANGE [0, 18446744073709551608] NONZERO 18446744073709551608 _1711 = _1710 * 8; # PT = nonlocal null _1712 = x_242(D) + _1711; # VUSE <.MEM_2600> # USE = anything vect__1713.1419_991 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, x_242(D), vect__1709.1418_994, { Nan, Nan, Nan, Nan }, 8); vect__1713.1420_990 = 
VEC_PERM_EXPR <vect__1709.1418_994, vect__1709.1418_994, { 4, 5, 6, 7, 4, 5, 6, 7 }>; # VUSE <.MEM_2600> # USE = anything vect__1713.1419_989 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, x_242(D), vect__1713.1420_990, { Nan, Nan, Nan, Nan }, 8); # VUSE <.MEM_2600> _1713 = *_1712; vect_xij_1714.1421_987 = vect_cst__988 - vect__1713.1419_991; vect_xij_1714.1421_986 = vect_cst__988 - vect__1713.1419_989; xij_1714 = xi_1687 - _1713; # DEBUG xijD.7655 => NULL # DEBUG BEGIN_STMT # RANGE ~[2147483649, 18446744071562067968] _1715 = _1710 + 1; # RANGE [0, 18446744073709551608] NONZERO 18446744073709551608 _1716 = _1715 * 8; # PT = nonlocal null _1717 = x_242(D) + _1716; # VUSE <.MEM_2600> # USE = anything vect__1718.1422_980 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _983, vect__1709.1418_994, { Nan, Nan, Nan, Nan }, 8); # VUSE <.MEM_2600> # USE = anything vect__1718.1422_977 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _983, vect__1713.1420_990, { Nan, Nan, Nan, Nan }, 8); # VUSE <.MEM_2600> _1718 = *_1717; vect_yij_1719.1424_975 = vect_cst__976 - vect__1718.1422_980; vect_yij_1719.1424_974 = vect_cst__976 - vect__1718.1422_977; yij_1719 = yi_1691 - _1718; # DEBUG yijD.7656 => NULL # DEBUG BEGIN_STMT # RANGE ~[2147483650, 18446744071562067969] _1720 = _1710 + 2; # RANGE [0, 18446744073709551608] NONZERO 18446744073709551608 _1721 = _1720 * 8; # PT = nonlocal null _1722 = x_242(D) + _1721; # VUSE <.MEM_2600> # USE = anything vect__1723.1425_967 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _971, vect__1709.1418_994, { Nan, Nan, Nan, Nan }, 8); # VUSE <.MEM_2600> # USE = anything vect__1723.1425_965 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _971, vect__1713.1420_990, { Nan, Nan, Nan, Nan }, 8); # VUSE <.MEM_2600> _1723 = *_1722; vect_zij_1724.1427_963 = vect_cst__964 - vect__1723.1425_967; vect_zij_1724.1427_962 = vect_cst__964 - vect__1723.1425_965; zij_1724 = zi_1695 - 
_1723; # DEBUG zijD.7657 => NULL # DEBUG BEGIN_STMT vect_powmult_2740.1428_961 = vect_xij_1714.1421_987 * vect_xij_1714.1421_987; vect_powmult_2740.1428_960 = vect_xij_1714.1421_986 * vect_xij_1714.1421_986; powmult_2740 = xij_1714 * xij_1714; vect_powmult_2713.1429_959 = vect_yij_1719.1424_975 * vect_yij_1719.1424_975; vect_powmult_2713.1429_958 = vect_yij_1719.1424_974 * vect_yij_1719.1424_974; powmult_2713 = yij_1719 * yij_1719; vect_powmult_1661.1430_957 = vect_zij_1724.1427_963 * vect_zij_1724.1427_963; vect_powmult_1661.1430_956 = vect_zij_1724.1427_962 * vect_zij_1724.1427_962; powmult_1661 = zij_1724 * zij_1724; vect__1971.1431_955 = vect_powmult_1661.1430_957 + vect_powmult_2713.1429_959; vect__1971.1431_954 = vect_powmult_1661.1430_956 + vect_powmult_2713.1429_958; _1971 = powmult_1661 + powmult_2713; vect_r2_1729.1432_953 = vect__1971.1431_955 + vect_powmult_2740.1428_961; vect_r2_1729.1432_952 = vect__1971.1431_954 + vect_powmult_2740.1428_960; // compute r2 r2_1729 = _1971 + powmult_2740; # DEBUG r2D.7683 => NULL # DEBUG BEGIN_STMT # DEBUG r2D.7683 => NULL # DEBUG BEGIN_STMT # DEBUG BEGIN_STMT vect__1730.1433_950 = .SQRT (vect_r2_1729.1432_953); // after if (r2 > rgbmaxpsmax2) compute vect__1730.1433_949 = .SQRT (vect_r2_1729.1432_952); vect_dij1i_1731.1434_947 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1730.1433_950; vect_dij1i_1731.1434_946 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1730.1433_949; # DEBUG dij1iD.7664 => NULL # DEBUG BEGIN_STMT vect_dij_1732.1435_945 = vect_r2_1729.1432_953 * vect_dij1i_1731.1434_947; vect_dij_1732.1435_944 = vect_r2_1729.1432_952 * vect_dij1i_1731.1434_946; dij_1732 = r2_1729 * Inf; # DEBUG dijD.7673 => NULL # DEBUG BEGIN_STMT _1733 = (long unsigned intD.10) j_1708; _1734 = _1733 * 8; _1241 = _1242 + _1734; # PT = nonlocal escaped null _1735 = (doubleD.32 *) _1241; mask__1239.1436_942 = vect_r2_1729.1432_953 <= vect_cst__943; // if (r2 > rgbmaxpsmax2) mask__1239.1436_941 = vect_r2_1729.1432_952 <= vect_cst__943; 
_1239 = r2_1729 <= powmult_2494; stmp_938 = VIEW_CONVERT_EXPR<vector(4) doubleD.32>(mask__1239.1436_942); # VUSE <.MEM_2600> # USE = anything vect__1736.1437_937 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _939, vect_j_1708.1417_996, stmp_938, 8); // after if (r2 > rgbmaxpsmax2) compute vect__1736.1438_936 = VEC_PERM_EXPR <vect_j_1708.1417_996, vect_j_1708.1417_996, { 4, 5, 6, 7, 4, 5, 6, 7 }>; stmp_935 = VIEW_CONVERT_EXPR<vector(4) doubleD.32>(mask__1239.1436_941); # VUSE <.MEM_2600> # USE = anything vect__1736.1437_934 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _939, vect__1736.1438_936, stmp_935, 8); _1237 = _1238 + _1734; # PT = nonlocal escaped null _1737 = (doubleD.32 *) _1237; # VUSE <.MEM_2600> # USE = anything vect__1738.1439_931 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _933, vect_j_1708.1417_996, stmp_938, 8); # VUSE <.MEM_2600> # USE = anything vect__1738.1439_924 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _933, vect__1736.1438_936, stmp_935, 8); vect__1739.1441_922 = vect__1738.1439_931 + { -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2 }; vect__1739.1441_921 = vect__1738.1439_924 + { -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2 }; vect_sj_1740.1442_920 = vect__1736.1437_937 * vect__1739.1441_922; vect_sj_1740.1442_919 = vect__1736.1437_934 * vect__1739.1441_921; # DEBUG sjD.7686 => NULL # DEBUG BEGIN_STMT # DEBUG sj2D.7687 => NULL # DEBUG BEGIN_STMT vect__1743.1443_917 = vect_sj_1740.1442_920 + { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 }; vect__1743.1443_916 = vect_sj_1740.1442_919 + { 
2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 }; mask__1463.1444_915 = vect_dij_1732.1435_945 <= vect__1743.1443_917; mask__1463.1444_914 = vect_dij_1732.1435_944 <= vect__1743.1443_916; _1463 = dij_1732 <= 2.0e+1; mask__1462.1445_913 = mask__1239.1436_942 & mask__1463.1444_915; // if (dij > rgbmax + sj) mask__1462.1445_912 = mask__1239.1436_941 & mask__1463.1444_914; _1462 = _1239 & _1463; vect_powmult_1725.1446_911 = vect_sj_1740.1442_920 * vect_sj_1740.1442_920; vect_powmult_1725.1446_910 = vect_sj_1740.1442_919 * vect_sj_1740.1442_919; # DEBUG BEGIN_STMT vect__1744.1447_908 = { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 } - vect_sj_1740.1442_920; // begin if ((dij > rgbmax - sj)) vect__1744.1447_907 = { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 } - vect_sj_1740.1442_919; mask__1461.1448_906 = vect_dij_1732.1435_945 > vect__1744.1447_908; mask__1461.1448_905 = vect_dij_1732.1435_944 > vect__1744.1447_907; _1461 = dij_1732 > 2.0e+1; mask__1460.1449_904 = mask__1461.1448_906 & mask__1462.1445_913; // if ((dij > rgbmax - sj)) enter if-else chain mask__1460.1449_903 = mask__1461.1448_905 & mask__1462.1445_912; _1460 = _1461 & _1462; else add # DEBUG BEGIN_STMT vect__1745.1450_902 = vect_dij_1732.1435_945 - vect_sj_1740.1442_920; vect__1745.1450_901 = vect_dij_1732.1435_944 - vect_sj_1740.1442_919; vect_uij_1746.1451_899 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1745.1450_902; vect_uij_1746.1451_898 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1745.1450_901; uij_1746 = 0.0 / r2_1729; # DEBUG uijD.7689 => NULL # DEBUG BEGIN_STMT vect__1748.1452_896 = vect_dij_1732.1435_945 * { 8.0e+1, 8.0e+1, 8.0e+1, 8.0e+1 }; vect__1748.1452_895 = vect_dij_1732.1435_944 * { 8.0e+1, 8.0e+1, 8.0e+1, 8.0e+1 }; _1748 = dij_1732 * 8.0e+1; vect__2057.1453_894 = vect_powmult_1725.1446_911 - vect_r2_1729.1432_953; vect__2057.1453_893 = vect_powmult_1725.1446_910 - vect_r2_1729.1432_952; _2057 = -r2_1729; vect__1750.1454_892 = vect__1748.1452_896 + vect__2057.1453_894; vect__1750.1454_891 = vect__1748.1452_895 + 
vect__2057.1453_893; _1750 = _1748 + _2057; vect__1751.1455_889 = vect__1750.1454_892 * { 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3 }; vect__1751.1455_888 = vect__1750.1454_891 * { 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3 }; _1751 = _1750 * 2.50000000000000048572257327350598643533885478973388671875e-3; vect__2086.1456_886 = vect_dij_1732.1435_945 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }; vect__2086.1456_885 = vect_dij_1732.1435_944 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }; _2086 = dij_1732 * 2.0e+0; vect__1753.1457_884 = vect_uij_1746.1451_899 * vect__2086.1456_886; vect__1753.1457_883 = vect_uij_1746.1451_898 * vect__2086.1456_885; _1753 = uij_1746 * _2086; vect__1754.1458_882 = vect__1751.1455_889 - vect__1753.1457_884; vect__1754.1458_881 = vect__1751.1455_888 - vect__1753.1457_883; _1754 = _1751 - _1753; vect__1755.1459_879 = vect__1745.1450_902 * { 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2 }; vect__1755.1459_878 = vect__1745.1450_901 * { 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2 }; _1755 = dij_1732 * 5.000000000000000277555756156289135105907917022705078125e-2; vect__1756.1460_877 = __svml_log4_mask_e9D.7954 (vect__1755.1459_879); vect__1756.1460_876 = 
__svml_log4_mask_e9D.7954 (vect__1755.1459_878); vect__1757.1461_874 = vect__1756.1460_877 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }; vect__1757.1461_873 = vect__1756.1460_876 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }; vect__2097.1462_871 = vect__1754.1458_882 + { -1.0e+0, -1.0e+0, -1.0e+0, -1.0e+0 }; vect__2097.1462_870 = vect__1754.1458_881 + { -1.0e+0, -1.0e+0, -1.0e+0, -1.0e+0 }; _2097 = _1754 - 1.0e+0; vect__1759.1463_869 = vect__2097.1462_871 - vect__1757.1461_874; vect__1759.1463_868 = vect__2097.1462_870 - vect__1757.1461_873; vect__2099.1464_866 = vect_dij1i_1731.1434_947 * { 1.25e-1, 1.25e-1, 1.25e-1, 1.25e-1 }; vect__2099.1464_865 = vect_dij1i_1731.1434_946 * { 1.25e-1, 1.25e-1, 1.25e-1, 1.25e-1 }; vect__1761.1465_864 = vect__1759.1463_869 * vect__2099.1464_866; vect__1761.1465_863 = vect__1759.1463_868 * vect__2099.1464_865; _1761 = _2097 * Inf; /// else add # DEBUG temp0D.7768 => NULL mask__1458.1466_862 = vect_dij_1732.1435_945 <= vect__1744.1447_908; // begin else if (dij > 4.0 * sj) mask__1458.1466_861 = vect_dij_1732.1435_944 <= vect__1744.1447_907; mask__1457.1467_860 = mask__1458.1466_862 & mask__1462.1445_913; mask__1457.1467_859 = mask__1458.1466_861 & mask__1462.1445_912; # DEBUG BEGIN_STMT vect__1764.1468_857 = vect_sj_1740.1442_920 * { 4.0e+0, 4.0e+0, 4.0e+0, 4.0e+0 }; vect__1764.1468_856 = vect_sj_1740.1442_919 * { 4.0e+0, 4.0e+0, 4.0e+0, 4.0e+0 }; mask__1456.1469_855 = vect_dij_1732.1435_945 > vect__1764.1468_857; mask__1456.1469_854 = vect_dij_1732.1435_944 > vect__1764.1468_856; _1456 = dij_1732 > 0.0; mask__1455.1470_853 = mask__1456.1469_855 & mask__1457.1467_860; // else if (dij > 4.0 * sj) mask__1455.1470_852 = mask__1456.1469_854 & mask__1457.1467_859; _1455 = _1456 & _1462; /// else add # DEBUG BEGIN_STMT vect_powmult_1726.1471_851 = vect_dij1i_1731.1434_947 * vect_dij1i_1731.1434_947; vect_powmult_1726.1471_846 = vect_dij1i_1731.1434_946 * vect_dij1i_1731.1434_946; # DEBUG dij2iD.7672 => NULL # DEBUG BEGIN_STMT vect_tmpsd_1766.1472_845 = 
vect_powmult_1725.1446_911 * vect_powmult_1726.1471_851; vect_tmpsd_1766.1472_844 = vect_powmult_1725.1446_910 * vect_powmult_1726.1471_846; # DEBUG tmpsdD.7695 => NULL # DEBUG BEGIN_STMT vect__1767.1473_842 = vect_tmpsd_1766.1472_845 * { 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1 }; vect__1767.1473_841 = vect_tmpsd_1766.1472_844 * { 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1 }; vect__1768.1474_839 = vect__1767.1473_842 + { 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1 }; vect__1768.1474_838 = vect__1767.1473_841 + { 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1 }; vect__1769.1475_837 = vect_tmpsd_1766.1472_845 * vect__1768.1474_839; vect__1769.1475_836 = vect_tmpsd_1766.1472_844 * vect__1768.1474_838; vect__1770.1476_834 = vect__1769.1475_837 + { 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1 }; vect__1770.1476_832 = vect__1769.1475_836 + { 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 
4.28571428571428547638078043746645562350749969482421875e-1 }; vect__1771.1477_831 = vect_tmpsd_1766.1472_845 * vect__1770.1476_834; vect__1771.1477_830 = vect_tmpsd_1766.1472_844 * vect__1770.1476_832; vect__1772.1478_824 = vect__1771.1477_831 + { 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1 }; vect__1772.1478_823 = vect__1771.1477_830 + { 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1 }; vect__1773.1479_822 = vect_tmpsd_1766.1472_845 * vect__1772.1478_824; vect__1773.1479_821 = vect_tmpsd_1766.1472_844 * vect__1772.1478_823; vect_dumbo_1774.1480_819 = vect__1773.1479_822 + { 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1 }; vect_dumbo_1774.1480_818 = vect__1773.1479_821 + { 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1 }; # DEBUG dumboD.7694 => NULL # DEBUG BEGIN_STMT vect__2892.1481_817 = vect_powmult_1726.1471_851 * vect_sj_1740.1442_920; vect__2892.1481_816 = vect_powmult_1726.1471_846 * vect_sj_1740.1442_919; vect__1776.1482_815 = vect_tmpsd_1766.1472_845 * vect__2892.1481_817; vect__1776.1482_814 = vect_tmpsd_1766.1472_844 * vect__2892.1481_816; vect__1777.1483_813 = vect_dumbo_1774.1480_819 * vect__1776.1482_815; vect__1777.1483_812 = vect_dumbo_1774.1480_818 * vect__1776.1482_814; # DEBUG temp1D.7769 => NULL 
mask__1453.1484_811 = vect_dij_1732.1435_945 <= vect__1764.1468_857; // begin else if (dij > ri + sj) mask__1453.1484_810 = vect_dij_1732.1435_944 <= vect__1764.1468_856; _1453 = dij_1732 <= 0.0; mask__1452.1485_809 = mask__1453.1484_811 & mask__1457.1467_860; mask__1452.1485_808 = mask__1453.1484_810 & mask__1457.1467_859; _1452 = _1453 & _1462; // esle add # DEBUG BEGIN_STMT vect__1780.1486_806 = vect_cst__807 + vect_sj_1740.1442_920; vect__1780.1486_805 = vect_cst__807 + vect_sj_1740.1442_919; _1780 = ri_1700; mask__1451.1487_804 = vect_dij_1732.1435_945 > vect__1780.1486_806; mask__1451.1487_803 = vect_dij_1732.1435_944 > vect__1780.1486_805; _1451 = dij_1732 > _1780; mask__1450.1488_802 = mask__1451.1487_804 & mask__1452.1485_809; mask__1450.1488_801 = mask__1451.1487_803 & mask__1452.1485_808; // else if (dij > ri + sj) _1450 = _1451 & _1452; # DEBUG BEGIN_STMT vect__1782.1489_800 = vect_sj_1740.1442_920 / vect__2057.1453_894; vect__1782.1489_799 = vect_sj_1740.1442_919 / vect__2057.1453_893; _1782 = 0.0 / r2_1729; vect__1784.1490_797 = vect_dij_1732.1435_945 + vect_sj_1740.1442_920; vect__1784.1490_796 = vect_dij_1732.1435_944 + vect_sj_1740.1442_919; vect__1785.1491_795 = vect__1745.1450_902 / vect__1784.1490_797; vect__1785.1491_794 = vect__1745.1450_901 / vect__1784.1490_796; vect__1786.1492_793 = __svml_log4_mask_e9D.7987 (vect__1785.1491_795); vect__1786.1492_792 = __svml_log4_mask_e9D.7987 (vect__1785.1491_794); vect__1894.1493_790 = vect_dij1i_1731.1434_947 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; vect__1894.1493_789 = vect_dij1i_1731.1434_946 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; vect__1788.1494_788 = vect__1786.1492_793 * vect__1894.1493_790; vect__1788.1494_787 = vect__1786.1492_792 * vect__1894.1493_789; vect__1789.1495_786 = vect__1782.1489_800 - vect__1788.1494_788; vect__1789.1495_785 = vect__1782.1489_799 - vect__1788.1494_787; _1789 = _1782 - Nan; vect__1790.1496_783 = vect__1789.1495_786 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; vect__1790.1496_782 
= vect__1789.1495_785 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; _1790 = _1789 * 5.0e-1; # DEBUG temp2D.7770 => NULL mask__1448.1497_781 = vect_dij_1732.1435_945 <= vect__1780.1486_806; // begin else if (dij > fabs(ri - sj)) mask__1448.1497_780 = vect_dij_1732.1435_944 <= vect__1780.1486_805; _1448 = dij_1732 <= _1780; mask__1447.1498_779 = mask__1448.1497_781 & mask__1452.1485_809; mask__1447.1498_778 = mask__1448.1497_780 & mask__1452.1485_808; _1447 = _1448 & _1452; # DEBUG BEGIN_STMT vect__1793.1499_776 = vect_cst__807 - vect_sj_1740.1442_920; vect__1793.1499_775 = vect_cst__807 - vect_sj_1740.1442_919; vect__1794.1500_774 = ABS_EXPR <vect__1793.1499_776>; vect__1794.1500_773 = ABS_EXPR <vect__1793.1499_775>; _1794 = ABS_EXPR <_1780>; mask__1446.1501_772 = vect_dij_1732.1435_945 > vect__1794.1500_774; mask__1446.1501_771 = vect_dij_1732.1435_944 > vect__1794.1500_773; _1446 = dij_1732 > _1794; mask__1445.1502_770 = mask__1446.1501_772 & mask__1447.1498_779; mask__1445.1502_769 = mask__1446.1501_771 & mask__1447.1498_778; // else if (dij > fabs(ri - sj)) _1445 = _1446 & _1447; # DEBUG BEGIN_STMT vect__2372.1503_767 = vect_cst__768 - vect_powmult_1725.1446_911; vect__2372.1503_766 = vect_cst__768 - vect_powmult_1725.1446_910; _2372 = powmult_1728; vect__1798.1504_765 = vect_r2_1729.1432_953 + vect__2372.1503_767; vect__1798.1504_764 = vect_r2_1729.1432_952 + vect__2372.1503_766; _1798 = r2_1729 + _2372; vect__2373.1505_762 = vect__1798.1504_765 * vect_cst__763; vect__2373.1505_761 = vect__1798.1504_764 * vect_cst__763; _2373 = _1798 * _2894; vect_theta_1800.1506_760 = vect_dij1i_1731.1434_947 * vect__2373.1505_762; vect_theta_1800.1506_759 = vect_dij1i_1731.1434_946 * vect__2373.1505_761; theta_1800 = _2373 * Inf; # DEBUG thetaD.7670 => NULL # DEBUG BEGIN_STMT vect_uij_1802.1507_757 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1784.1490_797; vect_uij_1802.1507_756 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1784.1490_796; # DEBUG uijD.7689 => NULL # DEBUG BEGIN_STMT 
vect__1803.1508_754 = vect_theta_1800.1506_760 + { -2.0e+0, -2.0e+0, -2.0e+0, -2.0e+0 }; vect__1803.1508_753 = vect_theta_1800.1506_759 + { -2.0e+0, -2.0e+0, -2.0e+0, -2.0e+0 }; _1803 = theta_1800 - 2.0e+0; vect__1804.1509_751 = vect_cst__752 * vect__1803.1508_754; vect__1804.1509_750 = vect_cst__752 * vect__1803.1508_753; _1804 = ri1i_1701 * _1803; vect__1805.1510_749 = vect_uij_1802.1507_757 + vect__1804.1509_751; vect__1805.1510_748 = vect_uij_1802.1507_756 + vect__1804.1509_750; _1805 = uij_1746 + _1804; vect__1806.1511_746 = vect_uij_1802.1507_757 * vect_cst__807; vect__1806.1511_745 = vect_uij_1802.1507_756 * vect_cst__807; _1806 = ri_1700 * uij_1746; vect__1807.1512_744 = __svml_log4_mask_e9D.8008 (vect__1806.1511_746); vect__1807.1512_743 = __svml_log4_mask_e9D.8008 (vect__1806.1511_745); vect__1808.1513_742 = vect_dij1i_1731.1434_947 * vect__1807.1512_744; vect__1808.1513_741 = vect_dij1i_1731.1434_946 * vect__1807.1512_743; vect__1809.1514_740 = vect__1805.1510_749 - vect__1808.1513_742; vect__1809.1514_739 = vect__1805.1510_748 - vect__1808.1513_741; _1809 = _1805 - Nan; vect__1810.1515_737 = vect__1809.1514_740 * { 2.5e-1, 2.5e-1, 2.5e-1, 2.5e-1 }; vect__1810.1515_736 = vect__1809.1514_739 * { 2.5e-1, 2.5e-1, 2.5e-1, 2.5e-1 }; _1810 = _1809 * 2.5e-1; # DEBUG temp3D.7771 => NULL mask__1443.1516_735 = vect_dij_1732.1435_945 <= vect__1794.1500_774; // begin else if (ri < sj) mask__1443.1516_734 = vect_dij_1732.1435_944 <= vect__1794.1500_773; _1443 = dij_1732 <= _1794; mask__1442.1517_733 = mask__1443.1516_735 & mask__1447.1498_779; mask__1442.1517_732 = mask__1443.1516_734 & mask__1447.1498_778; _1442 = _1443 & _1447; # DEBUG BEGIN_STMT mask__1441.1518_730 = vect_cst__807 < vect_sj_1740.1442_920; mask__1441.1518_729 = vect_cst__807 < vect_sj_1740.1442_919; _1441 = _1699 < 8.99999999999999966693309261245303787291049957275390625e-2; mask__1406.1519_728 = mask__1441.1518_730 & mask__1442.1517_733; mask__1406.1519_727 = mask__1441.1518_729 & 
mask__1442.1517_732; // else if (ri < sj) _1406 = _1441 & _1442; # DEBUG BEGIN_STMT vect__1816.1520_725 = vect__1782.1489_800 - vect_cst__726; vect__1816.1520_724 = vect__1782.1489_799 - vect_cst__726; _1816 = _1782 - _1815; vect__1235.1521_723 = -vect__1785.1491_795; vect__1235.1521_722 = -vect__1785.1491_794; vect__1820.1522_721 = __svml_log4_mask_e9D.8019 (vect__1235.1521_723); vect__1820.1522_720 = __svml_log4_mask_e9D.8019 (vect__1235.1521_722); vect__1822.1523_719 = vect__1820.1522_721 * vect__1894.1493_790; vect__1822.1523_718 = vect__1820.1522_720 * vect__1894.1493_789; vect__1823.1524_717 = vect__1816.1520_725 - vect__1822.1523_719; vect__1823.1524_716 = vect__1816.1520_724 - vect__1822.1523_718; _1823 = _1816 - Nan; vect__1824.1525_714 = vect__1823.1524_717 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; vect__1824.1525_713 = vect__1823.1524_716 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; // end if-else _1824 = _1823 * 5.0e-1; # DEBUG temp4D.7772 => NULL vect__ifc__1252.1526_711 = VEC_COND_EXPR <mask__1460.1449_904, vect__1761.1465_864, { 0.0, 0.0, 0.0, 0.0 }>; vect__ifc__1252.1526_710 = VEC_COND_EXPR <mask__1460.1449_903, vect__1761.1465_863, { 0.0, 0.0, 0.0, 0.0 }>; _ifc__1252 = _1460 ? _1761 : 0.0; vect__1251.1527_709 = vect_temp0_1543.1410_1003 + vect__ifc__1252.1526_711; vect__1251.1527_708 = vect__1251.1527_709 + vect__ifc__1252.1526_710; _1251 = temp0_1543 + _ifc__1252; vect__ifc__1250.1529_704 = VEC_COND_EXPR <mask__1455.1470_853, vect__1777.1483_813, { 0.0, 0.0, 0.0, 0.0 }>; vect__ifc__1250.1529_703 = VEC_COND_EXPR <mask__1455.1470_852, vect__1777.1483_812, { 0.0, 0.0, 0.0, 0.0 }>; _ifc__1250 = _1455 ? 
Nan : 0.0; vect__1249.1530_702 = vect_temp1_2883.1411_1002 - vect__ifc__1250.1529_704; vect__1249.1530_701 = vect__1249.1530_702 - vect__ifc__1250.1529_703; _1249 = temp1_2883 - _ifc__1250; vect__ifc__1248.1532_697 = VEC_COND_EXPR <mask__1450.1488_802, vect__1790.1496_783, { 0.0, 0.0, 0.0, 0.0 }>; vect__ifc__1248.1532_696 = VEC_COND_EXPR <mask__1450.1488_801, vect__1790.1496_782, { 0.0, 0.0, 0.0, 0.0 }>; _ifc__1248 = _1450 ? _1790 : 0.0; vect__1247.1533_695 = vect_temp2_224.1412_1001 + vect__ifc__1248.1532_697; vect__1247.1533_694 = vect__1247.1533_695 + vect__ifc__1248.1532_696; _1247 = temp2_224 + _ifc__1248; vect__ifc__1246.1535_690 = VEC_COND_EXPR <mask__1445.1502_770, vect__1810.1515_737, { 0.0, 0.0, 0.0, 0.0 }>; vect__ifc__1246.1535_689 = VEC_COND_EXPR <mask__1445.1502_769, vect__1810.1515_736, { 0.0, 0.0, 0.0, 0.0 }>; _ifc__1246 = _1445 ? _1810 : 0.0; vect__1245.1536_688 = vect_temp3_2699.1413_1000 + vect__ifc__1246.1535_690; vect__1245.1536_687 = vect__1245.1536_688 + vect__ifc__1246.1535_689; _1245 = temp3_2699 + _ifc__1246; vect__ifc__1244.1538_673 = VEC_COND_EXPR <mask__1406.1519_728, vect__1824.1525_714, { 0.0, 0.0, 0.0, 0.0 }>; vect__ifc__1244.1538_672 = VEC_COND_EXPR <mask__1406.1519_727, vect__1824.1525_713, { 0.0, 0.0, 0.0, 0.0 }>; _ifc__1244 = _1406 ? 
_1824 : 0.0; vect__1243.1539_671 = vect_temp4_1545.1414_999 + vect__ifc__1244.1538_673; vect__1243.1539_670 = vect__1243.1539_671 + vect__ifc__1244.1538_672; _1243 = temp4_1545 + _ifc__1244; # DEBUG temp4D.7772 => _1243 # DEBUG temp3D.7771 => _1245 # DEBUG temp2D.7770 => _1247 # DEBUG temp1D.7769 => _1249 # DEBUG temp0D.7768 => _1251 # DEBUG BEGIN_STMT # RANGE [1, 2147483647] NONZERO 2147483647 k_1827 = k_3019 + 1; # DEBUG temp4D.7772 => _1243 # DEBUG temp3D.7771 => _1245 # DEBUG temp2D.7770 => _1247 # DEBUG temp1D.7769 => _1249 # DEBUG temp0D.7768 => _1251 # DEBUG kD.7615 => k_1827 # DEBUG BEGIN_STMT # PT = nonlocal escaped null vectp.1415_997 = vectp.1415_998 + 32; ivtmp_666 = ivtmp_667 + 1; if (ivtmp_666 < bnd.1407_1013)goto <bb 216>; [83.33%] elsegoto <bb 303>; [16.67% |
bb 分块的优化方案:
1:找到vec_cond_expr,将其中第一个参数mask作为上一个bb的结束(其后还有一个mask),并且在其后新建一个该mask与0进行比较的gimple_cond,将这两个mask相与。同时新建该mask判断为true和false的edge,分别指向分割出的bb和其下一个bb。
2:以vec_cond_expr的第二个参数的定义语句(SSA_NAME_DEF_STMT)作为要分割bb的末尾,进行分割,并且生成一条指向其下一个bb的edge。同时将该下一个bb作为mask判断为false的edge的dest。
optimize_mask_stores 代码
10093 /* The code below is trying to perform simple optimization - revert 10094 if-conversion for masked stores, i.e. if the mask of a store is zero 10095 do not perform it and all stored value producers also if possible. 10096 For example, 10097 for (i=0; i<n; i++) 10098 if (c[i]) 10099 { 10100 p1[i] += 1; 10101 p2[i] = p3[i] +2; 10102 } 10103 this transformation will produce the following semi-hammock: 10104 10105 if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 }) 10106 { 10107 vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165); 10108 vect__12.22_172 = vect__11.19_170 + vect_cst__171; 10109 MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172); 10110 vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165); 10111 vect__19.28_184 = vect__18.25_182 + vect_cst__183; 10112 MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184); 10113 } 10114 */ 10115 10116 void 10117 optimize_mask_stores (class loop *loop) 10118 { 10119 basic_block *bbs = get_loop_body (loop); 10120 unsigned nbbs = loop->num_nodes; 10121 unsigned i; 10122 basic_block bb; 10123 class loop *bb_loop; 10124 gimple_stmt_iterator gsi; 10125 gimple *stmt; 10126 auto_vec<gimple *> worklist; 10127 auto_purge_vect_location sentinel; 10128 10129 vect_location = find_loop_location (loop); 10130 /* Pick up all masked stores in loop if any. */ 10131 for (i = 0; i < nbbs; i++) 10132 { 10133 bb = bbs[i]; 10134 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); 10135 gsi_next (&gsi)) 10136 { 10137 stmt = gsi_stmt (gsi); 10138 if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) 10139 worklist.safe_push (stmt); 10140 } 10141 } 10142 10143 free (bbs); 10144 if (worklist.is_empty ()) 10145 return; 10146 10147 /* Loop has masked stores. 
*/ 10148 while (!worklist.is_empty ()) 10149 { 10150 gimple *last, *last_store; 10151 edge e, efalse; 10152 tree mask; 10153 basic_block store_bb, join_bb; 10154 gimple_stmt_iterator gsi_to; 10155 tree vdef, new_vdef; 10156 gphi *phi; 10157 tree vectype; 10158 tree zero; 10159 10160 last = worklist.pop (); 10161 mask = gimple_call_arg (last, 2); 10162 bb = gimple_bb (last); 10163 /* Create then_bb and if-then structure in CFG, then_bb belongs to 10164 the same loop as if_bb. It could be different to LOOP when two 10165 level loop-nest is vectorized and mask_store belongs to the inner 10166 one. */ 10167 e = split_block (bb, last); 10168 bb_loop = bb->loop_father; 10169 gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop)); 10170 join_bb = e->dest; 10171 store_bb = create_empty_bb (bb); 10172 add_bb_to_loop (store_bb, bb_loop); 10173 e->flags = EDGE_TRUE_VALUE; 10174 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); 10175 /* Put STORE_BB to likely part. */ 10176 efalse->probability = profile_probability::unlikely (); 10177 store_bb->count = efalse->count (); 10178 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); 10179 if (dom_info_available_p (CDI_DOMINATORS)) 10180 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); 10181 if (dump_enabled_p ()) 10182 dump_printf_loc (MSG_NOTE, vect_location, 10183 "Create new block %d to sink mask stores.", 10184 store_bb->index); 10185 /* Create vector comparison with boolean result. 
*/ 10186 vectype = TREE_TYPE (mask); 10187 zero = build_zero_cst (vectype); 10188 stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE); 10189 gsi = gsi_last_bb (bb); 10190 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 10191 /* Create new PHI node for vdef of the last masked store: 10192 .MEM_2 = VDEF <.MEM_1> 10193 will be converted to 10194 .MEM.3 = VDEF <.MEM_1> 10195 and new PHI node will be created in join bb 10196 .MEM_2 = PHI <.MEM_1, .MEM_3> 10197 */ 10198 vdef = gimple_vdef (last); 10199 new_vdef = make_ssa_name (gimple_vop (cfun), last); 10200 gimple_set_vdef (last, new_vdef); 10201 phi = create_phi_node (vdef, join_bb); 10202 add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION); 10203 10204 /* Put all masked stores with the same mask to STORE_BB if possible. */ 10205 while (true) 10206 { 10207 gimple_stmt_iterator gsi_from; 10208 gimple *stmt1 = NULL; 10209 10210 /* Move masked store to STORE_BB. */ 10211 last_store = last; 10212 gsi = gsi_for_stmt (last); 10213 gsi_from = gsi; 10214 /* Shift GSI to the previous stmt for further traversal. */ 10215 gsi_prev (&gsi); 10216 gsi_to = gsi_start_bb (store_bb); 10217 gsi_move_before (&gsi_from, &gsi_to); 10218 /* Setup GSI_TO to the non-empty block start. */ 10219 gsi_to = gsi_start_bb (store_bb); 10220 if (dump_enabled_p ()) 10221 dump_printf_loc (MSG_NOTE, vect_location, 10222 "Move stmt to created bb\n%G", last); 10223 /* Move all stored value producers if possible. */ 10224 while (!gsi_end_p (gsi)) 10225 { 10226 tree lhs; 10227 imm_use_iterator imm_iter; 10228 use_operand_p use_p; 10229 bool res; 10230 10231 /* Skip debug statements. */ 10232 if (is_gimple_debug (gsi_stmt (gsi))) 10233 { 10234 gsi_prev (&gsi); 10235 continue; 10236 } 10237 stmt1 = gsi_stmt (gsi); 10238 /* Do not consider statements writing to memory or having 10239 volatile operand. 
*/ 10240 if (gimple_vdef (stmt1) 10241 || gimple_has_volatile_ops (stmt1)) 10242 break; 10243 gsi_from = gsi; 10244 gsi_prev (&gsi); 10245 lhs = gimple_get_lhs (stmt1); 10246 if (!lhs) 10247 break; 10248 10249 /* LHS of vectorized stmt must be SSA_NAME. */ 10250 if (TREE_CODE (lhs) != SSA_NAME) 10251 break; 10252 10253 if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) 10254 { 10255 /* Remove dead scalar statement. */ 10256 if (has_zero_uses (lhs)) 10257 { 10258 gsi_remove (&gsi_from, true); 10259 continue; 10260 } 10261 } 10262 10263 /* Check that LHS does not have uses outside of STORE_BB. */ 10264 res = true; 10265 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 10266 { 10267 gimple *use_stmt; 10268 use_stmt = USE_STMT (use_p); 10269 if (is_gimple_debug (use_stmt)) 10270 continue; 10271 if (gimple_bb (use_stmt) != store_bb) 10272 { 10273 res = false; 10274 break; 10275 } 10276 } 10277 if (!res) 10278 break; 10279 10280 if (gimple_vuse (stmt1) 10281 && gimple_vuse (stmt1) != gimple_vuse (last_store)) 10282 break; 10283 10284 /* Can move STMT1 to STORE_BB. */ 10285 if (dump_enabled_p ()) 10286 dump_printf_loc (MSG_NOTE, vect_location, 10287 "Move stmt to created bb\n%G", stmt1); 10288 gsi_move_before (&gsi_from, &gsi_to); 10289 /* Shift GSI_TO for further insertion. */ 10290 gsi_prev (&gsi_to); 10291 } 10292 /* Put other masked stores with the same mask to STORE_BB. */ 10293 if (worklist.is_empty () 10294 || gimple_call_arg (worklist.last (), 2) != mask 10295 || worklist.last () != stmt1) 10296 break; 10297 last = worklist.pop (); 10298 } 10299 add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION); 10300 } 10301 } |
optimize_mask_vec_cond 代码
10093 void 10094 optimize_mask_vec_cond (class loop *loop) 10095 { 10096 basic_block *bbs = get_loop_body (loop); 10097 unsigned nbbs = loop->num_nodes; 10098 unsigned i; 10099 basic_block bb, bb_mask; 10100 class loop *bb_loop; 10101 gimple_stmt_iterator gsi; 10102 gimple *stmt; 10103 auto_vec<gimple *> worklist; 10104 auto_purge_vect_location sentinel; 10105 10106 enum tree_code code; 10107 10108 vect_location = find_loop_location (loop); 10109 /* Pick up all vec_cond_expr in loop if any. */ 10110 for (i = 0; i < nbbs; i++) 10111 { 10112 bb = bbs[i]; 10113 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); 10114 gsi_next (&gsi)) 10115 { 10116 stmt = gsi_stmt (gsi); 10117 if (is_gimple_assign(stmt)) { 10118 gassign *stmt_assign = dyn_cast <gassign *> (gsi_stmt (gsi)); 10119 code = gimple_assign_rhs_code (stmt_assign); 10120 // 检查语句是否为 VEC_COND_EXPR 10121 if (code == VEC_COND_EXPR) { 10122 worklist.safe_push (stmt); 10123 } 10124 } 10125 } 10126 } 10128 free (bbs); 10129 if (worklist.is_empty ()) 10130 return; 10131 10132 /* Loop has vec_cond_expr. */ 10133 while (!worklist.is_empty ()) 10134 { 10135 gimple *last, *last_store, *last1; 10136 edge e, efalse; 10137 tree mask; 10138 basic_block store_bb, join_bb; 10139 gimple_stmt_iterator gsi_to; 10140 gimple_stmt_iterator gsi_stmt_def; 10141 tree vdef, new_vdef; 10142 gphi *phi; 10143 tree vectype; 10144 tree zero; 10145 10146 last = worklist.pop (); 10147 gassign *stmt_assign = dyn_cast <gassign *> (last); 10148 mask = gimple_assign_rhs1(stmt_assign); 10149 tree true_vector_operand = gimple_assign_rhs2(stmt_assign); 10150 10151 gimple *mask_def = SSA_NAME_DEF_STMT (mask); 10152 10153 gimple *stmt_def = SSA_NAME_DEF_STMT (true_vector_operand); 10154 10155 bb = gimple_bb (stmt_def); 10156 10157 // bb_mask = gimple_bb (mask_def); 10158 /* Create then_bb and if-then structure in CFG, then_bb belongs to 10159 the same loop as if_bb. 
It could be different to LOOP when two 10160 level loop-nest is vectorized and mask_store belongs to the inner 10161 one. */ 10162 10163 gsi_stmt_def = gsi_for_stmt (stmt_def); 10164 gsi_next(&gsi_stmt_def); 10165 10166 stmt_def = gsi_stmt(gsi_stmt_def); 10167 10168 e = split_block (bb, stmt_def); 10169 bb_loop = bb->loop_father; 10170 // gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop)); 10171 join_bb = e->dest; 10172 store_bb = create_empty_bb (bb); 10173 add_bb_to_loop (store_bb, bb_loop); 10174 e->flags = EDGE_TRUE_VALUE; 10175 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); 10176 /* Put STORE_BB to likely part. */ 10177 efalse->probability = profile_probability::unlikely (); 10178 store_bb->count = efalse->count (); 10179 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); 10180 if (dom_info_available_p (CDI_DOMINATORS)) 10181 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); 10182 if (dump_enabled_p ()) 10183 dump_printf_loc (MSG_NOTE, vect_location, 10184 "Create new block %d to sink vect cond expr", 10185 store_bb->index); 10186 /* Create vector comparison with boolean result. 
*/ 10187 vectype = TREE_TYPE (mask); 10188 zero = build_zero_cst (vectype); 10189 stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE); 10190 // gsi = gsi_last_bb (bb); 10191 gsi = gsi_for_stmt (mask_def); 10192 gsi_next(&gsi); 10193 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); 10194 /* Create new PHI node for vdef of the last masked store: 10195 .MEM_2 = VDEF <.MEM_1> 10196 will be converted to 10197 .MEM.3 = VDEF <.MEM_1> 10198 and new PHI node will be created in join bb 10199 .MEM_2 = PHI <.MEM_1, .MEM_3> 10200 */ 10201 /* vdef = gimple_vdef (last); 10202 new_vdef = make_ssa_name (gimple_vop (cfun), last); 10203 gimple_set_vdef (last, new_vdef); 10204 phi = create_phi_node (vdef, join_bb); 10205 add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);*/ 10206 10207 /* Put all masked stores with the same mask to STORE_BB if possible. */ 10208 // while (true) 10209 // { 10210 gimple_stmt_iterator gsi_from; 10211 gimple *stmt1 = NULL; 10213 /* Move vec_cond second var def to STORE_BB. */ 10214 last_store = stmt_def; 10215 gsi = gsi_for_stmt (stmt_def); 10216 gsi_from = gsi; 10217 /* Shift GSI to the previous stmt for further traversal. */ 10218 gsi_prev (&gsi); 10219 gsi_to = gsi_start_bb (store_bb); 10220 gsi_move_before (&gsi_from, &gsi_to); 10221 /* Setup GSI_TO to the non-empty block start. */ 10222 gsi_to = gsi_start_bb (store_bb); 10223 if (dump_enabled_p ()) 10224 dump_printf_loc (MSG_NOTE, vect_location, 10225 "Move stmt to created bb\n%G", last); 10226 /* Move all stored value producers if possible. */ 10227 while (!gsi_end_p (gsi)) 10228 { 10229 tree lhs; 10230 imm_use_iterator imm_iter; 10231 use_operand_p use_p; 10232 bool res; 10233 10234 /* Skip debug statements. */ 10235 if (is_gimple_debug (gsi_stmt (gsi))) 10236 { 10237 gsi_prev (&gsi); 10238 continue; 10239 } 10240 stmt1 = gsi_stmt (gsi); 10241 /* Do not consider statements writing to memory or having 10242 volatile operand. 
*/ 10243 if (gimple_vdef (stmt1) 10244 || gimple_has_volatile_ops (stmt1)) 10245 break; 10246 gsi_from = gsi; 10247 gsi_prev (&gsi); 10248 lhs = gimple_get_lhs (stmt1); 10249 if (!lhs) 10250 break; 10251 10252 /* LHS of vectorized stmt must be SSA_NAME. */ 10253 if (TREE_CODE (lhs) != SSA_NAME) 10254 break; 10255 10256 if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) 10257 { 10258 /* Remove dead scalar statement. */ 10259 /* if (has_zero_uses (lhs)) 10260 { 10261 gsi_remove (&gsi_from, true); 10262 continue; 10263 }*/ 10264 } 10265 10266 /* Check that LHS does not have uses outside of STORE_BB. */ 10267 res = true; 10268 /* FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 10269 { 10270 gimple *use_stmt; 10271 use_stmt = USE_STMT (use_p); 10272 if (is_gimple_debug (use_stmt)) 10273 continue; 10274 if (gimple_bb (use_stmt) != store_bb) 10275 { 10276 res = false; 10277 break; 10278 } 10279 }*/ 10280 if (!res) 10281 break; 10282 10283 /* if (gimple_vuse (stmt1) 10284 && gimple_vuse (stmt1) != gimple_vuse (last_store)) 10285 break;*/ 10286 10287 /* Can move STMT1 to STORE_BB. */ 10288 if (dump_enabled_p ()) 10289 dump_printf_loc (MSG_NOTE, vect_location, 10290 "Move stmt to created bb\n%G", stmt1); 10291 gsi_move_before (&gsi_from, &gsi_to); 10292 /* Shift GSI_TO for further insertion. */ 10293 gsi_prev (&gsi_to); 10294 } 10295 /* Put other masked stores with the same mask to STORE_BB. */ 10296 /* if (worklist.is_empty () 10297 || gimple_call_arg (worklist.last (), 2) != mask 10298 || worklist.last () != stmt1) 10299 break; 10300 last = worklist.pop ();*/ 10301 // last1 = worklist.pop (); 10302 // } 10303 // add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION); 10304 if (!worklist.is_empty ()) 10305 last = worklist.pop (); 10306 } 10307 } |
能够按照预期进行拆分bb块,同时解决编译不过的两个问题:
1:加上-g 之后,在fre pass 会报错:在对debug gimple 进行分析删除的时候,找不到某个标量的定义。最后一个分支的标量gimple被直接删除了,没有生成debug gimple,导致后面debug gimple 使用到该标量时找不到其定义,报编译错误。解决方法:先去掉-g,后续在dce pass 中找删除标量和插入debug的逻辑。# DEBUG D#583 => D#597 ? _2164 : 0.0
2:在sink pass 中报编译错误,gimple_redirect_edge_and_branch函数中,assert不通过,需要该edge 是一个fallthru edge。在构造edge的时候需要生成。暂时注释掉。
default:6134 /* Otherwise it must be a fallthru edge, and we don't need to6135 do anything besides redirecting it. */6136 // gcc_assert (e->flags & EDGE_FALLTHRU); |
解决掉编译错误后,可以正确编译运行,但是结果错误。
原因是该loop 的 vf是8,每次会对loop 中的8个元素进行运算,计算mask的数据是double类型,会生成两个mask。每个分支需要对两个mask同时和{0,0,0,0}比较是否为0,目前只能进行一个mask的比较。可行的方法:
1:修改loop 中int 的类型使其在确定vf的时候将其作为double 看待(VIEW_CONVERT_EXPR),这样vf 是4, 就不存在两个mask。
2:gimple cond 不能支持这种if ( a==0 && b==0) 这种复杂条件表达,构造两个gimple cond。然后做&运算,将此条件作为需要判断的cond。
1761处循环:
1:在每个分支条件构造后插入两个mask按位或的gimple,并且以此新建一个gimple cond,作为分支判断的条件。
2:课题运算结果VE.查找原因。从打印每个分支运算结果来看,temp4的结果恒为0,即最后一个分支完全没有走到,存在问题,同时加上-g后报错,也是最后一个分支的标量被删除,怀疑最后分支在拆分的时候存在问题。(正确结果在源码中加打印中间结果,无法进行打印)。
Lhs use outside of BB。当其使用的outside BB是 VEC_COND 所在的BB认为是没问题的,其他情况需要进行添加phi节点操作
2中的stmt的 lhs res在4 里面被使用,原本在同一个bb里面不需要做额外的操作,当分到不同的bb后,走不走2 res的值会不同,如果不走4中用的res会使用上一次2中计算的res值,显然结果错误,需要添加phi节点来解决。
若2中的lhs res0 被 4 use ,需要在 2的上一个bb 1新建一个向量变量res1 = 0,在2 的下一个bb 3中,新建一个phi节点,res2 = phi<res1(1),res0(2)>, 并且将4中用到res0的地方改为res2。
若2中的lhs res0 被 4 use ,需要在 2的上一个bb 1新建一个向量变量res1 = 0,将2中的res0 = xx 修改为 res2 = xx,在2 的下一个bb 3中,新建一个phi节点,res0 = phi<res1(1),res2(2)>。
若2中的res0 被2中的其他stmt使用到,则需要将所有用到res0的地方改成res2
对于多个分支都要进行计算的变量,可以将第二个分支直接用到此计算的地方,需要使用该计算的全部。在用到其的地方需要进行计算。
新增phi节点的代码
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 10283 { 10284 gimple *use_stmt; 10285 use_stmt = USE_STMT (use_p); 10286 if (is_gimple_debug (use_stmt)) 10287 continue; 10288 if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last)) 10289 { 10290 // res = false; 10291 10292 if (dump_enabled_p ()) 10293 dump_printf_loc (MSG_NOTE, vect_location, 10294 "LHS have use outside of store_BB\n%G", stmt1); 10295 tree lhs_use_out,new_lhs,new_lhs1,new_lhs2; 10296 tree new_lhs_phi; 10297 gphi *phi; 10298 tree vectype; 10299 tree zero; 10300 gimple *zero_def; 10301 lhs_use_out = gimple_assign_lhs(stmt1); 10302 10303 /* if (is_gimple_assign(stmt1)) { 10304 lhs_use_out = gimple_assign_lhs(stmt1); 10305 new_lhs = create_tmp_var(TREE_TYPE(lhs_use_out), "new_tmp_var"); 10306 new_lhs_phi = make_ssa_name(new_lhs,NULL); 10307 // gimple_assign_set_lhs(stmt1, new_lhs1); 10308 10309 10310 phi = create_phi_node (new_lhs_phi, join_bb); 10311 add_phi_arg (phi, lhs_use_out, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION); 10312 10313 vectype = TREE_TYPE (lhs_use_out); 10314 zero = build_zero_cst (vectype); 10315 new_lhs1 = create_tmp_var(TREE_TYPE(lhs_use_out), "new_tmp_var1"); 10316 new_lhs2 = make_ssa_name(new_lhs1,NULL); 10317 zero_def = gimple_build_assign(new_lhs2, zero); 10318 10319 // basic_block stmt_bb = gimple_bb(stmt1); 10320 edge e_temp; 10321 edge_iterator ei; 10322 basic_block pred_bb; 10323 gimple_stmt_iterator gsi_temp; 10324 10325 // if (EDGE_COUNT(stmt_bb->preds) == 1) { 10326 e_temp = EDGE_PRED(store_bb, 0); 10327 pred_bb = e_temp->src; 10328 gsi_temp = gsi_start_bb(pred_bb); 10329 gsi_insert_before(&gsi_temp, zero_def, GSI_SAME_STMT); 10330 // } 10331 10332 add_phi_arg (phi, new_lhs2, e, UNKNOWN_LOCATION); 10333 // update_stmt (phi); 10334 10335 /* edge e_join; 10336 edge_iterator ei_join; 10337 10338 FOR_EACH_EDGE(e_join, ei_join, join_bb->succs) 10339 { 10340 if (EDGE_TRUE_P(e_join)) 10341 { 10342 *true_bb = e->dest; 10343 } 10344 }*/ 10345 10346 for 
(unsigned int i = 0; i < gimple_num_ops(use_stmt); i++) { 10347 tree rhs = gimple_op(use_stmt, i); 10348 if(rhs == lhs_use_out) { 10349 gimple_stmt_iterator gsi = gsi_for_stmt(use_stmt); 10350 gsi_insert_before (&gsi,stmt1,GSI_SAME_STMT); 10351 break; 10352 // create_new_def_for (rhs, phi,gimple_phi_result_ptr (phi)); 10353 // update_stmt (phi); 10354 } 10355 } 10356 // } |
2069处循环:
1:需要进行dim=3的常量传播,加上拆分循环这两个条件。验证前一个循环向量化后有7%的性能,加上ymm寄存器后有11%的性能。
2:查看gcc的loop split 和 loop distribute pass,发现loop distribute的总体思想是将能够向量化的代码最大限度拆分到一个循环中,(1)但其只对非嵌套循环的最内层循环分析,发现其dump的信息中没有对2069循环进行distribute。(2)同时其只能对没有数据依赖的部分distribute,源码有数据依赖的部分使用临时数组存储后进行拆分,需要自行编写代码实现。
549课题在mask store中涉及的运算上对数学函数添加mask代码
1 #include "config.h"2 #include "system.h"3 #include "coretypes.h"4 #include "backend.h"5 #include "tree.h"6 #include "gimple.h"7 #include "predict.h"8 #include "tree-pass.h"9 #include "ssa.h"10 #include "cgraph.h"11 #include "fold-const.h"12 #include "stor-layout.h"13 #include "gimple-iterator.h"14 #include "gimple-walk.h"15 #include "tree-ssa-loop-manip.h"16 #include "tree-ssa-loop-niter.h"17 #include "tree-cfg.h"18 #include "cfgloop.h"19 #include "tree-vectorizer.h"20 #include "tree-ssa-propagate.h"21 #include "dbgcnt.h"22 #include "tree-scalar-evolution.h"23 #include "stringpool.h"24 #include "attribs.h"25 #include "gimple-pretty-print.h"26 #include "opt-problem.h"27 #include "internal-fn.h"28 #include "tree-ssa-sccvn.h"29 #include "gimple-expr.h"30 #include <cstdio>31 32 namespace33 {34 const pass_data pass_data_test = {35 GIMPLE_PASS, /* type */36 "mask_vecmath_func", /* name */37 OPTGROUP_NONE, /* optinfo_flags */38 TV_TREE_VECT_MASK_VECMATH_FUNC, /* tv_id */39 (PROP_cfg | PROP_ssa), /* properties_required */40 0, /* properties_provided */41 0, /* properties_destroyed */42 0, /* todo_flags_start */43 0, /* todo_flags_finish */44 }; 45 46 class pass_mask_vecmath_func : public gimple_opt_pass47 {48 public:49 pass_mask_vecmath_func (gcc::context *ctxt) : gimple_opt_pass (pass_data_test, ctxt) {}50 virtual bool51 gate (function *fun)52 {53 // printf ("gate function noipa.\n");54 return flag_tree_mask_vecmath_func;55 }56 57 virtual unsigned int execute (function *);58 };59 60 61 static void add_mask_to_call(gimple *stmt, tree new_arg, const char *func_name) {62 if (!is_gimple_call(stmt)) {63 // 如果不是函数调用语句,则不做任何操作64 return;65 }66 67 // 获取原始函数调用的目标和参数列表68 tree call_fn = gimple_call_fndecl(stmt);69 70 // 获取或创建新的标识符节点来表示新的函数名称71 tree new_func_id;72 if(strcmp(func_name, "vmldCos2") == 0)73 new_func_id = get_identifier("__svml_cos2_mask_e9");74 else if (strcmp(func_name, "vmldExp2") == 0)75 new_func_id = get_identifier("__svml_exp2_mask_e9");76 else if 
(strcmp(func_name, "vmldSin2") == 0)77 new_func_id = get_identifier("__svml_sin2_mask_e9");78 else if (strcmp(func_name, "sin.simdclone.2") == 0)79 new_func_id = get_identifier("__svml_sin4_mask_e9");80 else if (strcmp(func_name, "cos.simdclone.2") == 0)81 new_func_id = get_identifier("__svml_cos4_mask_e9");82 else if (strcmp(func_name, "exp.simdclone.2") == 0)83 new_func_id = get_identifier("__svml_exp4_mask_e9");84 85 tree fntype = TREE_TYPE(call_fn);87 tree new_fndecl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, new_func_id, fntype);88 89 TREE_PUBLIC (new_fndecl) = 1;90 DECL_EXTERNAL (new_fndecl) = 1;91 DECL_IS_NOVOPS (new_fndecl) = 1;92 TREE_READONLY (new_fndecl) = 1;93 94 95 // 将新的标识符节点分配给函数声明的汇编名96 // DECL_ASSEMBLER_NAME(call_fn) = new_func_id;97 98 int num_args = gimple_call_num_args(stmt);99 vec<tree> vargs = vNULL; 100 vargs.create (num_args+1); 101 102 // 创建一个新的参数列表,包含原始的参数和新的参数 103 for (int i = 0; i < num_args; i++) { 104 tree arg = gimple_call_arg(stmt, i); 105 vargs.safe_push(arg); 106 } 107 vargs.safe_push(new_arg); 108 109 tree lhs = gimple_call_lhs(stmt); 110 111 // 创建新的函数调用语句,包含新的参数 112 gimple *new_call = gimple_build_call_vec(new_fndecl,vargs); 113 gimple_call_set_lhs (new_call, lhs); 114 115 // 替换原始的函数调用语句 116 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 117 118 // printf ("-------------finish add mask to vecmath func call------------.\n"); 119 120 gsi_replace(&gsi, new_call,true); 121 stmt = new_call; 122 123 // 释放参数列表的内存 124 vargs.release (); 125 } 126 127 static void find_relate_operand(tree operand, gimple *stmt, tree mask) 128 { 129 if (!stmt) 130 return ; 131 132 if (TREE_CODE (operand) == SSA_NAME && is_gimple_call(stmt)) { // operand is ssa && stmt is gimple call 133 tree fndecl = gimple_call_fndecl(stmt); // 获取函数声明 134 if (fndecl && DECL_P(fndecl)) { // 确保fndecl有效并且是一个声明 135 const char *func_name = IDENTIFIER_POINTER(DECL_NAME(fndecl)); // 获取函数名称 136 // if (strcmp(func_name, "vmldLn2") == 0) { 137 if (strcmp(func_name, "vmldCos2") 
== 0 || 138 strcmp(func_name, "vmldExp2") == 0 || 139 strcmp(func_name, "vmldSin2") == 0 || 140 strcmp(func_name, "exp.simdclone.2") == 0 || 141 strcmp(func_name, "cos.simdclone.2") == 0 || 142 strcmp(func_name, "sin.simdclone.2") == 0) { 143 // printf ("-------------find math func------------.\n"); 144 add_mask_to_call(stmt,mask,func_name); 145 return ; 146 } 147 } 148 } 149 if (TREE_CODE (operand) == SSA_NAME && is_gimple_assign(stmt)) { // only find gimple assign 150 151 for (unsigned i = 1; i < gimple_num_ops(stmt); ++i) { // get gimple assign right hand side operand 152 tree op = gimple_op(stmt, i); 153 if(TREE_CODE (op) == SSA_NAME) { 154 155 gimple *stmt_2 = SSA_NAME_DEF_STMT (op); 156 find_relate_operand(op,stmt_2,mask); 157 // if(result) return result; 158 } 159 } 160 } 161 return ; 162 } 163 164 165 unsigned 166 pass_mask_vecmath_func::execute (function *fun) 167 { 168 unsigned ret = 0; 169 170 basic_block bb; 171 enum tree_code code; 172 FOR_EACH_BB_FN(bb, fun) { 173 gimple_stmt_iterator gsi; 174 175 /* for (int i = 1; i < number_of_loops (fun); i++) 176 { 177 loop_vec_info loop_vinfo; 178 bool has_mask_store; 179 180 class loop *loop = get_loop (fun, i); 181 if (!loop || !loop->aux) 182 continue; 183 loop_vinfo = (loop_vec_info) loop->aux; 184 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); 185 delete loop_vinfo; 186 if (has_mask_store) { 187 188 printf ("-------------have mask store------------.\n"); 189 190 basic_block *bbs = get_loop_body (loop); 191 unsigned nbbs = loop->num_nodes; 192 unsigned i; 193 basic_block bb; 194 class loop *bb_loop; 195 gimple_stmt_iterator gsi; 196 gimple *stmt; 197 198 for (i = 0; i < nbbs; i++) 199 { 200 bb = bbs[i];*/ 201 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); 202 gsi_next (&gsi)) 203 { 204 gimple *stmt = gsi_stmt (gsi); 205 if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) { 206 // printf ("------------ find mask store------------.\n"); 207 basic_block bb1 = gimple_bb(stmt); 208 tree mask = 
gimple_call_arg (stmt, 2); 209 tree value = gimple_call_arg (stmt, 3); 210 if(TREE_CODE (value) == SSA_NAME) { 211 gimple *value_def = SSA_NAME_DEF_STMT (value); 212 basic_block bb2 = gimple_bb(value_def); 213 // printf ("-------------begin find relate operand------------.\n"); 214 if(bb1 == bb2) // mask store and value def in same bb 215 find_relate_operand(value,value_def,mask); 216 } 217 } 218 } 219 220 // free (bbs); 221 } 222 // } 223 // } 224 225 return ret; 226 227 } 228 } 229 230 gimple_opt_pass * 231 make_pass_mask_vecmath_func (gcc::context *ctxt) 232 { 233 return new pass_mask_vecmath_func (ctxt); 234 } |
10092 10093 10094 void 10095 optimize_mask_vec_cond (class loop *loop) 10096 { 10097 basic_block *bbs = get_loop_body (loop); 10098 unsigned nbbs = loop->num_nodes; 10099 unsigned i; 10100 basic_block bb, bb_mask; 10101 class loop *bb_loop; 10102 gimple_stmt_iterator gsi; 10103 gimple *stmt; 10104 auto_vec<gimple *> worklist; 10105 auto_purge_vect_location sentinel; 10106 10107 enum tree_code code; 10108 10109 vect_location = find_loop_location (loop); 10110 /* Pick up all vec_cond_expr in loop if any. */ 10111 for (i = 0; i < nbbs; i++) 10112 { 10113 bb = bbs[i]; 10114 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); 10115 gsi_next (&gsi)) 10116 { 10117 stmt = gsi_stmt (gsi); 10118 if (is_gimple_assign(stmt)) { 10119 gassign *stmt_assign = dyn_cast <gassign *> (gsi_stmt (gsi)); 10120 code = gimple_assign_rhs_code (stmt_assign); 10121 // 检查语句是否为 VEC_COND_EXPR 10122 if (code == VEC_COND_EXPR) { 10123 worklist.safe_push (stmt); 10124 } 10125 } 10126 } 10127 } 10128 10129 free (bbs); 10130 if (worklist.is_empty () || worklist.length()==1) 10131 return; 10132 10133 /* Loop has vec_cond_expr. 
*/ 10134 while (!worklist.is_empty ()) 10135 { 10136 gimple *last, *last_store, *last1; 10137 edge e, efalse; 10138 tree mask,mask2; 10139 basic_block store_bb, join_bb; 10140 gimple_stmt_iterator gsi_to; 10141 gimple_stmt_iterator gsi_stmt_def,gsi_mask_def; 10142 tree vdef, new_vdef; 10143 gphi *phi; 10144 tree vectype; 10145 tree zero_vector; 10146 10147 last = worklist.pop (); 10148 gassign *stmt_assign = dyn_cast <gassign *> (last); 10149 mask = gimple_assign_rhs1(stmt_assign); 10150 tree true_vector_operand = gimple_assign_rhs2(stmt_assign); 10151 10152 gimple *mask_def = SSA_NAME_DEF_STMT (mask); 10153 10154 gsi_mask_def = gsi_for_stmt(mask_def); 10155 gsi_prev(&gsi_mask_def); 10156 gimple *mask2_def = gsi_stmt(gsi_mask_def); 10157 gassign *stmt_mask2 = dyn_cast <gassign *> (mask2_def); 10158 mask2 = gimple_assign_lhs(stmt_mask2); 10159 10160 10161 gimple *stmt_def = SSA_NAME_DEF_STMT (true_vector_operand); 10162 10163 bb = gimple_bb (stmt_def); 10164 10165 /* Create then_bb and if-then structure in CFG, then_bb belongs to 10166 the same loop as if_bb. It could be different to LOOP when two 10167 level loop-nest is vectorized and mask_store belongs to the inner 10168 one. */ 10169 10170 gsi_stmt_def = gsi_for_stmt (stmt_def); 10171 gsi_next(&gsi_stmt_def); 10172 10173 stmt_def = gsi_stmt(gsi_stmt_def); 10174 10175 e = split_block (bb, stmt_def); 10176 bb_loop = bb->loop_father; 10177 // gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop)); 10178 join_bb = e->dest; 10179 store_bb = create_empty_bb (bb); 10180 add_bb_to_loop (store_bb, bb_loop); 10181 e->flags = EDGE_TRUE_VALUE; 10182 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); 10183 /* Put STORE_BB to likely part. 
*/ 10184 efalse->probability = profile_probability::unlikely (); 10185 store_bb->count = efalse->count (); 10186 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); 10187 if (dom_info_available_p (CDI_DOMINATORS)) 10188 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); 10189 if (dump_enabled_p ()) 10190 dump_printf_loc (MSG_NOTE, vect_location, 10191 "Create new block %d to sink vect cond expr", 10192 store_bb->index); 10193 /* Create vector comparison with boolean result. */ 10194 vectype = TREE_TYPE (mask); 10195 zero_vector = build_zero_cst (vectype); 10196 10197 tree combined_mask = create_tmp_var(TREE_TYPE(zero_vector), "combined_mask"); 10198 10199 gimple *combine_stmt1 = gimple_build_assign(combined_mask, BIT_IOR_EXPR, mask, mask2); 10200 10201 gsi = gsi_for_stmt (mask_def); 10202 gsi_next(&gsi); 10203 gsi_insert_after (&gsi, combine_stmt1, GSI_SAME_STMT); 10204 10205 /* vec<constructor_elt, va_gc> *ret_ctor_elts_tmp = NULL; 10206 vec_alloc (ret_ctor_elts_tmp, 2); 10207 CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask2); // 添加第二个左子树 10208 CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask); // 添加第一个左子树 10209 10210 // tree signed_boolean_type = build_nonstandard_integer_type(64, 1); 10211 tree signed_boolean_type = build_nonstandard_boolean_type(64); 10212 10213 tree vect_type = build_vector_type(signed_boolean_type, 4); 10214 tree constructor = build_constructor(vect_type, ret_ctor_elts_tmp); 10215 10216 tree new_var_constru = create_tmp_var(vect_type, "mask_array"); 10217 gimple *new_stmt_construc = gimple_build_assign(make_ssa_name(new_var_constru), constructor); 10218 gsi_next(&gsi); 10219 gsi_insert_after (&gsi, new_stmt_construc, GSI_SAME_STMT);*/ 10220 10221 gimple *gcond = gimple_build_cond(EQ_EXPR, combined_mask, zero_vector, NULL, NULL); 10222 gsi_next(&gsi); 10223 gsi_insert_after(&gsi, gcond, GSI_NEW_STMT); 10224 10225 10226 /* Put all masked stores with the same mask to STORE_BB if possible. 
*/ 10227 // while (true) 10228 // { 10229 gimple_stmt_iterator gsi_from; 10230 gimple *stmt1 = NULL; 10231 10232 /* Move vec_cond second var def to STORE_BB. */ 10233 last_store = stmt_def; 10234 gsi = gsi_for_stmt (stmt_def); 10235 gsi_from = gsi; 10236 /* Shift GSI to the previous stmt for further traversal. */ 10237 gsi_prev (&gsi); 10238 gsi_to = gsi_start_bb (store_bb); 10239 gsi_move_before (&gsi_from, &gsi_to); 10240 /* Setup GSI_TO to the non-empty block start. */ 10241 gsi_to = gsi_start_bb (store_bb); 10242 if (dump_enabled_p ()) 10243 dump_printf_loc (MSG_NOTE, vect_location, 10244 "Move stmt to created bb\n%G", last); 10245 /* Move all stored value producers if possible. */ 10246 while (!gsi_end_p (gsi)) 10247 { 10248 tree lhs; 10249 imm_use_iterator imm_iter; 10250 use_operand_p use_p; 10251 bool res; 10252 10253 /* Skip debug statements. */ 10254 if (is_gimple_debug (gsi_stmt (gsi))) 10255 { 10256 gsi_prev (&gsi); 10257 continue; 10258 } 10259 stmt1 = gsi_stmt (gsi); 10260 /* Do not consider statements writing to memory or having 10261 volatile operand. */ 10262 if (gimple_vdef (stmt1) 10263 || gimple_has_volatile_ops (stmt1)) 10264 break; 10265 gsi_from = gsi; 10266 gsi_prev (&gsi); 10267 lhs = gimple_get_lhs (stmt1); 10268 if (!lhs) 10269 break; 10270 10271 /* LHS of vectorized stmt must be SSA_NAME. */ 10272 if (TREE_CODE (lhs) != SSA_NAME) 10273 break; 10274 10275 if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) 10276 { 10277 /* Remove dead scalar statement. */ 10278 if (has_zero_uses (lhs)) 10279 { 10280 gsi_remove (&gsi_from, true); 10281 continue; 10282 } 10283 } 10284 10285 /* Check that LHS does not have uses outside of STORE_BB. 
*/ 10286 res = true; 10287 // FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 10288 gimple *use_lhs; 10289 FOR_EACH_IMM_USE_STMT (use_lhs, imm_iter, lhs) 10290 { 10291 gimple *use_stmt; 10292 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) { 10293 10294 // gimple *use_stmt; 10295 use_stmt = USE_STMT (use_p); 10296 if (is_gimple_debug (use_stmt)) 10297 continue; 10298 if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last)) 10299 { 10300 // res = false; 10301 10302 if (dump_enabled_p ()) 10303 dump_printf_loc (MSG_NOTE, vect_location, 10304 "LHS have use outside of store_BB\n%G", stmt1); 10305 tree new_lhs,new_lhs1,new_lhs2; 10306 tree new_lhs_phi; 10307 gphi *phi; 10308 tree vectype; 10309 tree zero; 10310 gimple *zero_def; 10311 10312 gimple *new_assign_stmt; 10313 10314 if (is_gimple_assign(stmt1) && is_gimple_assign(use_lhs)) { 10315 for (unsigned int i = 1; i < gimple_num_ops(use_stmt); i++) { 10316 tree rhs = gimple_op(use_stmt, i); 10317 if(TREE_CODE (rhs) == SSA_NAME && (rhs == lhs)) { 10318 10319 if (dump_enabled_p ()) 10320 dump_printf_loc (MSG_NOTE, vect_location, 10321 "insert new stmt to use out of BB\n"); 10322 new_lhs = create_tmp_var(TREE_TYPE(lhs), "new_tmp_var"); 10323 new_lhs1 = make_ssa_name(new_lhs,NULL); 10324 tree rhs1 = gimple_assign_rhs1(stmt1); 10325 tree rhs2 = gimple_assign_rhs2(stmt1); 10326 new_assign_stmt = gimple_build_assign(new_lhs1, gimple_assign_rhs_code(stmt1), rhs1, rhs2); 10327 10328 gimple_stmt_iterator gsi_temp = gsi_for_stmt(use_stmt); 10329 gsi_insert_before (&gsi_temp,new_assign_stmt,GSI_SAME_STMT); 10330 update_stmt(new_assign_stmt); 10331 10332 if( i == 1) { 10333 10334 gimple_assign_set_rhs1(use_stmt, new_lhs1); 10335 // update_stmt(use_stmt); 10336 } 10337 else if (i == 2) { 10338 gimple_assign_set_rhs2(use_stmt, new_lhs1); 10339 // update_stmt(use_stmt); 10340 } 10341 10342 // update_stmt(use_stmt); 10343 } 10344 } 10345 } 10346 } 10347 } 10348 10349 update_stmt(use_stmt); 10350 } 10351 10352 /* Can 
move STMT1 to STORE_BB. */ 10353 /* if (dump_enabled_p ()) 10354 dump_printf_loc (MSG_NOTE, vect_location, 10355 "Move stmt to created bb\n%G", stmt1);*/ 10356 gsi_move_before (&gsi_from, &gsi_to); 10357 /* Shift GSI_TO for further insertion. */ 10358 gsi_prev (&gsi_to); 10359 } 10360 if (!worklist.is_empty ()) 10361 last = worklist.pop (); 10362 } 10363 10364 } |
对 if continue的分块
10161 /* if(worklist.length()== 1) { 10162 if (dump_enabled_p ()) 10163 dump_printf_loc (MSG_NOTE, vect_location, 10164 " if-continue split bb\n"); 10165 tree mask_tmp2 = gimple_assign_rhs2(stmt_mask2); 10166 tree mask_tmp1 = gimple_assign_rhs2(stmt_mask1); 10167 10168 gimple *mask_temp2_def = SSA_NAME_DEF_STMT (mask_tmp2); 10169 gimple *mask_temp1_def = SSA_NAME_DEF_STMT (mask_tmp1); 10170 10171 gassign *stmt_mask_tmp2 = dyn_cast <gassign *> (mask_temp2_def); 10172 gassign *stmt_mask_tmp1 = dyn_cast <gassign *> (mask_temp1_def); 10173 10174 tree temp2_rhs1 = gimple_assign_rhs1(stmt_mask_tmp2); 10175 tree temp1_rhs1 = gimple_assign_rhs1(stmt_mask_tmp1); 10176 10177 tree target_mask3 = gimple_assign_lhs(stmt_mask_tmp2); 10178 tree target_mask4 = gimple_assign_lhs(stmt_mask_tmp1); 10179 10180 tree temp2_rhs2 = gimple_assign_rhs2(stmt_mask_tmp2); 10181 tree temp1_rhs2 = gimple_assign_rhs2(stmt_mask_tmp1); 10182 10183 gimple *target_stmt1 = SSA_NAME_DEF_STMT (temp2_rhs1); 10184 gimple *target_stmt2 = SSA_NAME_DEF_STMT (temp1_rhs1); 10185 10186 gassign *stmt_target_stmt1 = dyn_cast <gassign *> (target_stmt1); 10187 gassign *stmt_target_stmt2 = dyn_cast <gassign *> (target_stmt2); 10188 10189 tree target_mask1 = gimple_assign_lhs(stmt_target_stmt1); 10190 tree target_mask2 = gimple_assign_lhs(stmt_target_stmt2); 10191 10192 10193 gimple *target_stmt3 = SSA_NAME_DEF_STMT (temp2_rhs2); 10194 gimple *target_stmt4 = SSA_NAME_DEF_STMT (temp1_rhs2); 10195 10196 basic_block bb_tmp = gimple_bb (target_stmt1); 10197 basic_block bb_tmp_next = gimple_bb (target_stmt4); 10198 edge e_tmp; 10199 gimple_stmt_iterator target_stmt4_gsi = gsi_for_stmt(mask_temp1_def); 10200 gsi_next(&target_stmt4_gsi); 10201 gimple *target_stmt4_next = gsi_stmt(target_stmt4_gsi); 10202 10203 gimple_stmt_iterator target_stmt2_gsi = gsi_for_stmt(target_stmt2); 10204 gsi_next(&target_stmt2_gsi); 10205 gimple *target_stmt2_next = gsi_stmt(target_stmt2_gsi); 10206 10207 e_tmp = split_block (bb_tmp, 
target_stmt4_next); 10208 class loop *bb_loop_tmp = bb_tmp->loop_father; 10209 gcc_assert (loop == bb_loop_tmp || flow_loop_nested_p (loop, bb_loop_tmp)); 10210 10211 basic_block bb_last_tmp = gimple_bb(last); 10212 basic_block join_bb_tmp; 10213 gimple *last_stmt_tmp = last_stmt(bb_last_tmp); 10214 if (last_stmt_tmp && gimple_code(last_stmt_tmp) == GIMPLE_COND) { 10215 10216 edge e_tmp2; 10217 edge_iterator ei_tmp2; 10218 basic_block true_bb; 10219 10220 FOR_EACH_EDGE(e_tmp2, ei_tmp2, bb_last_tmp->succs) { 10221 // 检查是否为 true 分支 10222 if (e_tmp2->flags & EDGE_TRUE_VALUE) { 10223 true_bb = e_tmp2->dest; 10224 } 10225 } 10226 join_bb_tmp = e_tmp->dest; 10227 basic_block store_bb_tmp = create_empty_bb (bb_tmp); 10228 add_bb_to_loop (store_bb_tmp, bb_loop_tmp); 10229 // e_tmp->flags = EDGE_TRUE_VALUE; 10230 10231 edge efalse_tmp_true = make_edge (bb_tmp, bb_last_tmp, EDGE_TRUE_VALUE); 10232 /* Put STORE_BB to likely part. */ 10233 /* efalse_tmp_true->probability = profile_probability::likely (); 10234 store_bb_tmp->count = efalse_tmp_true->count (); 10235 10236 edge efalse_tmp = make_edge (bb_tmp, store_bb_tmp, EDGE_FALSE_VALUE); 10237 /* Put STORE_BB to likely part. 
*/ 10238 /* efalse_tmp->probability = profile_probability::unlikely (); 10239 store_bb_tmp->count = efalse_tmp->count (); 10240 // make_single_succ_edge (store_bb_tmp, join_bb_tmp, EDGE_FALLTHRU); 10241 10242 edge efalse_tmp_next = make_edge (store_bb_tmp, join_bb_tmp, EDGE_FALSE_VALUE); 10243 efalse_tmp_next->probability = profile_probability::unlikely (); 10244 // store_bb_tmp->count = efalse_tmp_true->count (); 10245 10246 edge etrue_tmp_next = make_edge (store_bb_tmp, bb_last_tmp, EDGE_TRUE_VALUE); 10247 etrue_tmp_next->probability = profile_probability::likely (); 10248 store_bb_tmp->count = efalse_tmp_true->count (); 10249 // true_bb = e_tmp->dest; 10250 10251 // e_tmp->dest = NULL; 10252 // e_tmp->flags = EDGE_TRUE_VALUE; 10253 10254 edge e_dele = find_edge(bb_tmp, join_bb_tmp); 10255 if (e_dele) { 10256 remove_edge(e_dele); // 删除这条边 10257 } 10258 10259 // true_bb->preds = chainon(true_bb->preds, e_tmp); 10260 add_to_dominance_info(CDI_DOMINATORS,join_bb_tmp); 10261 10262 if (dom_info_available_p (CDI_DOMINATORS)) { 10263 set_immediate_dominator (CDI_DOMINATORS, store_bb_tmp, bb_tmp); 10264 set_immediate_dominator (CDI_DOMINATORS, join_bb_tmp, store_bb_tmp); 10265 set_immediate_dominator (CDI_DOMINATORS, bb_last_tmp, bb_tmp); 10266 // free_dominance_info(CDI_DOMINATORS); 10267 calculate_dominance_info(CDI_DOMINATORS); 10268 } 10269 10270 // free_dominance_info(CDI_DOMINATORS); 10271 // calculate_dominance_info(CDI_DOMINATORS); 10272 10273 tree vectype_tmp = TREE_TYPE (mask_tmp1); 10274 tree zero_vector_tmp = build_zero_cst (vectype_tmp); 10275 10276 tree combined_mask_tmp = create_tmp_var(TREE_TYPE(zero_vector_tmp), "combined_mask_ifconti"); 10277 10278 tree combined_mask_tmp2 = create_tmp_var(TREE_TYPE(zero_vector_tmp), "combined_mask_ifconti2"); 10279 10280 gimple *combine_stmt1_tmp = gimple_build_assign(combined_mask_tmp, BIT_IOR_EXPR, target_mask1, target_mask2); 10281 10282 gimple *combine_stmt1_tmp2 = gimple_build_assign(combined_mask_tmp2, 
BIT_IOR_EXPR, target_mask3, target_mask4); 10283 10284 gimple_stmt_iterator gsi_tmp = gsi_for_stmt (target_stmt2); 10285 gsi_next(&gsi_tmp); 10286 gsi_insert_after (&gsi_tmp, combine_stmt1_tmp, GSI_SAME_STMT); 10287 10288 gimple_stmt_iterator gsi_tmp_next_if = gsi_last_bb (store_bb_tmp); 10289 // gsi_prev(&gsi_tmp_next_if); 10290 gsi_insert_before (&gsi_tmp_next_if, combine_stmt1_tmp2, GSI_SAME_STMT); 10291 10292 gimple *gcond_tmp = gimple_build_cond(EQ_EXPR, combined_mask_tmp, zero_vector_tmp, NULL, NULL); 10293 gsi_next(&gsi_tmp); 10294 gsi_insert_after(&gsi_tmp, gcond_tmp, GSI_NEW_STMT); 10295 10296 gimple *gcond_tmp_next = gimple_build_cond(EQ_EXPR, combined_mask_tmp2, zero_vector_tmp, NULL, NULL); 10297 // gsi_next(&gsi_tmp_next_if); 10298 gsi_insert_before(&gsi_tmp_next_if, gcond_tmp_next, GSI_NEW_STMT); 10299 10300 // calculate_dominance_info(CDI_DOMINATORS); 10301 10302 gimple_stmt_iterator gsi_from_tmp; 10303 gimple *stmt1 = NULL; 10304 10305 /* Move vec_cond second var def to STORE_BB. */ 10306 /* gimple *last_store = target_stmt4_next; 10307 gimple_stmt_iterator gsi_tmp4 = gsi_for_stmt (target_stmt4_next); 10308 gsi_from_tmp = gsi_tmp4; 10309 /* Shift GSI to the previous stmt for further traversal. */ 10310 /* gsi_prev (&gsi_tmp4); 10311 gimple_stmt_iterator gsi_to_tmp = gsi_start_bb (store_bb_tmp); 10312 gsi_move_before (&gsi_from_tmp, &gsi_to_tmp); 10313 /* Setup GSI_TO to the non-empty block start. */ 10314 /* gsi_to_tmp = gsi_start_bb (store_bb_tmp); 10315 if (dump_enabled_p ()) 10316 dump_printf_loc (MSG_NOTE, vect_location, 10317 "Move if-continue stmt to created bb\n%G", last); 10318 /* Move all stored value producers if possible. */ 10319 /* while (!gsi_end_p (gsi_tmp4)) { 10320 10321 tree lhs; 10322 imm_use_iterator imm_iter; 10323 use_operand_p use_p; 10324 bool res; 10325 10326 /* Skip debug statements. 
*/ 10327 /* if (is_gimple_debug (gsi_stmt (gsi_tmp4))) 10328 { 10329 gsi_prev (&gsi_tmp4); 10330 continue; 10331 } 10332 stmt1 = gsi_stmt (gsi_tmp4); 10333 /* Do not consider statements writing to memory or having 10334 volatile operand. */ 10335 /* if (gimple_vdef (stmt1) || gimple_has_volatile_ops (stmt1)) 10336 break; 10337 gsi_from_tmp = gsi_tmp4; 10338 gsi_prev (&gsi_tmp4); 10339 lhs = gimple_get_lhs (stmt1); 10340 if (!lhs) 10341 break; 10342 10343 /* LHS of vectorized stmt must be SSA_NAME. */ 10344 /* if (TREE_CODE (lhs) != SSA_NAME) 10345 break; 10346 10347 if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) 10348 { 10349 /* Remove dead scalar statement. */ 10350 /* if (has_zero_uses (lhs)) 10351 { 10352 gsi_remove (&gsi_from_tmp, true); 10353 continue; 10354 } 10355 } 10356 10357 gsi_move_before (&gsi_from_tmp, &gsi_to_tmp); 10358 /* Shift GSI_TO for further insertion. */ 10359 /* gsi_prev (&gsi_to_tmp); 10360 } 10361 } 10362 }*/ |
当vf 是4的时候,进行mask的合并,以及将合并后的mask加入到数学函数里面
mask合并代码
10410 vec<constructor_elt, va_gc> *ret_ctor_elts_tmp = NULL; 10411 vec_alloc (ret_ctor_elts_tmp, 2); 10412 CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask2); // 添加第二个左子树 10413 CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask); // 添加第一个左子树 10414 10415 // tree signed_boolean_type = build_nonstandard_integer_type(64, 1); 10416 tree signed_boolean_type = build_nonstandard_boolean_type(64); 10417 10418 tree vect_type = build_vector_type(signed_boolean_type, 4); 10419 tree constructor = build_constructor(vect_type, ret_ctor_elts_tmp); 10420 10421 tree new_var_constru = create_tmp_var(vect_type, "mask_array"); 10422 gimple *new_stmt_construc = gimple_build_assign(make_ssa_name(new_var_constru), constructor); 10423 gsi_next(&gsi); 10424 gsi_insert_after (&gsi, new_stmt_construc, GSI_SAME_STMT); |
将合并后的mask加入到数学函数里面
195 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mask_operand) 196 { 197 gimple *use_stmt; 198 use_stmt = USE_STMT (use_p); 199 if(is_gimple_assign(use_stmt)) { 200 tree rhs1_tmp1 = gimple_assign_rhs1(use_stmt); 201 if (TREE_CODE(rhs1_tmp1) == CONSTRUCTOR) { 202 tree lhs_tmp1 = gimple_assign_lhs(use_stmt); 203 if(stmt_vecmath) 204 add_mask_to_call(stmt_vecmath,lhs_tmp1); 205 } 206 } 207 } |
oneapi的cfg图
在移动的过程中,如果store bb中的LHS在除store bb以外的其他bb中被使用,则需要重新计算
10490 /* Check that LHS does not have uses outside of STORE_BB. */ 10491 res = true; 10492 // FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 10493 gimple *use_lhs; 10494 FOR_EACH_IMM_USE_STMT (use_lhs, imm_iter, lhs) 10495 { 10496 gimple *use_stmt; 10497 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) { 10498 10499 // gimple *use_stmt; 10500 use_stmt = USE_STMT (use_p); 10501 if (is_gimple_debug (use_stmt)) 10502 continue; 10503 if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last)) 10504 { 10505 // res = false; 10506 10507 if (dump_enabled_p ()) 10508 dump_printf_loc (MSG_NOTE, vect_location, 10509 "LHS have use outside of store_BB\n%G", stmt1); 10510 tree new_lhs,new_lhs1,new_lhs2; 10511 tree new_lhs_phi; 10512 gphi *phi; 10513 tree vectype; 10514 tree zero; 10515 gimple *zero_def; 10516 10517 gimple *new_assign_stmt; 10518 10519 if (is_gimple_assign(stmt1) && is_gimple_assign(use_lhs)) { 10520 for (unsigned int i = 1; i < gimple_num_ops(use_stmt); i++) { 10521 tree rhs = gimple_op(use_stmt, i); 10522 if(TREE_CODE (rhs) == SSA_NAME && (rhs == lhs)) { 10523 10524 if (dump_enabled_p ()) 10525 dump_printf_loc (MSG_NOTE, vect_location, 10526 "insert new stmt to use out of BB\n"); 10527 new_lhs = create_tmp_var(TREE_TYPE(lhs), "new_tmp_var"); 10528 new_lhs1 = make_ssa_name(new_lhs,NULL); 10529 tree rhs1 = gimple_assign_rhs1(stmt1); 10530 tree rhs2 = gimple_assign_rhs2(stmt1); 10531 new_assign_stmt = gimple_build_assign(new_lhs1, gimple_assign_rhs_code(stmt1), rhs1, rhs2); 10532 10533 gimple_stmt_iterator gsi_temp = gsi_for_stmt(use_stmt); 10534 gsi_insert_before (&gsi_temp,new_assign_stmt,GSI_SAME_STMT); 10535 update_stmt(new_assign_stmt); 10536 10537 if( i == 1) { 10538 10539 gimple_assign_set_rhs1(use_stmt, new_lhs1); 10540 // update_stmt(use_stmt); 10541 } 10542 else if (i == 2) { 10543 gimple_assign_set_rhs2(use_stmt, new_lhs1); 10544 // update_stmt(use_stmt); 10545 } 10546 10547 // update_stmt(use_stmt); 10548 } 10549 } 10550 } 10551 } 10552 } 
10553 10554 update_stmt(use_stmt); 10555 } */ |
消除同一个reduction 在loop 中使用多次
# temp_value.920_2824 = PHI <tmp_var.921_2823(234), 0.0(279)> 48420 # temp_value.923_2821 = PHI <tmp_var.924_2820(234), 0.0(279)> 48421 # temp_value.926_2814 = PHI <tmp_var.927_2813(234), 0.0(279)> 48422 # temp_value.929_2807 = PHI <tmp_var.930_2806(234), 0.0(279)> 48423 # temp_value.932_2800 = PHI <tmp_var.933_2798(234), 0.0(279)>_ifc__2843 = _3089 ? _2132 : 0.0; 48574 tmp_var.927_2813 = _ifc__2843 + temp_value.926_2814; 48575 _ifc__2842 = _3084 ? _2145 : 0.0; 48576 tmp_var.930_2806 = _ifc__2842 + temp_value.929_2807; 48577 _ifc__2841 = _3192 ? _2085 : 0.0; 48578 tmp_var.921_2823 = _ifc__2841 + temp_value.920_2824; 48579 _ifc__2840 = _3172 ? _2101 : 0.0; 48580 tmp_var.933_2798 = _ifc__2840 + temp_value.932_2800; 48581 _ifc__2839 = _3161 ? _2113 : 0.0; 48582 tmp_var.924_2820 = _ifc__2839 + temp_value.923_2821;# tmp_sumi.922_2822 = PHI <tmp_var.921_2823(83), 0.0(81), 0.0(276)> 48880 # tmp_sumi.925_2816 = PHI <tmp_var.924_2820(83), 0.0(81), 0.0(276)> 48881 # tmp_sumi.928_2809 = PHI <tmp_var.927_2813(83), 0.0(81), 0.0(276)> 48882 # tmp_sumi.931_2805 = PHI <tmp_var.930_2806(83), 0.0(81), 0.0(276)> 48883 # tmp_sumi.934_2793 = PHI <tmp_var.933_2798(83), 0.0(81), 0.0(276)>_2752 = tmp_sumi.922_2822 + tmp_sumi.925_2816; 48885 _2750 = _2752 + tmp_sumi.928_2809; 48886 _2747 = _2750 + tmp_sumi.931_2805; 48887 _2746 = _2747 + tmp_sumi.934_2793;_2156 = ri1i_2025 + _2746; 48931 _2163 = _2160 * _2746; |
1761 for (k = 0; k < lpears[i] + upears[i]; k++) { 1762 1763 if (pearlist[i] == NULL) { 1764 fprintf(nabout, 1765 "NULL pair list entry in egb loop 1, taskid = %d\n", 1766 mytaskid); 1767 fflush(nabout); 1768 } 1769 j = pearlist[i][k]; 1770 1771 xij = xi - x[dim * j]; 1772 yij = yi - x[dim * j + 1]; 1773 zij = zi - x[dim * j + 2]; 1774 r2 = xij * xij + yij * yij + zij * zij; 1775 1776 if (dim == 4) { // delete 1777 wij = wi - x[dim * j + 3]; 1778 r2 += wij * wij; 1779 } 1780 1781 if (r2 > rgbmaxpsmax2) // %hir.cmp.4310 ule 1782 continue; 1783 dij1i = 1.0 / sqrt(r2); 1784 dij = r2 * dij1i; 1785 sj = fs[j] * (rborn[j] - BOFFSET); // select fast 1786 sj2 = sj * sj; 1787 1788 /* 1789 * ---following are from the Appendix of Schaefer and Froemmel, 1790 * JMB 216:1045-1066, 1990; Taylor series expansion for d>>s 1791 * is by Andreas Svrcek-Seiler; smooth rgbmax idea is from 1792 * Andreas Svrcek-Seiler and Alexey Onufriev. 1793 */ 1794 1795 if (dij > rgbmax + sj) // rgbmax = 20; %hir.cmp.4333 ule 1796 continue; 1797 1798 if ((dij > rgbmax - sj)) { // %hir.cmp.4349 ogt 1799 uij = 1. / (dij - sj); 1800 sumi -= 0.125 * dij1i * (1.0 + 2.0 * dij * uij + 1801 rgbmax2i * (r2 - 1802 4.0 * rgbmax * 1803 dij - sj2) + 1804 2.0 * log((dij - sj) * rgbmax1i)); 1805 1806 } else if (dij > 4.0 * sj) { 1807 dij2i = dij1i * dij1i; 1808 tmpsd = sj2 * dij2i; 1809 dumbo = 1810 TA + tmpsd * (TB + 1811 tmpsd * (TC + 1812 tmpsd * (TD + tmpsd * TDD))); 1813 sumi -= sj * tmpsd * dij2i * dumbo; 1814 1815 } else if (dij > ri + sj) { 1816 sumi -= 0.5 * (sj / (r2 - sj2) + 1817 0.5 * dij1i * log((dij - sj) / (dij + sj))); 1818 1819 } else if (dij > fabs(ri - sj)) { 1820 theta = 0.5 * ri1i * dij1i * (r2 + ri * ri - sj2); 1821 uij = 1. / (dij + sj); 1822 sumi -= 0.25 * (ri1i * (2. - theta) - uij + 1823 dij1i * log(ri * uij)); 1824 1825 } else if (ri < sj) { 1826 sumi -= 0.5 * (sj / (r2 - sj2) + 2. * ri1i + 1827 0.5 * dij1i * log((sj - dij) / (sj + dij))); 1828 1829 } 1830 1831 } |
1:if 分支中的 fprintf 分析不出内存关系,无法ifcvt。(lim pass 无法将其外提,也是因为fprintf中的内存关系无法分析)
解决:将其外提到最内层循环外面。
2 : dim常量传播 (ipa-cp pass)
mme → mme34 → egb
dim 作为全局变量无法常量传播,作为函数参数的时候可以传播到。
解决:新建一个pass,识别全局变量(当其没有作为函数传参时)和函数调用关系,在函数调用的地方将变量替换为常量值。(pass 的位置?是否有参数能解决)
根据inline pass debug的信息,发现mme34无法inline进mme 原因是--param early-inlining-insns= 值过小,将此值调大,可以成功inline。
inline 过后
;; basic block 2, loop depth 0, count 27580514 (estimated locally), maybe hot74798 ;; prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)74799 ;; pred: ENTRY [always] count:27580514 (estimated locally) (FALLTHRU,EXECUTABLE)74800 # .MEM_2325 = VDEF <.MEM_2324(D)>74801 dim.lto_priv.0D.4751 = 3;74802 # VUSE <.MEM_2325>basic block 96, loop depth 2, count 954868629 (estimated locally), maybe hot77095 ;; prev block 95, next block 97, flags: (NEW, REACHABLE, VISITED)77096 ;; pred: 94 [82.6% (guessed)] count:788435027 (estimated locally) (FALSE_VALUE,EXECUTABLE)77097 ;; 95 [always] count:166433602 (estimated locally) (FALLTHRU,EXECUTABLE)_698 = dim.lto_priv.0D.4751;77112 _699 = j_697 * _698;if (_698 == 4)77146 goto <bb 97>; [34.00%]77147 else77148 goto <bb 98>; [66.00%] |
怀疑是mme34函数中其他部分的代码影响了常量传播的分析。注释掉mme34函数中的部分代码后,发现dim = 3 能够被作为常量传播。
_77 = j_76 * 3; |
但是需要同时注释掉的内容较多,无法准确找到哪部分代码影响了传播,以及这部分代码的特性。
写了一个例子发现其静态全局变量可以成功作为常量计算,怀疑是mme34函数中的其他部分,影响到dim的常量传播。
1 #include<stdio.h>2 #include<math.h>3 #include<stdlib.h>456 static int threshold = 5;78 static inline int check_value1(int x) {9 if(threshold < 20)10 return x*threshold;11 else return threshold;12 }1314 static inline int check_value2(int x) {15 if(threshold < 5)16 return x+threshold;17 else return threshold;18 }19 static inline int check_value3(int x) {20 threshold = 10;21 return check_value1(x);22 }23 static inline int check_value4(int x) {24 threshold = 50;25 return check_value2(x);26 }2728 int use_threshold(int threshold) {2930 return 10 + threshold;31 }32 int main()33 {34 int num = 30;35 int num2 = 5;36 int ans3 = use_threshold(threshold);37 int ans1 = check_value3(num);38 int ans2 = check_value4(num2);39 int ans = ans1 + ans2 +ans3;40 printf("ans is %d\n",ans);41 return 0;42 } |
查看ccp pass 中的debug的信息
39040 Visiting statement:39041 # VUSE <.MEM_2279>39042 _698 = dim.lto_priv.0D.4751;39043 which is likely CONSTANT39044 Lattice value changed to VARYING. Adding SSA edges to worklist. |
在这里进行gdb 调试,
69046 Substituting values and folding statements69048 Folding statement: dim = 3;69049 Not folded |
1761 for (k = 0; k < lpears[i] + upears[i]; k++) { 1762 1763 if (pearlist[i] == NULL) { 1764 fprintf(nabout, 1765 "NULL pair list entry in egb loop 1, taskid = %d\n", 1766 mytaskid); 1767 fflush(nabout);abort(); 1768 } 1769 j = pearlist[i][k]; 1770 |
在ifcvt pass 中看,if 并没有被外提,无法ifcvt
插入abort需要识别的pattern
14044 <bb 148> [local count: 919275880]: 14045 _2044 = _127 + _2039; 14046 _2045 = *_2044; 14047 if (_2045 == 0B) 14048 goto <bb 149>; [17.43%] 14049 else 14050 goto <bb 150>; [82.57%] 14051 14052 <bb 149> [local count: 160229786]: 14053 _2046 = 0; 14054 _2047 = nabout; 14055 fprintf (_2047, "NULL pair list entry in egb loop 1, taskid = %d\n", _2046); 14056 _2048 = nabout; 14057 fflush (_2048); 14058 14059 <bb 150> [local count: 919275880]: 14060 _2049 = *_2044; 14061 _2051 = (long unsigned int) k_2050; 14062 _2052 = _2051 * 4; 14063 _2053 = _2049 + _2052; 14064 j_2054 = *_2053; |
Eff.c:3282
build_base_HygonGCC_Spec2017_rate_perf-test.cfg-64.0000
build_base_HygonGCC_Spec2017_rate_perf.cfg-64.0001
加上一个参数使mme34内联进mme中,但是dim = 3的常量传播仍无法做到。写了一个静态全局变量的例子,发现其能够传播到,因此怀疑是函数中的其他代码影响了对常量的分析,导致无法传播;通过注释原题中的部分代码对此进行了验证。
加上if -continue 107
不加 106
Base 99.6
相关文章:
544 eff.c:1761处loop vect 分析
2.6 带有mask的向量数学函数 gcc 支持的svml向量数学函数 32652 GCC currently emits calls to code{vmldExp2}, 32653 code{vmldLn2}, code{vmldLog102}, code{vmldPow2}, 32654 code{vmldTanh2}, code{vmldTan2}, code{vmldAtan2}, code{vmldAtanh2}, 32655 code{vmldCbrt2}…...
搜狗拼音输入法纯净优化版:去广告,更流畅输入体验15.2.0.1758
前言 搜狗输入法电脑版无疑是装机必备的神器。它打字精准,词库丰富全面,功能强大,极大地提升了输入效率。最新版的搜狗拼音输入法更是借助AI技术,让打字变得既准确又高效。而搜狗输入法的去广告精简优化版,通过移除广…...
YOLOv11改进 | YOLOv11引入MobileNetV4
前言: 主要是对该文章YOLOv11改进 | YOLOv11引入MobileNetV4进行复现,以及对一些问题进行解答 1、mobilenetv4核心代码 from typing import Optional import torch import torch.nn as nn import torch.nn.functional as F__all__ [MobileNetV4ConvLa…...
Java中的ArrayList方法
1. 创建 ArrayList 实例 你可以通过多种方式创建 ArrayList 实例: <JAVA> ArrayList<String> list new ArrayList<>(); // 创建一个空的 ArrayList ArrayList<String> list new ArrayList<>(10); // 创建容量为 10 的 ArrayList …...
wordpress 利用 All-in-One WP Migration全站转移
导出导入站点 在插件中查询 All-in-One WP Migration备份并导出全站数据 导入 注意事项: 1.导入部分限制50MB 宝塔解决方案,其他类似,修改php.ini配置文件即可 2. 全站转移需要修改域名 3. 大文件版本,大于1G的可以参考我的…...
零基础教程:Windows电脑安装Linux系统(双系统/虚拟机)全攻略
一、安装方式选择 方案对比表 特性双系统安装虚拟机安装性能原生硬件性能依赖宿主机资源分配磁盘空间需要独立分区(建议50GB)动态分配(默认20GB起)内存占用独占全部内存需手动分配(建议4GB)启动方式开机选…...
聚焦AI与大模型创新,紫光云如何引领云计算行业快速演进?
【全球云观察 | 科技热点关注】 随着近年来AI与大模型的兴起,云计算行业正在发生着一场大变局。 “在2025年春节期间,DeepSeek两周火爆全球,如何进行私域部署成了企业关心的问题。”紫光云公司总裁王燕平强调指出,AI与…...
mapreduce 过程中,maptask的partitioner是在map阶段中具体什么阶段分区的?
在MapReduce的Map阶段中,Partitioner(分区器)的作用发生在map函数输出键值对之后,但在数据被写入磁盘(spill到本地文件)之前。具体流程如下: 分区发生的具体阶段: Map函数处理完成 当…...
找到字符串中所有字母异位词 --- 滑动窗口
目录 一:题目 二:算法原理 三:代码实现 一:题目 题目链接:438. 找到字符串中所有字母异位词 - 力扣(LeetCode) 二:算法原理 三:代码实现 版本一:无co…...
密码破解工具
1. 引言 密码是信息安全的核心之一,而攻击者往往利用各种工具和技术来破解密码。密码破解工具可以分为 离线破解(Offline Cracking) 和 在线破解(Online Cracking) 两大类: 离线破解:攻击者已经获取了加密的密码哈希(hash),可以在本地进行破解,无需与目标系统交互。…...
路由策略在双点双向路由重发布的应用
一、背景叙述 路由重发布通常是解决两个不同路由协议之间的互通问题,也就是路由双向引入。有时候,单点路由重发布在大规模网络中压力较大,缺乏冗余性,于是就有了双点双向路由重发布 问题:但是双点双向路由重发布也会…...
在Python软件中集成智能体:以百度文心一言和阿里通义千问为例
摘要 本文旨在探讨如何在Python软件中集成智能体,具体以百度文心一言和阿里通义千问等大模型生成的智能体为例。文章详细介绍了集成这些智能体的方法,包括环境准备、API调用、代码实现等步骤,并提供了相关的示例代码。通过集成这些智能体&…...
day22 学习笔记
文章目录 前言一、遍历1.行遍历2.列遍历3.直接遍历 二、排序三、去重四、分组 前言 通过今天的学习,我掌握了对Pandas的数据类型进行基本操作,包括遍历,去重,排序,分组 一、遍历 1.行遍历 itertuples方法用于遍历D…...
谈Linux之磁盘管理——万字详解
—— 小 峰 编 程 目录 一、硬盘的基本知识 1.了解硬盘的接口类型 2. 硬盘命名方式 3. 磁盘设备的命名 4. HP服务器硬盘 5. 硬盘的分区方式 二、 基本分区管理 1. 磁盘划分思路 2. 分区 2.1 MBR分区 2.2GPT分区 3.格式化—命令:mkfs 4.挂载 4.1手动挂…...
做好一个测试开发工程师第二阶段:java入门:idea新建一个project后默认生成的.idea/src/out文件文件夹代表什么意思?
时间:2025.4.8 一、前言 关于Java与idea工具安装不再展开,网上很多教程,可以自己去看 二、project建立后默认各文件夹代表意思 1、首先new---->project后会得到文件如图 其中: .idea文件代表:存储这个项目的历史…...
伪代码的定义与应用场景
李升伟 整理 伪代码(Pseudocode)是一种用近似自然语言(通常是英语或开发者熟悉的语言)和简单语法描述的算法逻辑工具。它介于自然语言和编程语言之间,不依赖具体语法规则,专注于表达思路,是编程…...
/sys/fs/cgroup/memory/memory.stat 关键指标说明
目录 1. **total_rss**2. **total_inactive_file**3. **total_active_file**4. **shmem**5. **其他相关指标**总结 以下是/sys/fs/cgroup/memory/memory.stat文件中一些关键指标的详细介绍,特别是与PostgreSQL相关的指标: 1. total_rss 定义:…...
机器学习中的聚类分析算法:原理与应用
一、什么是聚类分析? 聚类分析(Clustering Analysis)是机器学习中一种重要的无监督学习技术,它的目标是将数据集中的样本划分为若干个组(称为"簇"),使得同一簇内的样本彼此相似,而不同簇的样本差异较大。与分类不同…...
VUE中的路由处理
1.引入,预处理main.ts import {} from vue-router import { createRouter, createWebHistory } from vue-router import HomePages from @/pages/HomePages.vue import AboutPage from @/pages/AboutPage.vue import NewsPage from @/pages/NewsPage.vue //1. 配置路由规…...
MATLAB学习笔记(二) 控制工程会用到的
MATLAB中 控制工程会用到的 基础传递函数表达传递函数 零极点式 状态空间表达式 相互转化画响应图线根轨迹Nyquist图和bode图现控部分求约旦判能控能观极点配置和状态观测 基础 传递函数表达 % 拉普拉斯变换 syms t s a f exp(a*t) %e的a次方 l laplace(f) …...
Python: 实现数据可视化分析系统
后端基于Python 开源的 Web 框架 Flask,前端页面采用 LayUI 框架以及 Echarts 图表,数据库为sqlite。系统的功能模块分为数据采集和存储模块、数据处理和分析模块、可视化展示模块和系统管理模块。情感分析方面使用LDA等主题建模技术,结合领域…...
VectorBT量化入门系列:第一章 VectorBT基础与环境搭建
VectorBT量化入门系列:第一章 VectorBT基础与环境搭建 本教程专为中高级开发者设计,系统讲解VectorBT技术在量化交易中的应用。通过结合Tushare数据源和TA-Lib技术指标,深度探索策略开发、回测优化与风险评估的核心方法。从数据获取到策略部署…...
典型反模式深度解析及重构方案
反模式 1:魔法数字/字符串(Magic Numbers/Strings) ▐ 问题场景 // 订单状态校验 if (order.getStatus() 3) { // 3代表已发货?sendNotification(); }// 折扣计算 double discount price * 0.15; // 0.15是什么?…...
神经探针与价值蓝海:AI重构需求挖掘的认知拓扑学
当产品经理的决策边界遭遇量子态的用户需求,传统需求分析工具已显露出经典物理般的局限性。Gartner 2024报告揭示:全球Top 500企业中有83%遭遇需求洞察的"测不准困境"——用户声称的需求与行为数据偏差率达47%,而未被表达的潜在需求…...
Tomcat 负载均衡
目录 二、Tomcat Web Server 2.1 Tomcat 部署 2.1.1 Tomcat 介绍 2.1.2 Tomcat 安装 2.2 Tomcat 服务管理 2.2.1 Tomcat 启停 2.2.2 目录说明 2.2.3编辑主页 2.3 Tomcat管理控制台 2.3.1开启远程管理 2.3.2 配置远程管理密码 三、负载均衡 3.1 重新编译Nginx 3.1.1 确…...
CSS >子元素选择器和空格
在 CSS 中,> 符号是 子元素选择器(Child Combinator),它用于选择某个元素的直接子元素(仅限第一层嵌套的子元素,不包含更深层的后代元素)。 语法 父元素 > 子元素 {样式规则; } 示例 …...
duckdb源码阅读学习路径图
🧭 DuckDB 最小内存源码阅读路径图 1️⃣ 数据流入口与批处理:DataChunk 项目内容✅ 目标理解 DuckDB 向量化执行的数据载体结构,如何影响内存📁 路径src/common/types/data_chunk.cpp/hpp🔍 入口函数DataChunk::Initialize, DataChunk::SetCardinality, Reset📌 优化…...
C#二叉树
C#二叉树 二叉树是一种常见的数据结构,它是由节点组成的一种树形结构,其中每个节点最多有两个子节点。二叉树的一个节点通常包含三部分:存储数据的变量、指向左子节点的指针和指向右子节点的指针。二叉树可以用于多种算法和操作,…...
BT-Basic函数之首字母W
BT-Basic函数之首字母W 文章目录 BT-Basic函数之首字母Wwaitwait for start wait wait函数使程序在执行下一个功能之前暂停指定的秒数。 语法 wait <数值表达式>参数 <数值表达式> 等待时长,以秒为单位。该值必须大于或等于0。小于25毫秒的正值会被…...
如何避免论文内容被误认为是 AI 生成的?
AIGC 检测的原理 AIGC 检测主要基于自然语言处理(NLP)和机器学习技术,通过深度分析文本内容来识别其中的 AI 生成痕迹。具体原理如下: 基础学习算法:利用机器学习算法对文本信息进行特征提取和表示,以便计…...
node.js之path常用方法
node.js之path常用方法 1.path.join([…paths]) 用于将多个路径片段拼接成一个路径,会自动处理路径分隔符,避免手动拼接时可能出现的问题 const joinedPath path.join(folder1, folder2, file.txt); console.log(joinedPath); // 输出: folder1/fol…...
【面试】C++与C override的报错阶段 RAII
文章目录 C 相对于 C 语言的主要区别**1. 面向对象编程(OOP)****2. 函数增强****3. 内存管理****4. 引用(Reference)****5. 标准模板库(STL)****6. 异常处理****7. 类型安全增强****8. 其他特性****9. 兼容…...
LeetCode 3396.使数组元素互不相同所需的最少操作次数:O(n)一次倒序遍历
【LetMeFly】3396.使数组元素互不相同所需的最少操作次数:O(n)一次倒序遍历 力扣题目链接:https://leetcode.cn/problems/minimum-number-of-operations-to-make-elements-in-array-distinct/ 给你一个整数数组 nums,你需要确保数组中的元素…...
机器学习课堂7用scikit-learn库训练SVM模型
1.用scikit-learn库训练SVM模型 代码 # 2-11用scikit-learn库训练SVM模型 import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn import svm # 导入sklearn# 参数设置 m_train 250 # 训练样本数量 svm_C 100 # SVM的C值 svm_kernel …...
模拟考试系统(ssm+vue+mysql5.x)
模拟考试系统(ssm+vue+mysql5.x) 模拟考试系统是一个为考试准备和管理提供全面支持的平台。系统提供了丰富的功能模块,包括个人中心、科目管理、复习资料管理、参考文献管理、用户管理、留言板管理、试题管理、试卷管理、系统管理和考试管理。用户可以在个人中心修改…...
【计网】作业4
一. 单选题(共22题,64分) 1. (单选题)主机甲采用停止-等待协议向主机乙发送数据,数据传输速率是4kb/s,单向传播时延为30ms,忽略确认帧的发送时延。当信道利用率等于80%时,数据帧的长度为&#…...
MYSQL数据库语法补充
一,DQL基础查询 DQL(Data Query Language)数据查询语言,可以单表查询,也可以多表查询 语法: select 查询结果 from 表名 where 条件; 特点: 查询结果可以是:表中的字段…...
Java基础编程练习第38题-除法器
题目:编写一个除法器,输入被除数和除数,并将结果输出。 这道题看似很简单,实则也不难。 就是假如用户输入的类型不同怎么办呢?用户输入int或者double类型应该怎么解决。这里我们就需要用到函数的重载。 代码如下&am…...
【基于Vue3组合式API的互斥输入模式实现与实践分享】
基于Vue3组合式API的互斥输入模式实现与实践分享 目录 背景与痛点设计思路技术实现使用场景与案例遇到的问题与解决方案最佳实践总结 1. 背景与痛点 在表单交互设计中,我们经常面临这样的场景:多种输入方式互斥。例如,在评分系统中&#…...
Linux进程概念及理解
目录 冯诺依曼体系结构 操作系统(Operator System) 概念 设计OS的目的 定位 如何理解 "管理" 总结 系统调用和库函数概念 进程 基本概念 描述进程-PCB task_struct-PCB的一种 task_ struct内容分类 组织进程 查看进程 通过系统调用获取进程标示符 通过系统调用创建进…...
苹果签名是否安全
苹果开发者与运营商都对苹果签名有一定了解,那么苹果签名安全吗?下面我来跟大家聊一聊。 苹果签名能验证应用的来源,但存在一些风险,有开发者伪造签名,让用户认为此产品是可信的,这样就安装到了恶意应用&am…...
STM32在裸机(无RTOS)环境下,需要手动实现队列机制来替代FreeRTOS的CAN发送接收函数
xQueueSendToBackFromISR(ecuCanRxQueue, hcan->pRxMsg, &xHigherPriorityTaskWoken),xQueueReceive(mscCanRxQueue,&mscRxMsg,0)和xQueueSendToBack(mscCanTxQueue, &TxMessageTemp, 0 )这3个函数,在裸机下实现: 在裸机&…...
无法看到新安装的 JDK 17
在 Linux 系统中使用 update-alternatives --config java 无法看到新安装的 JDK 17,可能是由于 JDK 未正确注册到系统备选列表中。 一、原因分析 JDK 未注册到 update-alternatives update-alternatives 工具需要手动注册 JDK 路径后才能识别新版本。如果仅安装 JDK…...
JavaEE——线程的状态
目录 前言1. NEW2. TERMINATED3. RUNNABLE4. 三种阻塞状态总结 前言 本篇文章来讲解线程的几种状态。在Java中,线程的状态是一个枚举类型,Thread.State。其中一共分为了六个状态。分别为:NEW,RUNNABLE,BLOCKED,WAITING,TIMED_WAITING, TERMI…...
数据结构与算法-数学-(同余,线性同余方程,中国剩余定理,卡特兰数,斯特林数)
同余方程: 1.1 线性同余方程 & 乘法逆元 线性同余方程是形如 ax≡b(mod m) 的方程,可转化为 ax + my = b 的线性不定方程,利用扩展欧几里得算法求解。当 b = 1 时,x 就是 a 在模 m 意义下的乘法逆元。 代码: #include &…...
RAG 系统中的偏差是什么?
检索增强生成 (RAG) 在减少模型幻觉和增强大型语言模型 (LLM)的领域特定知识库方面已获得广泛认可。通过外部数据源佐证大型语言模型生成的信息,有助于保持模型输出的新鲜度和真实性。然而,最近在 RAG系统中的发现,突显了基于 RAG 的大型语言…...
[创业之路-362]:用确定性的团队、组织、产品开发流程和方法,应对客户、市场、竞争和商业模式的不确定性。
在充满不确定性的商业环境中,通过确定性的团队、组织、产品开发流程和方法构建核心竞争力,是应对客户、市场、竞争和商业模式变化的核心策略。以下从团队韧性、组织敏捷、产品开发闭环三个维度,结合实战方法论,提供可落地的解决方…...
系统与网络安全------网络通信原理(1)
资料整理于网络资料、书本资料、AI,仅供个人学习参考。 文章目录 网络通信模型协议分层计算机网络发展计算机网络功能什么是协议为什么分层邮局实例 OSI模型OSI协议模型OSI七层模型OSI七层的功能简介 TCP/IP模型OSI模型与TCP/IP模型TCP/IP协议族的组成各层PDU设备与…...
ArkTS语言基础之函数
前言 臭宝们终于来到了ArkTS基础之函数,今天我们来学习一下ArkTS的函数的相关知识,上一节中也有一些函数的基础知识。 函数声明 函数声明引入一个函数,包含其名称、参数列表、返回类型和函数体,在下面的例子中,我们声明了一个名…...
synchronized锁升级的锁对象和Mark Word
在讨论synchronized锁升级和Mark Word时,提到的"对象"通常指的是锁对象,也就是被用作synchronized同步锁的那个Java对象。 1. 什么是锁对象? 锁对象是指被用于synchronized同步代码块或方法的对象实例。例如: // 这个…...