当前位置: 首页 > news >正文

544 eff.c:1761处loop vect 分析

2.6 带有mask的向量数学函数

gcc 支持的svml向量数学函数

32652 GCC currently emits calls to @code{vmldExp2},
32653 @code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
32654 @code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
32655 @code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
32656 @code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
32657 @code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4},
32658 @code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
32659 @code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
32660 @code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},
32661 @code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding
32662 function type when @option{-mveclibabi=svml} is used

oneapi的IR:%3970 = call fast cc104 <4 x double> @__svml_log4_mask(<4 x double> %3968, <4 x i64> %3969)

gcc的IR : _799 = _ZGVdN4v_logD.6143 (_800);

<__svml_log4_mask_e9>汇编代码的函数原名。

从如何调用不带mask的svml向量数学函数的流程出发,找出调用带有mask的方法。

设计方案:

vect__ifc__1252.1526_717 = VEC_COND_EXPR <mask__1460.1449_910, vect__1761.1465_870, { 0.0, 0.0 }>; 找到一个VEC_COND_EXPR,在同一个基本块中,根据第二个或者第三个参数所涉及到的运算(建立一个栈暂存每次找到的结果),顺着运算的关系一步步往上找,直到找到了需要进行mask的数学函数。如果在第二个参数中找到,VEC_COND_EXPR中的第一个参数mask就是数学函数需要进行mask的值。如果在第三个参数的关系链中找到,其所需的mask就是VEC_COND_EXPR中的mask的取反。将数学函数和mask一起生成带有mask的数学函数的IR,替换掉原来的不带mask的。(在生成cond_expr之后做还是在loop vect pass之后另外新建一个pass做。)

     #include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"
#include "tree-ssa-sccvn.h"
#include "gimple-expr.h"
#include <cstdio>

namespace
{

/* Name of the vector math routine this pass looks for, and also the name
   given to the call it emits with the extra mask argument.  An earlier
   experiment (left commented out in the original listing) used "vmldLn2"
   and "vmldLn2Mask" instead.
   NOTE(review): matching and emitting the same name means an already
   rewritten call still matches; execute () therefore masks each call
   result at most once per run.  */
static const char *const masked_vecmath_name = "__svml_log4_mask_e9";

/* Static description of the pass.  TV_TREE_VECT_MASK_VECMATH_FUNC is
   declared outside this file.  */
const pass_data pass_data_test = {
  GIMPLE_PASS,                    /* type */
  "mask_vecmath_func",            /* name */
  OPTGROUP_NONE,                  /* optinfo_flags */
  TV_TREE_VECT_MASK_VECMATH_FUNC, /* tv_id */
  (PROP_cfg | PROP_ssa),          /* properties_required */
  0,                              /* properties_provided */
  0,                              /* properties_destroyed */
  0,                              /* todo_flags_start */
  0,                              /* todo_flags_finish */
};

/* GIMPLE pass that attaches the mask of a VEC_COND_EXPR to the vector
   math call feeding the selected value.  */
class pass_mask_vecmath_func : public gimple_opt_pass
{
public:
  pass_mask_vecmath_func (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_test, ctxt)
  {}

  /* The pass runs only when flag_tree_mask_vecmath_func (declared outside
     this file) is nonzero.  */
  virtual bool
  gate (function *)
  {
    return flag_tree_mask_vecmath_func;
  }

  virtual unsigned int execute (function *);
};

/* Worker for find_relate_operand.  VISITED records the SSA names whose
   defining statements were already searched, so an operand shared by
   several statements (or used twice by one statement, as in x * x) is
   walked only once.  The first match in depth-first order is unchanged by
   this: a name is only revisited after its first search found nothing.  */

static gimple *
find_relate_operand_1 (tree operand, gimple *stmt, hash_set<tree> &visited)
{
  if (!stmt || TREE_CODE (operand) != SSA_NAME)
    return NULL;

  if (is_gimple_call (stmt))
    {
      /* Only direct calls to a named declaration can match.  */
      tree fndecl = gimple_call_fndecl (stmt);
      if (fndecl
	  && DECL_P (fndecl)
	  && DECL_NAME (fndecl)
	  && strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),
		     masked_vecmath_name) == 0)
	return stmt;
      return NULL;
    }

  /* Apart from calls, only assignments are followed.  */
  if (!is_gimple_assign (stmt))
    return NULL;

  /* Operand 0 is the LHS; look through every SSA name on the RHS.  */
  for (unsigned i = 1; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      if (!op || TREE_CODE (op) != SSA_NAME || visited.add (op))
	continue;

      gimple *found
	= find_relate_operand_1 (op, SSA_NAME_DEF_STMT (op), visited);
      if (found)
	return found;
    }
  return NULL;
}

/* Starting from STMT, the statement defining SSA name OPERAND, walk the
   definition chain backwards through assignments and return the first
   call to masked_vecmath_name that is reached, or NULL if there is none
   (or if STMT is NULL or OPERAND is not an SSA name).  */

static gimple *
find_relate_operand (tree operand, gimple *stmt)
{
  hash_set<tree> visited;
  if (operand && TREE_CODE (operand) == SSA_NAME)
    visited.add (operand);
  return find_relate_operand_1 (operand, stmt, visited);
}

/* Replace call STMT by a call to masked_vecmath_name that takes the
   original arguments followed by NEW_ARG and assigns to the same LHS.
   Nothing is done if STMT is not a direct call or NEW_ARG is NULL.
   NOTE(review): the oneAPI IR quoted in the article passes the mask as
   <4 x i64>; whether the VEC_COND_EXPR mask type is ABI-compatible with
   that is not checked here -- confirm.  */

static void
add_mask_to_call (gimple *stmt, tree new_arg)
{
  if (!stmt || !new_arg || !is_gimple_call (stmt))
    return;

  /* Indirect and internal-function calls have no decl to derive the new
     declaration from.  */
  tree callee = gimple_call_fndecl (stmt);
  if (!callee)
    return;

  const unsigned num_args = gimple_call_num_args (stmt);

  /* New argument list: the original arguments plus the mask.  Collect the
     argument types as well so the new declaration's function type matches
     the call that is emitted.  */
  vec<tree> vargs = vNULL;
  vargs.create (num_args + 1);
  auto_vec<tree> param_types (num_args + 1);
  for (unsigned i = 0; i < num_args; ++i)
    {
      tree arg = gimple_call_arg (stmt, i);
      vargs.quick_push (arg);
      param_types.quick_push (TREE_TYPE (arg));
    }
  vargs.quick_push (new_arg);
  param_types.quick_push (TREE_TYPE (new_arg));

  /* Same return type and type attributes as the original callee, but with
     the extra mask parameter.  Reusing the original type would declare a
     function with one parameter fewer than the call passes.  */
  tree old_fntype = TREE_TYPE (callee);
  tree new_fntype
    = build_function_type_array (TREE_TYPE (old_fntype),
				 param_types.length (),
				 param_types.address ());
  new_fntype
    = build_type_attribute_variant (new_fntype, TYPE_ATTRIBUTES (old_fntype));

  tree new_fndecl
    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		  get_identifier (masked_vecmath_name), new_fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  gimple *new_call = gimple_build_call_vec (new_fndecl, vargs);
  gimple_call_set_lhs (new_call, gimple_call_lhs (stmt));

  /* Swap the new call in at the position of the old one.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_replace (&gsi, new_call, true);

  vargs.release ();
}

/* Return true if MASK can be used as an operand of CALL.  Both CALL and
   the VEC_COND_EXPR that uses MASK are in BB.  A mask defined in another
   block dominates all of BB because it is already used in BB; a mask
   defined in BB itself must be defined before CALL.  */

static bool
mask_available_before_p (tree mask, gimple *call, basic_block bb)
{
  if (TREE_CODE (mask) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (mask))
    return true;

  gimple *mask_def = SSA_NAME_DEF_STMT (mask);
  if (gimple_bb (mask_def) != bb)
    return true;

  return stmt_dominates_stmt_p (mask_def, call);
}

/* For every VEC_COND_EXPR assignment in FUN whose second operand (the
   value selected where the mask is true) is an SSA name, search that
   operand's definition chain for the vector math call and add the
   VEC_COND_EXPR mask to it.  The third operand (which would need the
   inverted mask) is not handled.  Always returns 0.
   NOTE(review): other uses of the call result that do not go through the
   VEC_COND_EXPR are not checked; they would see masked-off lanes.  */

unsigned
pass_mask_vecmath_func::execute (function *fun)
{
  /* LHS names of calls that already received a mask in this run.  */
  hash_set<tree> masked_results;

  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gassign *select = dyn_cast <gassign *> (gsi_stmt (gsi));
	  if (!select || gimple_assign_rhs_code (select) != VEC_COND_EXPR)
	    continue;

	  tree mask_operand = gimple_assign_rhs1 (select);
	  tree true_vector_operand = gimple_assign_rhs2 (select);
	  if (TREE_CODE (true_vector_operand) != SSA_NAME)
	    continue;

	  gimple *stmt_vecmath
	    = find_relate_operand (true_vector_operand,
				   SSA_NAME_DEF_STMT (true_vector_operand));
	  if (!stmt_vecmath)
	    continue;

	  /* Only rewrite a call in the same block as the VEC_COND_EXPR,
	     and only if the mask is already defined at the call;
	     otherwise the new call would use the mask before its
	     definition.  */
	  if (gimple_bb (stmt_vecmath) != bb
	      || !mask_available_before_p (mask_operand, stmt_vecmath, bb))
	    continue;

	  /* The rewritten call keeps its LHS and still matches by name, so
	     remember the LHS to avoid appending a second mask.  */
	  tree call_result = gimple_call_lhs (stmt_vecmath);
	  if (call_result && masked_results.add (call_result))
	    continue;

	  add_mask_to_call (stmt_vecmath, mask_operand);
	}
    }
  return 0;
}

} // anonymous namespace
190 
191  gimple_opt_pass *
192  make_pass_mask_vecmath_func (gcc::context *ctxt)
193  {
194    return new pass_mask_vecmath_func (ctxt);
195  }

生成了正确的IR之后,使用builtin的方式调用svml中的带有mask的数学函数。

gcc调用svml函数在gimple阶段的过程:

1:examining statement:

vect_analyze_stmt函数中检查stmt, 在vectorizable_xxx函数里面判断操作数的类型。vect_is_simple_use: 计算向量化的cost, vect_model_simple_cost,先不进行transform。

调用svml需要使用target-specific built-in function,使用此函数targetm.vectorize.builtin_vectorized_function,根据优化选项(config/i386/i386-options.cc:2567)定位到(ix86_veclib_handler = &ix86_veclibabi_svml)后端ix86_veclibabi_svml函数处,返回向量svml函数fndecl。

2:vectorizing statement:

vect_transform_loop_stmt函数中,进行transform,同样也会调用vectorizable_xxx函数进行此转化。gimple_build_call_vec (fndecl, vargs):根据获取到的fndecl以及对参数的向量化,构建一个新的gimple vec call。

loop vec pass的调用栈

vect_analyze_loop_2:

Apply a set of analyses on LOOP, and create a loop_vec_info struct for it.  The different analyses will record information in the loop_vec_info struct

loop_vec_info 里面放的是对loop 分析完成后的整个loop的信息

vect_analyze_loop_operations:

Scan the loop stmts and make sure they are all vectorizable.

vect_analyze_stmt:

Make sure the statement is vectorizable.

ziyuan  2.3 和 2.4修改对于其他课题的影响 aggressive_if_conv && use_gather_2parts result.xlsx 采用HygonGCC 1.3.2编译器最新版本 和最新配置文件Hygon7490-2p-HygonGCC1.3.2.202403-hgalloc-znver1-base.cfg

跑1copy的时候整个node最好不要跑其他程序,不然性能数据会波动较大。会抢占node的内存等资源。

可能优化的方向:

  1. gcc调用svml向量数学库的接口函数只能支持128bit的输入。修改接口调用256bit的输入。
  2. -mtune-ctrl=^avx256_split_regs,^avx128_optimal,256_unaligned_store_optimal可以使程序使用256bit的ymm寄存器,提高循环向量化的vf,对性能有提高(2069:4%,1761:8%)。
  3. oneapi使用将条件和条件里面的计算分别放在不同的bb块中,通过控制流来选择需要执行哪些分支,可以减少冗余运算。Gcc向量化只能在同一个bb块中进行,无法控制每个分支,只支持在log函数上进行mask操作,和最终运算的结果上进行选择,其他操作- + *等只能在支持avx512的机器上。只能想办法在gcc上也进行将不同分支分为不同bb块的操作,模仿oneapi。
  4. gcc上的vf是8,使用两次log4,oneapi的vf为4,使用一次log4,通过将i32扩展为i64,使用256bit ymm,尝试将gcc变为vf4使用一次log4,使用相似的方法,未能成功。并且怀疑3才是性能的主要点,此操作应该不是性能的主要点。

       5. gcc循环向量化无法处理跨bb的问题,如果向量化后拆分成不同bb,后续的pass可能无法处理会对拆分的bb做一些未知的操作,不建议使用此方法,可以在原有的bb里面插入一些 根据mask进行选择的指令,来模拟分支选择的操作。

void calc(double *src1,double *src2,double *src3)5    {6        int i;7        for(i=0;i<100;i++)8        {9            if(src3[i] > 10.0)10            {11                src1[i] = exp(src2[i]);12            }13            else if(src3[i] > 5.0)14            {15                src1[i] = log(src2[i]);16            }17            else if(src3[i] > 2.5)18            {19                src1[i] = sin(src2[i]);20            }21        }22    }

对于有mask store的操作,会将if-conversion操作进行回退。optimize_mask_stores

1:新建一个对mask进行判断是否全为0的GIMPLE_COND。

2:新建一个then bb块,并且维护其边。

3:在mask store后分割一个新的bb,并且把stmt全部移到bb里面,新建一个边。

create_basic_block_1 (void *head, void *end, basic_block after):

int vf为4,double vf 为2.

test_mask_vecmath.c:13:18: note:   === vect_determine_vectorization_factor ===681 test_mask_vecmath.c:13:18: note:   ==> examining phi: i_114 = PHI <i_85(20), 0(35)>682 test_mask_vecmath.c:13:18: note:   ==> examining phi: sumi1_115 = PHI <_136(20), 0.0(35)>683 test_mask_vecmath.c:13:18: note:   get vectype for scalar type:  double684 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double685 test_mask_vecmath.c:13:18: note:   nunits = 2686 test_mask_vecmath.c:13:18: note:   ==> examining phi: sumi2_117 = PHI <_138(20), 0.0(35)>687 test_mask_vecmath.c:13:18: note:   get vectype for scalar type:  double688 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double689 test_mask_vecmath.c:13:18: note:   nunits = 2690 test_mask_vecmath.c:13:18: note:   ==> examining phi: sumi3_119 = PHI <_140(20), 0.0(35)>691 test_mask_vecmath.c:13:18: note:   get vectype for scalar type:  double692 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double693 test_mask_vecmath.c:13:18: note:   nunits = 2694 test_mask_vecmath.c:13:18: note:   ==> examining phi: ivtmp_106 = PHI <ivtmp_101(20), 100(35)>695 test_mask_vecmath.c:13:18: note:   ==> examining statement: _62 = (long unsigned int) i_114;696 test_mask_vecmath.c:13:18: note:   skip.697 test_mask_vecmath.c:13:18: note:   ==> examining statement: _63 = _62 * 4;698 test_mask_vecmath.c:13:18: note:   skip.699 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_151 = i_114 w* 4;700 test_mask_vecmath.c:13:18: note:   skip.701 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_152 = (long unsigned int) patt_151;702 test_mask_vecmath.c:13:18: note:   skip.703 test_mask_vecmath.c:13:18: note:   ==> examining statement: _64 = &src3 + _63;704 test_mask_vecmath.c:13:18: note:   skip.705 test_mask_vecmath.c:13:18: note:   ==> examining statement: j_65 = *_64;706 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(4) int707 test_mask_vecmath.c:13:18: note:   nunits = 4708 
test_mask_vecmath.c:13:18: note:   ==> examining statement: _66 = (long unsigned int) j_65;709 test_mask_vecmath.c:13:18: note:   skip.710 test_mask_vecmath.c:13:18: note:   ==> examining statement: _67 = _66 * 8;711 test_mask_vecmath.c:13:18: note:   skip.712 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_153 = j_65 w* 8;713 test_mask_vecmath.c:13:18: note:   skip.714 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_154 = (long unsigned int) patt_153;715 test_mask_vecmath.c:13:18: note:   skip.716 test_mask_vecmath.c:13:18: note:   ==> examining statement: _142 = _141 + _67;717 test_mask_vecmath.c:13:18: note:   skip.
test_mask_vecmath.c:13:18: note:   ==> examining statement: _68 = (double *) _142;719 test_mask_vecmath.c:13:18: note:   skip.720 test_mask_vecmath.c:13:18: note:   ==> examining statement: _143 = j_65 > 10;721 test_mask_vecmath.c:13:18: note:   vectype: vector(4) <signed-boolean:32>722 test_mask_vecmath.c:13:18: note:   nunits = 4723 test_mask_vecmath.c:13:18: note:   ==> examining statement: _69 = .MASK_LOAD (_68, 64B, _143);724 test_mask_vecmath.c:13:18: note:   skip.725 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_155 = (<signed-boolean:64>) _143;726 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>727 test_mask_vecmath.c:13:18: note:   nunits = 2728 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_156 = .MASK_LOAD (_68, 64B, patt_155);729 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double730 test_mask_vecmath.c:13:18: note:   nunits = 2731 test_mask_vecmath.c:13:18: note:   ==> examining statement: _70 = log (_69);732 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double733 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double734 test_mask_vecmath.c:13:18: note:   nunits = 2735 test_mask_vecmath.c:13:18: note:   ==> examining statement: _89 = (unsigned int) j_65;736 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: unsigned int737 test_mask_vecmath.c:13:18: note:   vectype: vector(4) unsigned int738 test_mask_vecmath.c:13:18: note:   nunits = 4739 test_mask_vecmath.c:13:18: note:   ==> examining statement: _87 = _89 + 4294967288;740 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: unsigned int741 test_mask_vecmath.c:13:18: note:   vectype: vector(4) unsigned int742 test_mask_vecmath.c:13:18: note:   nunits = 4743 test_mask_vecmath.c:13:18: note:   ==> examining statement: _73 = _62 * 8;744 test_mask_vecmath.c:13:18: note:   skip.745 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: 
patt_157 = i_114 w* 8;746 test_mask_vecmath.c:13:18: note:   skip.747 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_158 = (long unsigned int) patt_157;748 test_mask_vecmath.c:13:18: note:   skip.749 test_mask_vecmath.c:13:18: note:   ==> examining statement: _145 = _73 + _141;750 test_mask_vecmath.c:13:18: note:   skip.751 test_mask_vecmath.c:13:18: note:   ==> examining statement: _74 = (double *) _145;752 test_mask_vecmath.c:13:18: note:   skip.753 test_mask_vecmath.c:13:18: note:   ==> examining statement: _146 = _87 <= 2;754 test_mask_vecmath.c:13:18: note:   vectype: vector(4) <signed-boolean:32>755 test_mask_vecmath.c:13:18: note:   nunits = 4756 test_mask_vecmath.c:13:18: note:   ==> examining statement: _75 = .MASK_LOAD (_74, 64B, _146);757 test_mask_vecmath.c:13:18: note:   skip.758 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_159 = (<signed-boolean:64>) _146;759 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>760 test_mask_vecmath.c:13:18: note:   nunits = 2
761 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_160 = .MASK_LOAD (_74, 64B, patt_159);762 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double763 test_mask_vecmath.c:13:18: note:   nunits = 2764 test_mask_vecmath.c:13:18: note:   ==> examining statement: _76 = log (_75);765 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double766 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double767 test_mask_vecmath.c:13:18: note:   nunits = 2768 test_mask_vecmath.c:13:18: note:   ==> examining statement: _148 = _73 + _147;769 test_mask_vecmath.c:13:18: note:   skip.770 test_mask_vecmath.c:13:18: note:   ==> examining statement: _80 = (double *) _148;771 test_mask_vecmath.c:13:18: note:   skip.772 test_mask_vecmath.c:13:18: note:   ==> examining statement: _149 = j_65 == 7;773 test_mask_vecmath.c:13:18: note:   vectype: vector(4) <signed-boolean:32>774 test_mask_vecmath.c:13:18: note:   nunits = 4775 test_mask_vecmath.c:13:18: note:   ==> examining statement: _81 = .MASK_LOAD (_80, 64B, _149);776 test_mask_vecmath.c:13:18: note:   skip.777 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_161 = (<signed-boolean:64>) _149;778 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>779 test_mask_vecmath.c:13:18: note:   nunits = 2780 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_162 = .MASK_LOAD (_80, 64B, patt_161);781 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double782 test_mask_vecmath.c:13:18: note:   nunits = 2783 test_mask_vecmath.c:13:18: note:   ==> examining statement: _82 = log (_81);784 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double785 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double786 test_mask_vecmath.c:13:18: note:   nunits = 2787 test_mask_vecmath.c:13:18: note:   ==> examining statement: _ifc__135 = j_65 > 10 ? 
_70 : 0.0;788 test_mask_vecmath.c:13:18: note:   skip.789 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_163 = j_65 > 10;790 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(4) <signed-boolean:32>791 test_mask_vecmath.c:13:18: note:   nunits = 4792 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_164 = (<signed-boolean:64>) patt_163;793 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>794 test_mask_vecmath.c:13:18: note:   nunits = 2795 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_165 = patt_164 ? _70 : 0.0;
796 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double797 test_mask_vecmath.c:13:18: note:   nunits = 2798 test_mask_vecmath.c:13:18: note:   ==> examining statement: _136 = sumi1_115 + _ifc__135;799 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double800 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double801 test_mask_vecmath.c:13:18: note:   nunits = 2802 test_mask_vecmath.c:13:18: note:   ==> examining statement: _ifc__137 = _87 <= 2 ? _76 : 0.0;803 test_mask_vecmath.c:13:18: note:   skip.804 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_166 = _87 <= 2;805 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(4) <signed-boolean:32>806 test_mask_vecmath.c:13:18: note:   nunits = 4807 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_167 = (<signed-boolean:64>) patt_166;808 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>809 test_mask_vecmath.c:13:18: note:   nunits = 2810 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_168 = patt_167 ? _76 : 0.0;811 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double812 test_mask_vecmath.c:13:18: note:   nunits = 2813 test_mask_vecmath.c:13:18: note:   ==> examining statement: _138 = sumi2_117 + _ifc__137;814 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double815 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double816 test_mask_vecmath.c:13:18: note:   nunits = 2817 test_mask_vecmath.c:13:18: note:   ==> examining statement: _ifc__139 = j_65 == 7 ? 
_82 : 0.0;818 test_mask_vecmath.c:13:18: note:   skip.819 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_169 = j_65 == 7;820 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(4) <signed-boolean:32>821 test_mask_vecmath.c:13:18: note:   nunits = 4822 test_mask_vecmath.c:13:18: note:   ==> examining pattern def stmt: patt_170 = (<signed-boolean:64>) patt_169;823 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) <signed-boolean:64>824 test_mask_vecmath.c:13:18: note:   nunits = 2825 test_mask_vecmath.c:13:18: note:   ==> examining pattern statement: patt_171 = patt_170 ? _82 : 0.0;826 test_mask_vecmath.c:13:18: note:   precomputed vectype: vector(2) double827 test_mask_vecmath.c:13:18: note:   nunits = 2828 test_mask_vecmath.c:13:18: note:   ==> examining statement: _140 = sumi3_119 + _ifc__139;829 test_mask_vecmath.c:13:18: note:   get vectype for scalar type: double830 test_mask_vecmath.c:13:18: note:   vectype: vector(2) double831 test_mask_vecmath.c:13:18: note:   nunits = 2832 test_mask_vecmath.c:13:18: note:   ==> examining statement: i_85 = i_114 + 1;833 test_mask_vecmath.c:13:18: note:   skip.834 test_mask_vecmath.c:13:18: note:   ==> examining statement: ivtmp_101 = ivtmp_106 - 1;835 test_mask_vecmath.c:13:18: note:   skip.836 test_mask_vecmath.c:13:18: note:   ==> examining statement: if (ivtmp_101 != 0)837 test_mask_vecmath.c:13:18: note:   skip.838 test_mask_vecmath.c:13:18: note:   vectorization factor = 4

既有int 也有double的loop

        #include<stdio.h>
#include<math.h>
#include<stdlib.h>
void calc(double *src1,double *src2,int *src3) /* accumulates log() into three sums chosen by src3[i], then prints their total */
{
    int i;
    int j;
    double sumi = 0;
    double sumi1 = 0;
    double sumi2 = 0;
    double sumi3 = 0;
    double sumi_temp[100];
    for(i=0;i<100;i++) /* kept on line 13: presumably the loop the test_mask_vecmath.c:13 vectorizer notes refer to */
    {
        j = src3[i];
        if(src3[i] > 10)
        {
          //  src1[i] = exp(src2[j]);
            sumi1 += log(src2[j]); /* indexed load: src2 is read at j = src3[i] */
          //  sumi = exp(src3[i]);
        //  sumi += 2;
        }
        else if(src3[i] > 7)
        {
          //  src1[i] = log(src2[j]);
          //  sumi = log(src2[j]);
           sumi2 += log(src2[i]);
        //  sumi += 3;
        }

        else if(src3[i] > 6)
        {
          //  src1[i] = sin(src2[j]);
            sumi3 += log(src1[i]);
         //  sumi += 2;
        }
    }
  /*  for(int i=0;i<100;i++) {
      sumi+=src1[i];
    }*/
    sumi = sumi1 + sumi2 + sumi3;
    printf("sumi is %lf\n",sumi);

}
/* Driver: fills src1/src2 with doubles in [5, 15] and src3 with ints in [5, 14] (fixed seed 12), prints src1, runs calc, prints the sum of src1. */
int main()
{
  srand(12);
  double src1[100];
  double src2[100];
//  double src3[100];
  int src3[100];
  double rand_double_min2 = 5.0;
  double rand_double_max2 = 15.0;

  int rand_int_min2 = 5;
  int rand_int_max2 = 15;

  for(int k = 0;k<100;k++) {
    src1[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );
    src2[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );
  //  src3[k] = rand_double_min2+1.0 * rand() / RAND_MAX * ( rand_double_max2 - rand_double_min2 );
  }
  for(int k = 0;k<100;k++) {
    src3[k] = rand_int_min2+ rand() % ( rand_int_max2 - rand_int_min2 );
  }

  for(int k = 0;k<100;k++) {
    printf("src1 is %lf ",src1[k]);
  }
  calc(src1,src2,src3);
  double res= 0;
  for(int m = 0;m<100;m++) {
    res += src1[m];
  }
  printf("res is %lf\n",res);
  return 0;
}

bb分块

COUNT:1604735257<bb 78>:
# # RANGE [0, 2147483647] NONZERO 2147483647
k_3019 = PHI <k_1827(216), 0(301)>
# temp0_1543 = PHI <_1251(216), 0.0(301)>
# temp1_2883 = PHI <_1249(216), 0.0(301)>
# temp2_224 = PHI <_1247(216), 0.0(301)>
# temp3_2699 = PHI <_1245(216), 0.0(301)>
# temp4_1545 = PHI <_1243(216), 0.0(301)>
# vect_temp0_1543.1410_1003 = PHI <vect__1251.1527_708(216), { 0.0, 0.0, 0.0, 0.0 }(301)>
# vect_temp1_2883.1411_1002 = PHI <vect__1249.1530_701(216), { 0.0, 0.0, 0.0, 0.0 }(301)>
# vect_temp2_224.1412_1001 = PHI <vect__1247.1533_694(216), { 0.0, 0.0, 0.0, 0.0 }(301)>
# vect_temp3_2699.1413_1000 = PHI <vect__1245.1536_687(216), { 0.0, 0.0, 0.0, 0.0 }(301)>
# vect_temp4_1545.1414_999 = PHI <vect__1243.1539_670(216), { 0.0, 0.0, 0.0, 0.0 }(301)>
# # PT = nonlocal escaped null
# ALIGN = 4, MISALIGN = 0
vectp.1415_998 = PHI <vectp.1415_997(216), _1703(301)>
# ivtmp_667 = PHI <ivtmp_666(216), 0(301)>
# DEBUG temp4D.7772 => NULL
# DEBUG temp3D.7771 => NULL
# DEBUG temp2D.7770 => NULL
# DEBUG temp1D.7769 => NULL
# DEBUG temp0D.7768 => NULL
# DEBUG kD.7615 => NULL
# DEBUG BEGIN_STMT
# DEBUG BEGIN_STMT
# RANGE [0, 2147483646] NONZERO 2147483647
_1705 = (long unsigned intD.10) k_3019;
# RANGE [0, 8589934584] NONZERO 8589934588
_1706 = _1705 * 4;
# PT = nonlocal escaped null
_1707 = _1703 + _1706;
# VUSE <.MEM_2600>
vect_j_1708.1417_996 = MEM <vector(8) intD.6> [(INT_TD.3736 *)vectp.1415_998];
# VUSE <.MEM_2600>
j_1708 = *_1707;
# DEBUG jD.7613 => NULL
# DEBUG BEGIN_STMT
vect__1709.1418_994 = vect_j_1708.1417_996 * { 3, 3, 3, 3, 3, 3, 3, 3 };
_1709 = j_1708 * 3;
# RANGE ~[2147483648, 18446744071562067967]
_1710 = (long unsigned intD.10) _1709;
# RANGE [0, 18446744073709551608] NONZERO 18446744073709551608
_1711 = _1710 * 8;
# PT = nonlocal null
_1712 = x_242(D) + _1711;
# VUSE <.MEM_2600>
# USE = anything
vect__1713.1419_991 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, x_242(D), vect__1709.1418_994, {  Nan,  Nan,  Nan,  Nan }, 8);
vect__1713.1420_990 = VEC_PERM_EXPR <vect__1709.1418_994, vect__1709.1418_994, { 4, 5, 6, 7, 4, 5, 6, 7 }>;
# VUSE <.MEM_2600>
# USE = anything
vect__1713.1419_989 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, x_242(D), vect__1713.1420_990, {  Nan,  Nan,  Nan,  Nan }, 8);
# VUSE <.MEM_2600>
_1713 = *_1712;
vect_xij_1714.1421_987 = vect_cst__988 - vect__1713.1419_991;
vect_xij_1714.1421_986 = vect_cst__988 - vect__1713.1419_989;
xij_1714 = xi_1687 - _1713;
# DEBUG xijD.7655 => NULL
# DEBUG BEGIN_STMT
# RANGE ~[2147483649, 18446744071562067968]
_1715 = _1710 + 1;
# RANGE [0, 18446744073709551608] NONZERO 18446744073709551608
_1716 = _1715 * 8;
# PT = nonlocal null
_1717 = x_242(D) + _1716;
# VUSE <.MEM_2600>
# USE = anything
vect__1718.1422_980 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _983, vect__1709.1418_994, {  Nan,  Nan,  Nan,  Nan }, 8);
# VUSE <.MEM_2600>
# USE = anything
vect__1718.1422_977 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _983, vect__1713.1420_990, {  Nan,  Nan,  Nan,  Nan }, 8);
# VUSE <.MEM_2600>
_1718 = *_1717;
vect_yij_1719.1424_975 = vect_cst__976 - vect__1718.1422_980;
vect_yij_1719.1424_974 = vect_cst__976 - vect__1718.1422_977;
yij_1719 = yi_1691 - _1718;
# DEBUG yijD.7656 => NULL
# DEBUG BEGIN_STMT
# RANGE ~[2147483650, 18446744071562067969]
_1720 = _1710 + 2;
# RANGE [0, 18446744073709551608] NONZERO 18446744073709551608
_1721 = _1720 * 8;
# PT = nonlocal null
_1722 = x_242(D) + _1721;
# VUSE <.MEM_2600>
# USE = anything
vect__1723.1425_967 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _971, vect__1709.1418_994, {  Nan,  Nan,  Nan,  Nan }, 8);
# VUSE <.MEM_2600>
# USE = anything
vect__1723.1425_965 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _971, vect__1713.1420_990, {  Nan,  Nan,  Nan,  Nan }, 8);
# VUSE <.MEM_2600>
_1723 = *_1722;
vect_zij_1724.1427_963 = vect_cst__964 - vect__1723.1425_967;
vect_zij_1724.1427_962 = vect_cst__964 - vect__1723.1425_965;
zij_1724 = zi_1695 - _1723;
# DEBUG zijD.7657 => NULL
# DEBUG BEGIN_STMT
vect_powmult_2740.1428_961 = vect_xij_1714.1421_987 * vect_xij_1714.1421_987;
vect_powmult_2740.1428_960 = vect_xij_1714.1421_986 * vect_xij_1714.1421_986;
powmult_2740 = xij_1714 * xij_1714;
vect_powmult_2713.1429_959 = vect_yij_1719.1424_975 * vect_yij_1719.1424_975;
vect_powmult_2713.1429_958 = vect_yij_1719.1424_974 * vect_yij_1719.1424_974;
powmult_2713 = yij_1719 * yij_1719;
vect_powmult_1661.1430_957 = vect_zij_1724.1427_963 * vect_zij_1724.1427_963;
vect_powmult_1661.1430_956 = vect_zij_1724.1427_962 * vect_zij_1724.1427_962;
powmult_1661 = zij_1724 * zij_1724;
vect__1971.1431_955 = vect_powmult_1661.1430_957 + vect_powmult_2713.1429_959;
vect__1971.1431_954 = vect_powmult_1661.1430_956 + vect_powmult_2713.1429_958;
_1971 = powmult_1661 + powmult_2713;
vect_r2_1729.1432_953 = vect__1971.1431_955 + vect_powmult_2740.1428_961;
vect_r2_1729.1432_952 = vect__1971.1431_954 + vect_powmult_2740.1428_960;           //  compute r2
r2_1729 = _1971 + powmult_2740;
# DEBUG r2D.7683 => NULL
# DEBUG BEGIN_STMT
# DEBUG r2D.7683 => NULL
# DEBUG BEGIN_STMT
# DEBUG BEGIN_STMT
vect__1730.1433_950 = .SQRT (vect_r2_1729.1432_953);        // after if (r2 > rgbmaxpsmax2) compute 
vect__1730.1433_949 = .SQRT (vect_r2_1729.1432_952);
vect_dij1i_1731.1434_947 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1730.1433_950;
vect_dij1i_1731.1434_946 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1730.1433_949;
# DEBUG dij1iD.7664 => NULL
# DEBUG BEGIN_STMT
vect_dij_1732.1435_945 = vect_r2_1729.1432_953 * vect_dij1i_1731.1434_947;
vect_dij_1732.1435_944 = vect_r2_1729.1432_952 * vect_dij1i_1731.1434_946;
dij_1732 = r2_1729 *  Inf;
# DEBUG dijD.7673 => NULL
# DEBUG BEGIN_STMT
_1733 = (long unsigned intD.10) j_1708;
_1734 = _1733 * 8;
_1241 = _1242 + _1734;
# PT = nonlocal escaped null
_1735 = (doubleD.32 *) _1241;
mask__1239.1436_942 = vect_r2_1729.1432_953 <= vect_cst__943;    //  if (r2 > rgbmaxpsmax2)
mask__1239.1436_941 = vect_r2_1729.1432_952 <= vect_cst__943;
_1239 = r2_1729 <= powmult_2494;
stmp_938 = VIEW_CONVERT_EXPR<vector(4) doubleD.32>(mask__1239.1436_942);
# VUSE <.MEM_2600>
# USE = anything
vect__1736.1437_937 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _939, vect_j_1708.1417_996, stmp_938, 8);  // after if (r2 > rgbmaxpsmax2) compute 
vect__1736.1438_936 = VEC_PERM_EXPR <vect_j_1708.1417_996, vect_j_1708.1417_996, { 4, 5, 6, 7, 4, 5, 6, 7 }>;
stmp_935 = VIEW_CONVERT_EXPR<vector(4) doubleD.32>(mask__1239.1436_941);
# VUSE <.MEM_2600>
# USE = anything
vect__1736.1437_934 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _939, vect__1736.1438_936, stmp_935, 8);
_1237 = _1238 + _1734;
# PT = nonlocal escaped null
_1737 = (doubleD.32 *) _1237;
# VUSE <.MEM_2600>
# USE = anything
vect__1738.1439_931 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _933, vect_j_1708.1417_996, stmp_938, 8);
# VUSE <.MEM_2600>
# USE = anything
vect__1738.1439_924 = __builtin_ia32_gatheraltsiv4df D.2164 ({ 0.0, 0.0, 0.0, 0.0 }, _933, vect__1736.1438_936, stmp_935, 8);
vect__1739.1441_922 = vect__1738.1439_931 + { -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2 };
vect__1739.1441_921 = vect__1738.1439_924 + { -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2, -8.99999999999999966693309261245303787291049957275390625e-2 };
vect_sj_1740.1442_920 = vect__1736.1437_937 * vect__1739.1441_922;
vect_sj_1740.1442_919 = vect__1736.1437_934 * vect__1739.1441_921;
# DEBUG sjD.7686 => NULL
# DEBUG BEGIN_STMT
# DEBUG sj2D.7687 => NULL
# DEBUG BEGIN_STMT
vect__1743.1443_917 = vect_sj_1740.1442_920 + { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 };
vect__1743.1443_916 = vect_sj_1740.1442_919 + { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 };
mask__1463.1444_915 = vect_dij_1732.1435_945 <= vect__1743.1443_917;
mask__1463.1444_914 = vect_dij_1732.1435_944 <= vect__1743.1443_916;
_1463 = dij_1732 <= 2.0e+1;
mask__1462.1445_913 = mask__1239.1436_942 & mask__1463.1444_915;    //  if (dij > rgbmax + sj)
mask__1462.1445_912 = mask__1239.1436_941 & mask__1463.1444_914;
_1462 = _1239 & _1463;
vect_powmult_1725.1446_911 = vect_sj_1740.1442_920 * vect_sj_1740.1442_920;
vect_powmult_1725.1446_910 = vect_sj_1740.1442_919 * vect_sj_1740.1442_919;
# DEBUG BEGIN_STMT
vect__1744.1447_908 = { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 } - vect_sj_1740.1442_920;   // begin if ((dij > rgbmax - sj))
vect__1744.1447_907 = { 2.0e+1, 2.0e+1, 2.0e+1, 2.0e+1 } - vect_sj_1740.1442_919;
mask__1461.1448_906 = vect_dij_1732.1435_945 > vect__1744.1447_908;
mask__1461.1448_905 = vect_dij_1732.1435_944 > vect__1744.1447_907;
_1461 = dij_1732 > 2.0e+1;
mask__1460.1449_904 = mask__1461.1448_906 & mask__1462.1445_913;      //  if ((dij > rgbmax - sj))  enter if-else chain
mask__1460.1449_903 = mask__1461.1448_905 & mask__1462.1445_912;
_1460 = _1461 & _1462;                    // else add
# DEBUG BEGIN_STMT
vect__1745.1450_902 = vect_dij_1732.1435_945 - vect_sj_1740.1442_920;
vect__1745.1450_901 = vect_dij_1732.1435_944 - vect_sj_1740.1442_919;
vect_uij_1746.1451_899 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1745.1450_902;
vect_uij_1746.1451_898 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1745.1450_901;
uij_1746 = 0.0 / r2_1729;
# DEBUG uijD.7689 => NULL
# DEBUG BEGIN_STMT
vect__1748.1452_896 = vect_dij_1732.1435_945 * { 8.0e+1, 8.0e+1, 8.0e+1, 8.0e+1 };
vect__1748.1452_895 = vect_dij_1732.1435_944 * { 8.0e+1, 8.0e+1, 8.0e+1, 8.0e+1 };
_1748 = dij_1732 * 8.0e+1;
vect__2057.1453_894 = vect_powmult_1725.1446_911 - vect_r2_1729.1432_953;
vect__2057.1453_893 = vect_powmult_1725.1446_910 - vect_r2_1729.1432_952;
_2057 = -r2_1729;
vect__1750.1454_892 = vect__1748.1452_896 + vect__2057.1453_894;
vect__1750.1454_891 = vect__1748.1452_895 + vect__2057.1453_893;
_1750 = _1748 + _2057;
vect__1751.1455_889 = vect__1750.1454_892 * { 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3 };
vect__1751.1455_888 = vect__1750.1454_891 * { 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3, 2.50000000000000048572257327350598643533885478973388671875e-3 };
_1751 = _1750 * 2.50000000000000048572257327350598643533885478973388671875e-3;
vect__2086.1456_886 = vect_dij_1732.1435_945 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 };
vect__2086.1456_885 = vect_dij_1732.1435_944 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 };
_2086 = dij_1732 * 2.0e+0;
vect__1753.1457_884 = vect_uij_1746.1451_899 * vect__2086.1456_886;
vect__1753.1457_883 = vect_uij_1746.1451_898 * vect__2086.1456_885;
_1753 = uij_1746 * _2086;
vect__1754.1458_882 = vect__1751.1455_889 - vect__1753.1457_884;
vect__1754.1458_881 = vect__1751.1455_888 - vect__1753.1457_883;
_1754 = _1751 - _1753;
vect__1755.1459_879 = vect__1745.1450_902 * { 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2 };
vect__1755.1459_878 = vect__1745.1450_901 * { 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2, 5.000000000000000277555756156289135105907917022705078125e-2 };
_1755 = dij_1732 * 5.000000000000000277555756156289135105907917022705078125e-2;
vect__1756.1460_877 = __svml_log4_mask_e9D.7954 (vect__1755.1459_879);
vect__1756.1460_876 = __svml_log4_mask_e9D.7954 (vect__1755.1459_878);
vect__1757.1461_874 = vect__1756.1460_877 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 };
vect__1757.1461_873 = vect__1756.1460_876 * { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 };
vect__2097.1462_871 = vect__1754.1458_882 + { -1.0e+0, -1.0e+0, -1.0e+0, -1.0e+0 };
vect__2097.1462_870 = vect__1754.1458_881 + { -1.0e+0, -1.0e+0, -1.0e+0, -1.0e+0 };
_2097 = _1754 - 1.0e+0;
vect__1759.1463_869 = vect__2097.1462_871 - vect__1757.1461_874;
vect__1759.1463_868 = vect__2097.1462_870 - vect__1757.1461_873;
vect__2099.1464_866 = vect_dij1i_1731.1434_947 * { 1.25e-1, 1.25e-1, 1.25e-1, 1.25e-1 };
vect__2099.1464_865 = vect_dij1i_1731.1434_946 * { 1.25e-1, 1.25e-1, 1.25e-1, 1.25e-1 };
vect__1761.1465_864 = vect__1759.1463_869 * vect__2099.1464_866;
vect__1761.1465_863 = vect__1759.1463_868 * vect__2099.1464_865;
_1761 = _2097 *  Inf;               ///   else add
# DEBUG temp0D.7768 => NULL
mask__1458.1466_862 = vect_dij_1732.1435_945 <= vect__1744.1447_908;           //  begin else if (dij > 4.0 * sj)
mask__1458.1466_861 = vect_dij_1732.1435_944 <= vect__1744.1447_907;
mask__1457.1467_860 = mask__1458.1466_862 & mask__1462.1445_913;
mask__1457.1467_859 = mask__1458.1466_861 & mask__1462.1445_912;
# DEBUG BEGIN_STMT
vect__1764.1468_857 = vect_sj_1740.1442_920 * { 4.0e+0, 4.0e+0, 4.0e+0, 4.0e+0 };
vect__1764.1468_856 = vect_sj_1740.1442_919 * { 4.0e+0, 4.0e+0, 4.0e+0, 4.0e+0 };
mask__1456.1469_855 = vect_dij_1732.1435_945 > vect__1764.1468_857;
mask__1456.1469_854 = vect_dij_1732.1435_944 > vect__1764.1468_856;
_1456 = dij_1732 > 0.0;
mask__1455.1470_853 = mask__1456.1469_855 & mask__1457.1467_860;    //  else if (dij > 4.0 * sj)
mask__1455.1470_852 = mask__1456.1469_854 & mask__1457.1467_859;
_1455 = _1456 & _1462;             ///  else add
# DEBUG BEGIN_STMT
vect_powmult_1726.1471_851 = vect_dij1i_1731.1434_947 * vect_dij1i_1731.1434_947;
vect_powmult_1726.1471_846 = vect_dij1i_1731.1434_946 * vect_dij1i_1731.1434_946;
# DEBUG dij2iD.7672 => NULL
# DEBUG BEGIN_STMT
vect_tmpsd_1766.1472_845 = vect_powmult_1725.1446_911 * vect_powmult_1726.1471_851;
vect_tmpsd_1766.1472_844 = vect_powmult_1725.1446_910 * vect_powmult_1726.1471_846;
# DEBUG tmpsdD.7695 => NULL
# DEBUG BEGIN_STMT
vect__1767.1473_842 = vect_tmpsd_1766.1472_845 * { 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1 };
vect__1767.1473_841 = vect_tmpsd_1766.1472_844 * { 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1, 4.54545454545454530315140573293319903314113616943359375e-1 };
vect__1768.1474_839 = vect__1767.1473_842 + { 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1 };
vect__1768.1474_838 = vect__1767.1473_841 + { 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1, 4.444444444444444197728216749965213239192962646484375e-1 };
vect__1769.1475_837 = vect_tmpsd_1766.1472_845 * vect__1768.1474_839;
vect__1769.1475_836 = vect_tmpsd_1766.1472_844 * vect__1768.1474_838;
vect__1770.1476_834 = vect__1769.1475_837 + { 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1 };
vect__1770.1476_832 = vect__1769.1475_836 + { 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1, 4.28571428571428547638078043746645562350749969482421875e-1 };
vect__1771.1477_831 = vect_tmpsd_1766.1472_845 * vect__1770.1476_834;
vect__1771.1477_830 = vect_tmpsd_1766.1472_844 * vect__1770.1476_832;
vect__1772.1478_824 = vect__1771.1477_831 + { 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1 };
vect__1772.1478_823 = vect__1771.1477_830 + { 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1, 4.0000000000000002220446049250313080847263336181640625e-1 };
vect__1773.1479_822 = vect_tmpsd_1766.1472_845 * vect__1772.1478_824;
vect__1773.1479_821 = vect_tmpsd_1766.1472_844 * vect__1772.1478_823;
vect_dumbo_1774.1480_819 = vect__1773.1479_822 + { 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1 };
vect_dumbo_1774.1480_818 = vect__1773.1479_821 + { 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1, 3.33333333333333314829616256247390992939472198486328125e-1 };
# DEBUG dumboD.7694 => NULL
# DEBUG BEGIN_STMT
vect__2892.1481_817 = vect_powmult_1726.1471_851 * vect_sj_1740.1442_920;
vect__2892.1481_816 = vect_powmult_1726.1471_846 * vect_sj_1740.1442_919;
vect__1776.1482_815 = vect_tmpsd_1766.1472_845 * vect__2892.1481_817;
vect__1776.1482_814 = vect_tmpsd_1766.1472_844 * vect__2892.1481_816;
vect__1777.1483_813 = vect_dumbo_1774.1480_819 * vect__1776.1482_815;
vect__1777.1483_812 = vect_dumbo_1774.1480_818 * vect__1776.1482_814;
# DEBUG temp1D.7769 => NULL
mask__1453.1484_811 = vect_dij_1732.1435_945 <= vect__1764.1468_857;   // begin else if (dij > ri + sj)
mask__1453.1484_810 = vect_dij_1732.1435_944 <= vect__1764.1468_856;
_1453 = dij_1732 <= 0.0;
mask__1452.1485_809 = mask__1453.1484_811 & mask__1457.1467_860;
mask__1452.1485_808 = mask__1453.1484_810 & mask__1457.1467_859;
_1452 = _1453 & _1462;      // else add
# DEBUG BEGIN_STMT
vect__1780.1486_806 = vect_cst__807 + vect_sj_1740.1442_920;
vect__1780.1486_805 = vect_cst__807 + vect_sj_1740.1442_919;
_1780 = ri_1700;
mask__1451.1487_804 = vect_dij_1732.1435_945 > vect__1780.1486_806;
mask__1451.1487_803 = vect_dij_1732.1435_944 > vect__1780.1486_805;
_1451 = dij_1732 > _1780;
mask__1450.1488_802 = mask__1451.1487_804 & mask__1452.1485_809;
mask__1450.1488_801 = mask__1451.1487_803 & mask__1452.1485_808;    //  else if (dij > ri + sj)
_1450 = _1451 & _1452;
# DEBUG BEGIN_STMT
vect__1782.1489_800 = vect_sj_1740.1442_920 / vect__2057.1453_894;
vect__1782.1489_799 = vect_sj_1740.1442_919 / vect__2057.1453_893;
_1782 = 0.0 / r2_1729;
vect__1784.1490_797 = vect_dij_1732.1435_945 + vect_sj_1740.1442_920;
vect__1784.1490_796 = vect_dij_1732.1435_944 + vect_sj_1740.1442_919;
vect__1785.1491_795 = vect__1745.1450_902 / vect__1784.1490_797;
vect__1785.1491_794 = vect__1745.1450_901 / vect__1784.1490_796;
vect__1786.1492_793 = __svml_log4_mask_e9D.7987 (vect__1785.1491_795);
vect__1786.1492_792 = __svml_log4_mask_e9D.7987 (vect__1785.1491_794);
vect__1894.1493_790 = vect_dij1i_1731.1434_947 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 };
vect__1894.1493_789 = vect_dij1i_1731.1434_946 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 };
vect__1788.1494_788 = vect__1786.1492_793 * vect__1894.1493_790;
vect__1788.1494_787 = vect__1786.1492_792 * vect__1894.1493_789;
vect__1789.1495_786 = vect__1782.1489_800 - vect__1788.1494_788;
vect__1789.1495_785 = vect__1782.1489_799 - vect__1788.1494_787;
_1789 = _1782 -  Nan;
vect__1790.1496_783 = vect__1789.1495_786 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 };
vect__1790.1496_782 = vect__1789.1495_785 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 };
_1790 = _1789 * 5.0e-1;
# DEBUG temp2D.7770 => NULL
mask__1448.1497_781 = vect_dij_1732.1435_945 <= vect__1780.1486_806;    // begin  else if (dij > fabs(ri - sj))
mask__1448.1497_780 = vect_dij_1732.1435_944 <= vect__1780.1486_805;
_1448 = dij_1732 <= _1780;
mask__1447.1498_779 = mask__1448.1497_781 & mask__1452.1485_809;
mask__1447.1498_778 = mask__1448.1497_780 & mask__1452.1485_808;
_1447 = _1448 & _1452;
# DEBUG BEGIN_STMT
vect__1793.1499_776 = vect_cst__807 - vect_sj_1740.1442_920;
vect__1793.1499_775 = vect_cst__807 - vect_sj_1740.1442_919;
vect__1794.1500_774 = ABS_EXPR <vect__1793.1499_776>;
vect__1794.1500_773 = ABS_EXPR <vect__1793.1499_775>;
_1794 = ABS_EXPR <_1780>;
mask__1446.1501_772 = vect_dij_1732.1435_945 > vect__1794.1500_774;
mask__1446.1501_771 = vect_dij_1732.1435_944 > vect__1794.1500_773;
_1446 = dij_1732 > _1794;
mask__1445.1502_770 = mask__1446.1501_772 & mask__1447.1498_779;
mask__1445.1502_769 = mask__1446.1501_771 & mask__1447.1498_778;    // else if (dij > fabs(ri - sj))
_1445 = _1446 & _1447;
# DEBUG BEGIN_STMT
vect__2372.1503_767 = vect_cst__768 - vect_powmult_1725.1446_911;
vect__2372.1503_766 = vect_cst__768 - vect_powmult_1725.1446_910;
_2372 = powmult_1728;
vect__1798.1504_765 = vect_r2_1729.1432_953 + vect__2372.1503_767;
vect__1798.1504_764 = vect_r2_1729.1432_952 + vect__2372.1503_766;
_1798 = r2_1729 + _2372;
vect__2373.1505_762 = vect__1798.1504_765 * vect_cst__763;
vect__2373.1505_761 = vect__1798.1504_764 * vect_cst__763;
_2373 = _1798 * _2894;
vect_theta_1800.1506_760 = vect_dij1i_1731.1434_947 * vect__2373.1505_762;
vect_theta_1800.1506_759 = vect_dij1i_1731.1434_946 * vect__2373.1505_761;
theta_1800 = _2373 *  Inf;
# DEBUG thetaD.7670 => NULL
# DEBUG BEGIN_STMT
vect_uij_1802.1507_757 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1784.1490_797;
vect_uij_1802.1507_756 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 } / vect__1784.1490_796;
# DEBUG uijD.7689 => NULL
# DEBUG BEGIN_STMT
vect__1803.1508_754 = vect_theta_1800.1506_760 + { -2.0e+0, -2.0e+0, -2.0e+0, -2.0e+0 };
vect__1803.1508_753 = vect_theta_1800.1506_759 + { -2.0e+0, -2.0e+0, -2.0e+0, -2.0e+0 };
_1803 = theta_1800 - 2.0e+0;
vect__1804.1509_751 = vect_cst__752 * vect__1803.1508_754;
vect__1804.1509_750 = vect_cst__752 * vect__1803.1508_753;
_1804 = ri1i_1701 * _1803;
vect__1805.1510_749 = vect_uij_1802.1507_757 + vect__1804.1509_751;
vect__1805.1510_748 = vect_uij_1802.1507_756 + vect__1804.1509_750;
_1805 = uij_1746 + _1804;
vect__1806.1511_746 = vect_uij_1802.1507_757 * vect_cst__807;
vect__1806.1511_745 = vect_uij_1802.1507_756 * vect_cst__807;
_1806 = ri_1700 * uij_1746;
vect__1807.1512_744 = __svml_log4_mask_e9D.8008 (vect__1806.1511_746);
vect__1807.1512_743 = __svml_log4_mask_e9D.8008 (vect__1806.1511_745);
vect__1808.1513_742 = vect_dij1i_1731.1434_947 * vect__1807.1512_744;
vect__1808.1513_741 = vect_dij1i_1731.1434_946 * vect__1807.1512_743;
vect__1809.1514_740 = vect__1805.1510_749 - vect__1808.1513_742;
vect__1809.1514_739 = vect__1805.1510_748 - vect__1808.1513_741;
_1809 = _1805 -  Nan;
vect__1810.1515_737 = vect__1809.1514_740 * { 2.5e-1, 2.5e-1, 2.5e-1, 2.5e-1 };
vect__1810.1515_736 = vect__1809.1514_739 * { 2.5e-1, 2.5e-1, 2.5e-1, 2.5e-1 };
_1810 = _1809 * 2.5e-1;
# DEBUG temp3D.7771 => NULL
mask__1443.1516_735 = vect_dij_1732.1435_945 <= vect__1794.1500_774;   // begin  else if (ri < sj)
mask__1443.1516_734 = vect_dij_1732.1435_944 <= vect__1794.1500_773;
_1443 = dij_1732 <= _1794;
mask__1442.1517_733 = mask__1443.1516_735 & mask__1447.1498_779;
mask__1442.1517_732 = mask__1443.1516_734 & mask__1447.1498_778;
_1442 = _1443 & _1447;
# DEBUG BEGIN_STMT
mask__1441.1518_730 = vect_cst__807 < vect_sj_1740.1442_920;
mask__1441.1518_729 = vect_cst__807 < vect_sj_1740.1442_919;
_1441 = _1699 < 8.99999999999999966693309261245303787291049957275390625e-2;
mask__1406.1519_728 = mask__1441.1518_730 & mask__1442.1517_733;
mask__1406.1519_727 = mask__1441.1518_729 & mask__1442.1517_732;   //  else if (ri < sj)
_1406 = _1441 & _1442;
# DEBUG BEGIN_STMT
vect__1816.1520_725 = vect__1782.1489_800 - vect_cst__726;
vect__1816.1520_724 = vect__1782.1489_799 - vect_cst__726;
_1816 = _1782 - _1815;
vect__1235.1521_723 = -vect__1785.1491_795;
vect__1235.1521_722 = -vect__1785.1491_794;
vect__1820.1522_721 = __svml_log4_mask_e9D.8019 (vect__1235.1521_723);
vect__1820.1522_720 = __svml_log4_mask_e9D.8019 (vect__1235.1521_722);
vect__1822.1523_719 = vect__1820.1522_721 * vect__1894.1493_790;
vect__1822.1523_718 = vect__1820.1522_720 * vect__1894.1493_789;
vect__1823.1524_717 = vect__1816.1520_725 - vect__1822.1523_719;
vect__1823.1524_716 = vect__1816.1520_724 - vect__1822.1523_718;
_1823 = _1816 -  Nan;
vect__1824.1525_714 = vect__1823.1524_717 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 };
vect__1824.1525_713 = vect__1823.1524_716 * { 5.0e-1, 5.0e-1, 5.0e-1, 5.0e-1 }; //  end  if-else
_1824 = _1823 * 5.0e-1;
# DEBUG temp4D.7772 => NULL
vect__ifc__1252.1526_711 = VEC_COND_EXPR <mask__1460.1449_904, vect__1761.1465_864, { 0.0, 0.0, 0.0, 0.0 }>;
vect__ifc__1252.1526_710 = VEC_COND_EXPR <mask__1460.1449_903, vect__1761.1465_863, { 0.0, 0.0, 0.0, 0.0 }>;
_ifc__1252 = _1460 ? _1761 : 0.0;
vect__1251.1527_709 = vect_temp0_1543.1410_1003 + vect__ifc__1252.1526_711;
vect__1251.1527_708 = vect__1251.1527_709 + vect__ifc__1252.1526_710;
_1251 = temp0_1543 + _ifc__1252;
vect__ifc__1250.1529_704 = VEC_COND_EXPR <mask__1455.1470_853, vect__1777.1483_813, { 0.0, 0.0, 0.0, 0.0 }>;
vect__ifc__1250.1529_703 = VEC_COND_EXPR <mask__1455.1470_852, vect__1777.1483_812, { 0.0, 0.0, 0.0, 0.0 }>;
_ifc__1250 = _1455 ?  Nan : 0.0;
vect__1249.1530_702 = vect_temp1_2883.1411_1002 - vect__ifc__1250.1529_704;
vect__1249.1530_701 = vect__1249.1530_702 - vect__ifc__1250.1529_703;
_1249 = temp1_2883 - _ifc__1250;
vect__ifc__1248.1532_697 = VEC_COND_EXPR <mask__1450.1488_802, vect__1790.1496_783, { 0.0, 0.0, 0.0, 0.0 }>;
vect__ifc__1248.1532_696 = VEC_COND_EXPR <mask__1450.1488_801, vect__1790.1496_782, { 0.0, 0.0, 0.0, 0.0 }>;
_ifc__1248 = _1450 ? _1790 : 0.0;
vect__1247.1533_695 = vect_temp2_224.1412_1001 + vect__ifc__1248.1532_697;
vect__1247.1533_694 = vect__1247.1533_695 + vect__ifc__1248.1532_696;
_1247 = temp2_224 + _ifc__1248;
vect__ifc__1246.1535_690 = VEC_COND_EXPR <mask__1445.1502_770, vect__1810.1515_737, { 0.0, 0.0, 0.0, 0.0 }>;
vect__ifc__1246.1535_689 = VEC_COND_EXPR <mask__1445.1502_769, vect__1810.1515_736, { 0.0, 0.0, 0.0, 0.0 }>;
_ifc__1246 = _1445 ? _1810 : 0.0;
vect__1245.1536_688 = vect_temp3_2699.1413_1000 + vect__ifc__1246.1535_690;
vect__1245.1536_687 = vect__1245.1536_688 + vect__ifc__1246.1535_689;
_1245 = temp3_2699 + _ifc__1246;
vect__ifc__1244.1538_673 = VEC_COND_EXPR <mask__1406.1519_728, vect__1824.1525_714, { 0.0, 0.0, 0.0, 0.0 }>;
vect__ifc__1244.1538_672 = VEC_COND_EXPR <mask__1406.1519_727, vect__1824.1525_713, { 0.0, 0.0, 0.0, 0.0 }>;
_ifc__1244 = _1406 ? _1824 : 0.0;
vect__1243.1539_671 = vect_temp4_1545.1414_999 + vect__ifc__1244.1538_673;
vect__1243.1539_670 = vect__1243.1539_671 + vect__ifc__1244.1538_672;
_1243 = temp4_1545 + _ifc__1244;
# DEBUG temp4D.7772 => _1243
# DEBUG temp3D.7771 => _1245
# DEBUG temp2D.7770 => _1247
# DEBUG temp1D.7769 => _1249
# DEBUG temp0D.7768 => _1251
# DEBUG BEGIN_STMT
# RANGE [1, 2147483647] NONZERO 2147483647
k_1827 = k_3019 + 1;
# DEBUG temp4D.7772 => _1243
# DEBUG temp3D.7771 => _1245
# DEBUG temp2D.7770 => _1247
# DEBUG temp1D.7769 => _1249
# DEBUG temp0D.7768 => _1251
# DEBUG kD.7615 => k_1827
# DEBUG BEGIN_STMT
# PT = nonlocal escaped null
vectp.1415_997 = vectp.1415_998 + 32;
ivtmp_666 = ivtmp_667 + 1;
if (ivtmp_666 < bnd.1407_1013)
  goto <bb 216>; [83.33%]
else
  goto <bb 303>; [16.67%]

bb 分块的优化方案:

1:找到vec_cond_expr,将其中第一个参数mask作为上一个bb的结束,(其后还有一个mask)并且在其后新建一个该mask与0进行比较的gimple_cond,将这两个mask相与。同时新建该mask判断为true和false的edge,分别指向分割的bb和其下一个bb。

2:以vec_cond_expr的第二个参数的定义语句(SSA_NAME_DEF_STMT)作为要分割bb的末尾,进行分割。并且生成一条指向其下一个bb的edge。同时将其作为mask判断为false的edge的dest。

optimize_mask_stores 代码

10093 /* The code below is trying to perform simple optimization - revert
10094    if-conversion for masked stores, i.e. if the mask of a store is zero
10095    do not perform it and all stored value producers also if possible.
10096    For example,
10097      for (i=0; i<n; i++)
10098        if (c[i])
10099   {
10100     p1[i] += 1;
10101     p2[i] = p3[i] +2;
10102   }
10103    this transformation will produce the following semi-hammock:
10104 
10105    if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
10106      {
10107        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
10108        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
10109        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
10110        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
10111        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
10112        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
10113      }

   LOOP is the loop whose body blocks are scanned for IFN_MASK_STORE calls;
   the function returns without changing anything when none is found.
   For every group of stores sharing one mask a new block (STORE_BB) is
   created, guarded by a comparison of the mask against zero, and the
   stores plus the producers used only inside STORE_BB are moved into it.
10114 */
10115 
10116 void
10117 optimize_mask_stores (class loop *loop)
10118 {
10119   basic_block *bbs = get_loop_body (loop);
10120   unsigned nbbs = loop->num_nodes;
10121   unsigned i;
10122   basic_block bb;
10123   class loop *bb_loop;
10124   gimple_stmt_iterator gsi;
10125   gimple *stmt;
10126   auto_vec<gimple *> worklist;
10127   auto_purge_vect_location sentinel;
10128 
10129   vect_location = find_loop_location (loop);
10130   /* Pick up all masked stores in loop if any.  */
10131   for (i = 0; i < nbbs; i++)
10132     {
10133       bb = bbs[i];
10134       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
10135      gsi_next (&gsi))
10136   {
10137     stmt = gsi_stmt (gsi);
10138     if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10139       worklist.safe_push (stmt);
10140   }
10141     }
10142 
  /* The block array is not referenced past this point.  */
10143   free (bbs);
10144   if (worklist.is_empty ())
10145     return;
10146 
10147   /* Loop has masked stores.  Each iteration pops one store from the
     worklist and builds one guarded STORE_BB for it.  */
10148   while (!worklist.is_empty ())
10149     {
10150       gimple *last, *last_store;
10151       edge e, efalse;
10152       tree mask;
10153       basic_block store_bb, join_bb;
10154       gimple_stmt_iterator gsi_to;
10155       tree vdef, new_vdef;
10156       gphi *phi;
10157       tree vectype;
10158       tree zero;
10159 
10160       last = worklist.pop ();
      /* Argument 2 of the IFN_MASK_STORE call is taken as the mask.  */
10161       mask = gimple_call_arg (last, 2);
10162       bb = gimple_bb (last);
10163       /* Create then_bb and if-then structure in CFG, then_bb belongs to
10164    the same loop as if_bb.  It could be different to LOOP when two
10165    level loop-nest is vectorized and mask_store belongs to the inner
10166    one.  */
10167       e = split_block (bb, last);
10168       bb_loop = bb->loop_father;
10169       gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
10170       join_bb = e->dest;
10171       store_bb = create_empty_bb (bb);
10172       add_bb_to_loop (store_bb, bb_loop);
      /* E (BB -> JOIN_BB) becomes the true edge of the condition built
	 below; EFALSE (BB -> STORE_BB) is its false edge.  */
10173       e->flags = EDGE_TRUE_VALUE;
10174       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
10175       /* Put STORE_BB to likely part.  NOTE(review): the assignment below
	 uses profile_probability::unlikely (), which disagrees with the
	 word likely in this comment - confirm which one is intended.  */
10176       efalse->probability = profile_probability::unlikely ();
10177       store_bb->count = efalse->count ();
10178       make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
10179       if (dom_info_available_p (CDI_DOMINATORS))
10180   set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
10181       if (dump_enabled_p ())
10182   dump_printf_loc (MSG_NOTE, vect_location,
10183        "Create new block %d to sink mask stores.",
10184        store_bb->index);
10185       /* Create vector comparison with boolean result.  The condition is
	 MASK == 0, so the true edge E bypasses STORE_BB and the false edge
	 EFALSE enters it.  */
10186       vectype = TREE_TYPE (mask);
10187       zero = build_zero_cst (vectype);
10188       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
10189       gsi = gsi_last_bb (bb);
10190       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
10191       /* Create new PHI node for vdef of the last masked store:
10192    .MEM_2 = VDEF <.MEM_1>
10193    will be converted to
10194    .MEM.3 = VDEF <.MEM_1>
10195    and new PHI node will be created in join bb
10196    .MEM_2 = PHI <.MEM_1, .MEM_3>
10197       */
10198       vdef = gimple_vdef (last);
10199       new_vdef = make_ssa_name (gimple_vop (cfun), last);
10200       gimple_set_vdef (last, new_vdef);
10201       phi = create_phi_node (vdef, join_bb);
10202       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
10203 
10204       /* Put all masked stores with the same mask to STORE_BB if possible.  */
10205       while (true)
10206   {
10207     gimple_stmt_iterator gsi_from;
10208     gimple *stmt1 = NULL;
10209 
10210     /* Move masked store to STORE_BB.  */
10211     last_store = last;
10212     gsi = gsi_for_stmt (last);
10213     gsi_from = gsi;
10214     /* Shift GSI to the previous stmt for further traversal.  */
10215     gsi_prev (&gsi);
10216     gsi_to = gsi_start_bb (store_bb);
10217     gsi_move_before (&gsi_from, &gsi_to);
10218     /* Setup GSI_TO to the non-empty block start.  */
10219     gsi_to = gsi_start_bb (store_bb);
10220     if (dump_enabled_p ())
10221       dump_printf_loc (MSG_NOTE, vect_location,
10222            "Move stmt to created bb\n%G", last);
10223     /* Move all stored value producers if possible.  The scan walks
	     backwards from the statement that preceded the store and stops
	     at the first statement that cannot be moved.  */
10224     while (!gsi_end_p (gsi))
10225       {
10226         tree lhs;
10227         imm_use_iterator imm_iter;
10228         use_operand_p use_p;
10229         bool res;
10230 
10231         /* Skip debug statements.  */
10232         if (is_gimple_debug (gsi_stmt (gsi)))
10233     {
10234       gsi_prev (&gsi);
10235       continue;
10236     }
10237         stmt1 = gsi_stmt (gsi);
10238         /* Do not consider statements writing to memory or having
10239      volatile operand.  */
10240         if (gimple_vdef (stmt1)
10241       || gimple_has_volatile_ops (stmt1))
10242     break;
	        /* Keep the position of STMT1 in GSI_FROM and step GSI back
		   first, so the traversal position does not depend on STMT1
		   once it is moved or removed.  */
10243         gsi_from = gsi;
10244         gsi_prev (&gsi);
10245         lhs = gimple_get_lhs (stmt1);
10246         if (!lhs)
10247     break;
10248 
10249         /* LHS of vectorized stmt must be SSA_NAME.  */
10250         if (TREE_CODE (lhs) != SSA_NAME)
10251     break;
10252 
10253         if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
10254     {
10255       /* Remove dead scalar statement.  */
10256       if (has_zero_uses (lhs))
10257         {
10258           gsi_remove (&gsi_from, true);
10259           continue;
10260         }
10261     }
10262 
10263         /* Check that LHS does not have uses outside of STORE_BB.  */
10264         res = true;
10265         FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
10266     {
10267       gimple *use_stmt;
10268       use_stmt = USE_STMT (use_p);
10269       if (is_gimple_debug (use_stmt))
10270         continue;
10271       if (gimple_bb (use_stmt) != store_bb)
10272         {
10273           res = false;
10274           break;
10275         }
10276     }
10277         if (!res)
10278     break;
10279 
	        /* A statement with a vuse is moved only when that vuse is
		   the same as the vuse of the store moved last.  */
10280         if (gimple_vuse (stmt1)
10281       && gimple_vuse (stmt1) != gimple_vuse (last_store))
10282     break;
10283 
10284         /* Can move STMT1 to STORE_BB.  */
10285         if (dump_enabled_p ())
10286     dump_printf_loc (MSG_NOTE, vect_location,
10287          "Move stmt to created bb\n%G", stmt1);
10288         gsi_move_before (&gsi_from, &gsi_to);
10289         /* Shift GSI_TO for further insertion.  */
10290         gsi_prev (&gsi_to);
10291       }
10292     /* Put other masked stores with the same mask to STORE_BB.  This is
	     done only when the next worklist entry has the same mask and is
	     exactly STMT1, the statement at which the scan above stopped.  */
10293     if (worklist.is_empty ()
10294         || gimple_call_arg (worklist.last (), 2) != mask
10295         || worklist.last () != stmt1)
10296       break;
10297     last = worklist.pop ();
10298   }
      /* On edge E, which bypasses STORE_BB, the PHI receives the vuse of
	 the store that was moved last.  */
10299       add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
10300     }
10301 }

optimize_mask_vec_cond 代码

10093 void
10094 optimize_mask_vec_cond (class loop *loop)
10095 {
10096   basic_block *bbs = get_loop_body (loop);
10097   unsigned nbbs = loop->num_nodes;
10098   unsigned i;
10099   basic_block bb, bb_mask;
10100   class loop *bb_loop;
10101   gimple_stmt_iterator gsi;
10102   gimple *stmt;
10103   auto_vec<gimple *> worklist;
10104   auto_purge_vect_location sentinel;
10105 
10106   enum tree_code code;
10107 
10108   vect_location = find_loop_location (loop);
10109   /* Pick up all vec_cond_expr in loop if any.  */
10110   for (i = 0; i < nbbs; i++)
10111     {
10112        bb = bbs[i];
10113        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
10114         gsi_next (&gsi))
10115       {
10116         stmt = gsi_stmt (gsi);
10117         if (is_gimple_assign(stmt)) {
10118           gassign *stmt_assign = dyn_cast <gassign *> (gsi_stmt (gsi));
10119           code = gimple_assign_rhs_code (stmt_assign);
10120           // 检查语句是否为 VEC_COND_EXPR
10121           if (code == VEC_COND_EXPR) {
10122             worklist.safe_push (stmt);
10123           }
10124         }
10125        }
10126      }
10128   free (bbs);
10129   if (worklist.is_empty ())
10130     return;
10131 
10132   /* Loop has vec_cond_expr.  */
10133   while (!worklist.is_empty ())
10134     {
10135       gimple *last, *last_store, *last1;
10136       edge e, efalse;
10137       tree mask;
10138       basic_block store_bb, join_bb;
10139       gimple_stmt_iterator gsi_to;
10140       gimple_stmt_iterator gsi_stmt_def;
10141       tree vdef, new_vdef;
10142       gphi *phi;
10143       tree vectype;
10144       tree zero;
10145 
10146       last = worklist.pop ();
10147       gassign *stmt_assign = dyn_cast <gassign *> (last);
10148       mask = gimple_assign_rhs1(stmt_assign);
10149       tree true_vector_operand = gimple_assign_rhs2(stmt_assign);
10150 
10151       gimple *mask_def = SSA_NAME_DEF_STMT (mask);
10152 
10153       gimple *stmt_def = SSA_NAME_DEF_STMT (true_vector_operand);
10154 
10155       bb = gimple_bb (stmt_def);
10156 
10157     //  bb_mask = gimple_bb (mask_def);
10158       /* Create then_bb and if-then structure in CFG, then_bb belongs to
10159    the same loop as if_bb.  It could be different to LOOP when two
10160    level loop-nest is vectorized and mask_store belongs to the inner
10161    one.  */
10162 
10163       gsi_stmt_def = gsi_for_stmt (stmt_def);
10164       gsi_next(&gsi_stmt_def);
10165 
10166       stmt_def = gsi_stmt(gsi_stmt_def);
10167 
10168       e = split_block (bb, stmt_def);
10169       bb_loop = bb->loop_father;
10170    //   gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
10171       join_bb = e->dest;
10172       store_bb = create_empty_bb (bb);
10173       add_bb_to_loop (store_bb, bb_loop);
10174       e->flags = EDGE_TRUE_VALUE;
10175       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
10176       /* Put STORE_BB to likely part.  */
10177       efalse->probability = profile_probability::unlikely ();
10178       store_bb->count = efalse->count ();
10179       make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
10180       if (dom_info_available_p (CDI_DOMINATORS))
10181   set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
10182       if (dump_enabled_p ())
10183   dump_printf_loc (MSG_NOTE, vect_location,
10184        "Create new block %d to sink vect cond expr",
10185        store_bb->index);
10186       /* Create vector comparison with boolean result.  */
10187       vectype = TREE_TYPE (mask);
10188       zero = build_zero_cst (vectype);
10189       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
10190    //   gsi = gsi_last_bb (bb);
10191       gsi = gsi_for_stmt (mask_def);
10192       gsi_next(&gsi);
10193       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
10194       /* Create new PHI node for vdef of the last masked store:
10195    .MEM_2 = VDEF <.MEM_1>
10196    will be converted to
10197    .MEM.3 = VDEF <.MEM_1>
10198    and new PHI node will be created in join bb
10199    .MEM_2 = PHI <.MEM_1, .MEM_3>
10200       */
10201   /*    vdef = gimple_vdef (last);
10202       new_vdef = make_ssa_name (gimple_vop (cfun), last);
10203       gimple_set_vdef (last, new_vdef);
10204       phi = create_phi_node (vdef, join_bb);
10205       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);*/
10206 
10207       /* Put all masked stores with the same mask to STORE_BB if possible.  */
10208   //    while (true)
10209 //  {
10210     gimple_stmt_iterator gsi_from;
10211     gimple *stmt1 = NULL;
10213     /* Move vec_cond second var def to STORE_BB.  */
10214     last_store = stmt_def;
10215     gsi = gsi_for_stmt (stmt_def);
10216     gsi_from = gsi;
10217     /* Shift GSI to the previous stmt for further traversal.  */
10218     gsi_prev (&gsi);
10219     gsi_to = gsi_start_bb (store_bb);
10220     gsi_move_before (&gsi_from, &gsi_to);
10221     /* Setup GSI_TO to the non-empty block start.  */
10222     gsi_to = gsi_start_bb (store_bb);
10223     if (dump_enabled_p ())
10224       dump_printf_loc (MSG_NOTE, vect_location,
10225            "Move stmt to created bb\n%G", last);
10226     /* Move all stored value producers if possible.  */
10227     while (!gsi_end_p (gsi))
10228       {
10229         tree lhs;
10230         imm_use_iterator imm_iter;
10231         use_operand_p use_p;
10232         bool res;
10233 
10234         /* Skip debug statements.  */
10235         if (is_gimple_debug (gsi_stmt (gsi)))
10236     {
10237       gsi_prev (&gsi);
10238       continue;
10239     }
10240         stmt1 = gsi_stmt (gsi);
10241         /* Do not consider statements writing to memory or having
10242      volatile operand.  */
10243         if (gimple_vdef (stmt1)
10244       || gimple_has_volatile_ops (stmt1))
10245     break;
10246         gsi_from = gsi;
10247         gsi_prev (&gsi);
10248         lhs = gimple_get_lhs (stmt1);
10249         if (!lhs)
10250     break;
10251 
10252         /* LHS of vectorized stmt must be SSA_NAME.  */
10253         if (TREE_CODE (lhs) != SSA_NAME)
10254     break;
10255 
10256         if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
10257     {
10258       /* Remove dead scalar statement.  */
10259     /*  if (has_zero_uses (lhs))
10260         {
10261           gsi_remove (&gsi_from, true);
10262           continue;
10263         }*/
10264     }
10265 
10266         /* Check that LHS does not have uses outside of STORE_BB.  */
10267         res = true;
10268   /*      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
10269     {
10270       gimple *use_stmt;
10271       use_stmt = USE_STMT (use_p);
10272       if (is_gimple_debug (use_stmt))
10273         continue;
10274       if (gimple_bb (use_stmt) != store_bb)
10275         {
10276           res = false;
10277           break;
10278         }
10279     }*/
10280         if (!res)
10281     break;
10282 
10283     /*    if (gimple_vuse (stmt1)
10284       && gimple_vuse (stmt1) != gimple_vuse (last_store))
10285     break;*/
10286 
10287         /* Can move STMT1 to STORE_BB.  */
10288         if (dump_enabled_p ())
10289     dump_printf_loc (MSG_NOTE, vect_location,
10290          "Move stmt to created bb\n%G", stmt1);
10291         gsi_move_before (&gsi_from, &gsi_to);
10292         /* Shift GSI_TO for further insertion.  */
10293         gsi_prev (&gsi_to);
10294       }
10295     /* Put other masked stores with the same mask to STORE_BB.  */
10296   /*  if (worklist.is_empty ()
10297         || gimple_call_arg (worklist.last (), 2) != mask
10298         || worklist.last () != stmt1)
10299       break;
10300     last = worklist.pop ();*/
10301   //  last1 = worklist.pop ();
10302 //  }
10303     //  add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
10304     if (!worklist.is_empty ())
10305     last = worklist.pop ();
10306     }
10307 }

能够按照预期进行拆分bb块,同时解决编译不过的两个问题:

1:加上 -g 之后,fre pass 会报错:在分析、删除 debug gimple 的时候,找不到某个标量的定义。原因是最后一个分支的标量 gimple 被直接删除了,却没有生成对应的 debug gimple,导致后面的 debug gimple 使用到该标量时找不到其定义,报编译错误。临时解决方法:先去掉 -g;后续在 dce pass 中查找删除标量和插入 debug 语句的逻辑。出错的语句示例:# DEBUG D#583 => D#597 ? _2164 : 0.0

2:在 sink pass 中报编译错误:gimple_redirect_edge_and_branch 函数中的 assert 不通过,它要求该 edge 是一个 fallthru edge(带有 EDGE_FALLTHRU 标志),因此在构造 edge 的时候需要生成这样的 edge。目前暂时把该 assert 注释掉:

default:
6134       /* Otherwise it must be a fallthru edge, and we don't need to
6135    do anything besides redirecting it.  */
6136    //   gcc_assert (e->flags & EDGE_FALLTHRU);

解决掉编译错误后,可以正确编译运行,但是结果错误。

原因是该 loop 的 vf 是 8,每次会对 loop 中的 8 个元素进行运算;而计算 mask 的数据是 double 类型,一个向量只能容纳 4 个元素,因此会生成两个 mask。每个分支需要把两个 mask 都和 {0,0,0,0} 比较,判断是否全为 0,而目前只能对一个 mask 进行比较。可行的方法:

1:修改loop 中int 的类型使其在确定vf的时候将其作为double 看待(VIEW_CONVERT_EXPR),这样vf 是4, 就不存在两个mask。

2:gimple cond 不支持 if (a == 0 && b == 0) 这种复合条件表达式,因此需要先分别构造两个条件,然后对它们做 & 运算,将合并后的结果作为需要判断的 cond。

1761处循环:

1:在每个分支条件构造后插入两个mask按位或的gimple,并且以此新建一个gimple cond,作为分支判断的条件。

2:课题运算结果为 VE,需要查找原因。从打印出的每个分支运算结果来看,temp4 的结果恒为 0,即最后一个分支完全没有走到,说明存在问题;同时加上 -g 后的报错也是因为最后一个分支的标量被删除,因此怀疑最后一个分支在拆分的时候存在问题。(想在源码中加打印来对比正确的中间结果,但无法进行打印。)

LHS 在 BB 之外被使用(use outside of BB)的处理:如果使用它的外部 BB 就是 VEC_COND 所在的 BB,则认为没有问题;其他情况需要添加 phi 节点。

      

     

  

bb 2 中某条 stmt 的 lhs(记为 res)在 bb 4 里面被使用。原本它们在同一个 bb 里面,不需要做额外的操作;拆分到不同的 bb 之后,是否经过 bb 2 会使 res 的值不同:如果不经过 bb 2,bb 4 中用到的 res 会是上一次在 bb 2 中计算出的 res 值,结果显然错误,需要添加 phi 节点来解决。

方案一:若 bb 2 中的 lhs res0 被 bb 4 使用,需要在 bb 2 的上一个 bb(bb 1)中新建一个向量变量 res1 = 0,在 bb 2 的下一个 bb(bb 3)中新建一个 phi 节点 res2 = phi<res1(1), res0(2)>,并且将 bb 4 中用到 res0 的地方改为 res2。

方案二:若 bb 2 中的 lhs res0 被 bb 4 使用,需要在 bb 2 的上一个 bb(bb 1)中新建一个向量变量 res1 = 0,将 bb 2 中的 res0 = xx 修改为 res2 = xx,在 bb 2 的下一个 bb(bb 3)中新建一个 phi 节点 res0 = phi<res1(1), res2(2)>。这样 bb 4 中对 res0 的使用无需修改。

采用方案二时,若 bb 2 中的 res0 还被 bb 2 中的其他 stmt 使用到,则需要将这些用到 res0 的地方都改成 res2。

对于多个分支都要进行计算的变量,可以将第二个分支直接用到此计算的地方,需要使用该计算的全部。在用到其的地方需要进行计算。

新增phi节点的代码

FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
10283     {
10284       gimple *use_stmt;
10285       use_stmt = USE_STMT (use_p);
10286       if (is_gimple_debug (use_stmt))
10287         continue;
10288       if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last))
10289         {
10290          // res = false;
10291 
10292         if (dump_enabled_p ())
10293     dump_printf_loc (MSG_NOTE, vect_location,
10294          "LHS have use outside of store_BB\n%G", stmt1);
10295         tree lhs_use_out,new_lhs,new_lhs1,new_lhs2;
10296         tree new_lhs_phi;
10297         gphi *phi;
10298         tree vectype;
10299         tree zero;
10300         gimple *zero_def;
10301         lhs_use_out = gimple_assign_lhs(stmt1);
10302 
10303       /*  if (is_gimple_assign(stmt1)) {
10304             lhs_use_out = gimple_assign_lhs(stmt1);
10305             new_lhs = create_tmp_var(TREE_TYPE(lhs_use_out), "new_tmp_var");
10306             new_lhs_phi = make_ssa_name(new_lhs,NULL);
10307         //    gimple_assign_set_lhs(stmt1, new_lhs1);
10308         
10309 
10310             phi = create_phi_node (new_lhs_phi, join_bb);
10311             add_phi_arg (phi, lhs_use_out, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
10312          
10313             vectype = TREE_TYPE (lhs_use_out);
10314             zero = build_zero_cst (vectype);
10315             new_lhs1 = create_tmp_var(TREE_TYPE(lhs_use_out), "new_tmp_var1");
10316             new_lhs2 = make_ssa_name(new_lhs1,NULL);
10317             zero_def = gimple_build_assign(new_lhs2, zero);
10318 
10319      //    basic_block stmt_bb = gimple_bb(stmt1);
10320             edge e_temp;
10321             edge_iterator ei;
10322             basic_block pred_bb;
10323             gimple_stmt_iterator gsi_temp;
10324 
10325        //  if (EDGE_COUNT(stmt_bb->preds) == 1) {
10326             e_temp = EDGE_PRED(store_bb, 0);
10327             pred_bb = e_temp->src;
10328             gsi_temp = gsi_start_bb(pred_bb);
10329             gsi_insert_before(&gsi_temp, zero_def, GSI_SAME_STMT);
10330        //  }
10331 
10332            add_phi_arg (phi, new_lhs2, e, UNKNOWN_LOCATION);
10333          //  update_stmt (phi);
10334 
10335         /*   edge e_join;
10336            edge_iterator ei_join;
10337 
10338            FOR_EACH_EDGE(e_join, ei_join, join_bb->succs)
10339            {
10340               if (EDGE_TRUE_P(e_join))
10341               {
10342                 *true_bb = e->dest;
10343               }
10344            }*/
10345 
10346            for (unsigned int i = 0; i < gimple_num_ops(use_stmt); i++) {
10347               tree rhs = gimple_op(use_stmt, i);
10348               if(rhs == lhs_use_out) {
10349                 gimple_stmt_iterator gsi = gsi_for_stmt(use_stmt);
10350                 gsi_insert_before (&gsi,stmt1,GSI_SAME_STMT);
10351                 break;
10352               //  create_new_def_for (rhs, phi,gimple_phi_result_ptr (phi));
10353               //  update_stmt (phi);
10354               }
10355            }
10356      //   }

2069处循环:

1:需要同时满足 dim=3 的常量传播和拆分循环这两个条件。经验证,前一个循环向量化后有 7% 的性能提升,加上使用 ymm 寄存器后有 11% 的性能提升。

2:查看gcc的loop split 和 loop distribute pass,发现loop distribute的总体思想是将能够向量化的代码最大限度拆分到一个循环中,(1)但其只对非嵌套循环的最内层循环分析,发现其dump的信息中没有对2069循环进行distribute。(2)同时其只能对没有数据依赖的部分distribute,源码有数据依赖的部分使用临时数组存储后进行拆分,需要自行编写代码实现。

549 课题:对 mask store 所涉及的运算中的数学函数添加 mask 的代码

1  #include "config.h"2  #include "system.h"3  #include "coretypes.h"4  #include "backend.h"5  #include "tree.h"6  #include "gimple.h"7  #include "predict.h"8  #include "tree-pass.h"9  #include "ssa.h"10  #include "cgraph.h"11  #include "fold-const.h"12  #include "stor-layout.h"13  #include "gimple-iterator.h"14  #include "gimple-walk.h"15  #include "tree-ssa-loop-manip.h"16  #include "tree-ssa-loop-niter.h"17  #include "tree-cfg.h"18  #include "cfgloop.h"19  #include "tree-vectorizer.h"20  #include "tree-ssa-propagate.h"21  #include "dbgcnt.h"22  #include "tree-scalar-evolution.h"23  #include "stringpool.h"24  #include "attribs.h"25  #include "gimple-pretty-print.h"26  #include "opt-problem.h"27  #include "internal-fn.h"28  #include "tree-ssa-sccvn.h"29  #include "gimple-expr.h"30  #include <cstdio>31 32  namespace33  {34  const pass_data pass_data_test = {35    GIMPLE_PASS,           /* type */36    "mask_vecmath_func",                /* name */37    OPTGROUP_NONE,         /* optinfo_flags */38    TV_TREE_VECT_MASK_VECMATH_FUNC,          /* tv_id */39    (PROP_cfg | PROP_ssa), /* properties_required */40    0,                     /* properties_provided */41    0,                     /* properties_destroyed */42    0,                     /* todo_flags_start */43    0,                     /* todo_flags_finish */44  };
45 46  class pass_mask_vecmath_func : public gimple_opt_pass47  {48  public:49    pass_mask_vecmath_func (gcc::context *ctxt) : gimple_opt_pass (pass_data_test, ctxt) {}50    virtual bool51    gate (function *fun)52    {53     // printf ("gate function noipa.\n");54      return flag_tree_mask_vecmath_func;55    }56 57    virtual unsigned int execute (function *);58  };59 60 61 static void add_mask_to_call(gimple *stmt, tree new_arg, const char *func_name) {62     if (!is_gimple_call(stmt)) {63         // 如果不是函数调用语句,则不做任何操作64         return;65     }66 67     // 获取原始函数调用的目标和参数列表68     tree call_fn = gimple_call_fndecl(stmt);69 70    // 获取或创建新的标识符节点来表示新的函数名称71    tree new_func_id;72    if(strcmp(func_name, "vmldCos2") == 0)73      new_func_id = get_identifier("__svml_cos2_mask_e9");74    else if (strcmp(func_name, "vmldExp2") == 0)75      new_func_id = get_identifier("__svml_exp2_mask_e9");76    else if (strcmp(func_name, "vmldSin2") == 0)77      new_func_id = get_identifier("__svml_sin2_mask_e9");78    else if (strcmp(func_name, "sin.simdclone.2") == 0)79      new_func_id = get_identifier("__svml_sin4_mask_e9");80    else if (strcmp(func_name, "cos.simdclone.2") == 0)81      new_func_id = get_identifier("__svml_cos4_mask_e9");82    else if (strcmp(func_name, "exp.simdclone.2") == 0)83      new_func_id = get_identifier("__svml_exp4_mask_e9");84 85    tree fntype = TREE_TYPE(call_fn);87    tree new_fndecl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, new_func_id, fntype);88 89    TREE_PUBLIC (new_fndecl) = 1;90    DECL_EXTERNAL (new_fndecl) = 1;91    DECL_IS_NOVOPS (new_fndecl) = 1;92    TREE_READONLY (new_fndecl) = 1;93 94 95    // 将新的标识符节点分配给函数声明的汇编名96   // DECL_ASSEMBLER_NAME(call_fn) = new_func_id;97 98     int num_args = gimple_call_num_args(stmt);99     vec<tree> vargs = vNULL;
100     vargs.create (num_args+1);
101 
102     // 创建一个新的参数列表,包含原始的参数和新的参数
103     for (int i = 0; i < num_args; i++) {
104         tree arg = gimple_call_arg(stmt, i);
105         vargs.safe_push(arg);
106     }
107     vargs.safe_push(new_arg);
108 
109     tree lhs = gimple_call_lhs(stmt);
110 
111     // 创建新的函数调用语句,包含新的参数
112     gimple *new_call = gimple_build_call_vec(new_fndecl,vargs);
113     gimple_call_set_lhs (new_call, lhs);
114 
115     // 替换原始的函数调用语句
116     gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
117 
118   //  printf ("-------------finish add mask to vecmath func call------------.\n");
119 
120     gsi_replace(&gsi, new_call,true);
121     stmt = new_call;
122 
123     // 释放参数列表的内存
124     vargs.release ();
125 }
126 
127 static void find_relate_operand(tree operand, gimple *stmt, tree mask)
128 {
129   if (!stmt)
130         return ;
131 
132   if (TREE_CODE (operand) == SSA_NAME && is_gimple_call(stmt)) {  // operand is ssa && stmt is gimple call
133      tree fndecl = gimple_call_fndecl(stmt);  // 获取函数声明
134        if (fndecl && DECL_P(fndecl)) {  // 确保fndecl有效并且是一个声明
135         const  char *func_name = IDENTIFIER_POINTER(DECL_NAME(fndecl));  // 获取函数名称
136          // if (strcmp(func_name, "vmldLn2") == 0) {
137           if (strcmp(func_name, "vmldCos2") == 0 ||
138               strcmp(func_name, "vmldExp2") == 0 ||
139               strcmp(func_name, "vmldSin2") == 0 ||
140               strcmp(func_name, "exp.simdclone.2") == 0 ||
141               strcmp(func_name, "cos.simdclone.2") == 0 ||
142               strcmp(func_name, "sin.simdclone.2") == 0) {
143      //       printf ("-------------find math func------------.\n");
144             add_mask_to_call(stmt,mask,func_name);
145             return ;
146           }
147        }
148   }
149   if (TREE_CODE (operand) == SSA_NAME && is_gimple_assign(stmt)) {   // only find gimple assign
150 
151      for (unsigned i = 1; i < gimple_num_ops(stmt); ++i) {  // get gimple assign right hand side operand
152         tree op = gimple_op(stmt, i);
153         if(TREE_CODE (op) == SSA_NAME) {
154 
155            gimple *stmt_2 = SSA_NAME_DEF_STMT (op);
156            find_relate_operand(op,stmt_2,mask);
157         //   if(result) return result;
158         }
159     }
160   }
161   return ;
162 }
163 
164 
165  unsigned
166  pass_mask_vecmath_func::execute (function *fun)
167  {
168    unsigned ret = 0;
169 
170    basic_block bb;
171    enum tree_code code;
172    FOR_EACH_BB_FN(bb, fun) {
173        gimple_stmt_iterator gsi;
174 
175   /* for (int i = 1; i < number_of_loops (fun); i++)
176      {
177        loop_vec_info loop_vinfo;
178        bool has_mask_store;
179  
180        class loop *loop = get_loop (fun, i);
181        if (!loop || !loop->aux)
182        continue;
183        loop_vinfo = (loop_vec_info) loop->aux;
184        has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
185        delete loop_vinfo;
186        if (has_mask_store) {
187          
188          printf ("-------------have mask store------------.\n");
189 
190          basic_block *bbs = get_loop_body (loop);
191          unsigned nbbs = loop->num_nodes;
192          unsigned i;
193          basic_block bb;
194          class loop *bb_loop;
195          gimple_stmt_iterator gsi;
196          gimple *stmt;
197 
198          for (i = 0; i < nbbs; i++)
199          {
200             bb = bbs[i];*/
201             for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
202              gsi_next (&gsi))
203             {
204               gimple *stmt = gsi_stmt (gsi);
205               if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) {
206  //                printf ("------------ find mask store------------.\n");
207                  basic_block bb1 = gimple_bb(stmt);
208                  tree mask = gimple_call_arg (stmt, 2);
209                  tree value = gimple_call_arg (stmt, 3);
210                  if(TREE_CODE (value) == SSA_NAME) {
211                    gimple *value_def = SSA_NAME_DEF_STMT (value);
212                    basic_block bb2 = gimple_bb(value_def);
213    //                printf ("-------------begin find relate operand------------.\n");
214                    if(bb1 == bb2) //  mask store and value def in same bb
215                    find_relate_operand(value,value_def,mask);
216                  }
217               }
218             }
219 
220          // free (bbs);
221          }
222      //  }
223    //  }
224 
225    return ret;
226 
227  }
228  }
229 
230  gimple_opt_pass *
231  make_pass_mask_vecmath_func (gcc::context *ctxt)
232  {
233    return new pass_mask_vecmath_func (ctxt);
234  }
10092 
10093
/* For the VEC_COND_EXPR assignments found in LOOP, move the statements that
   compute the selected value into a new, conditionally executed basic block.

   Steps performed by the code below:
   1. Collect every assignment in the loop body whose RHS code is
      VEC_COND_EXPR; return if fewer than two are found.
   2. For each entry popped from the worklist: take its first operand as the
      mask and its second operand as the "true" value, split the block
      holding the definition of the true value, create STORE_BB with a
      false edge from the split block and a fallthru edge to the join block.
   3. Build COMBINED_MASK = MASK | MASK2 (MASK2 is the LHS of the statement
      immediately preceding the definition of MASK) and a GIMPLE_COND
      comparing it against a zero vector.
   4. Move statements, walking backwards, into STORE_BB.  When a moved LHS
      is used by an assignment outside STORE_BB and outside the block of
      the VEC_COND_EXPR, a copy of the defining assignment is inserted in
      front of that use and the use is redirected to the copy.
   5. Pop one additional worklist entry before the next iteration.  */
10094 void
10095 optimize_mask_vec_cond (class loop *loop)
10096 {
10097   basic_block *bbs = get_loop_body (loop);
10098   unsigned nbbs = loop->num_nodes;
10099   unsigned i;
10100   basic_block bb, bb_mask;
10101   class loop *bb_loop;
10102   gimple_stmt_iterator gsi;
10103   gimple *stmt;
10104   auto_vec<gimple *> worklist;
10105   auto_purge_vect_location sentinel;
10106
10107   enum tree_code code;
10108
10109   vect_location = find_loop_location (loop);
10110   /* Pick up all vec_cond_expr in loop if any.  */
10111   for (i = 0; i < nbbs; i++)
10112     {
10113        bb = bbs[i];
10114        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
10115         gsi_next (&gsi))
10116       {
10117         stmt = gsi_stmt (gsi);
10118         if (is_gimple_assign(stmt)) {
10119           gassign *stmt_assign = dyn_cast <gassign *> (gsi_stmt (gsi));
10120           code = gimple_assign_rhs_code (stmt_assign);
10121           // Check whether the statement is a VEC_COND_EXPR
10122           if (code == VEC_COND_EXPR) {
10123             worklist.safe_push (stmt);
10124           }
10125         }
10126        }
10127      }
10128
10129   free (bbs);
// Nothing is done unless at least two VEC_COND_EXPR statements were found.
10130   if (worklist.is_empty () || worklist.length()==1)
10131     return;
10132
10133   /* Loop has vec_cond_expr.  */
10134   while (!worklist.is_empty ())
10135     {
10136       gimple *last, *last_store, *last1;
10137       edge e, efalse;
10138       tree mask,mask2;
10139       basic_block store_bb, join_bb;
10140       gimple_stmt_iterator gsi_to;
10141       gimple_stmt_iterator gsi_stmt_def,gsi_mask_def;
10142       tree vdef, new_vdef;
10143       gphi *phi;
10144       tree vectype;
10145       tree zero_vector;
10146
10147       last = worklist.pop ();
10148       gassign *stmt_assign = dyn_cast <gassign *> (last);
// Operand 1 of the VEC_COND_EXPR is the mask, operand 2 the value selected when the mask is set.
10149       mask = gimple_assign_rhs1(stmt_assign);
10150       tree true_vector_operand = gimple_assign_rhs2(stmt_assign);
10151
// NOTE(review): assumes MASK is an SSA_NAME -- nothing here checks it.
10152       gimple *mask_def = SSA_NAME_DEF_STMT (mask);
10153
// MASK2 is taken from the statement immediately preceding the definition of MASK.
// NOTE(review): the dyn_cast result is not checked; this assumes such a statement exists and is an assignment.
10154       gsi_mask_def = gsi_for_stmt(mask_def);
10155       gsi_prev(&gsi_mask_def);
10156       gimple *mask2_def = gsi_stmt(gsi_mask_def);
10157       gassign *stmt_mask2 = dyn_cast <gassign *> (mask2_def);
10158       mask2 = gimple_assign_lhs(stmt_mask2);
10159
10160
10161       gimple *stmt_def = SSA_NAME_DEF_STMT (true_vector_operand);
10162
10163       bb = gimple_bb (stmt_def);
10164
10165       /* Create then_bb and if-then structure in CFG, then_bb belongs to
10166    the same loop as if_bb.  It could be different to LOOP when two
10167    level loop-nest is vectorized and mask_store belongs to the inner
10168    one.  */
10169
// From here on STMT_DEF refers to the statement that follows the definition of the true operand; the block is split after it.
10170       gsi_stmt_def = gsi_for_stmt (stmt_def);
10171       gsi_next(&gsi_stmt_def);
10172
10173       stmt_def = gsi_stmt(gsi_stmt_def);
10174
10175       e = split_block (bb, stmt_def);
10176       bb_loop = bb->loop_father;
10177    //   gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
10178       join_bb = e->dest;
10179       store_bb = create_empty_bb (bb);
10180       add_bb_to_loop (store_bb, bb_loop);
10181       e->flags = EDGE_TRUE_VALUE;
10182       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
10183       /* Put STORE_BB to likely part.  */
10184       efalse->probability = profile_probability::unlikely ();
10185       store_bb->count = efalse->count ();
10186       make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
10187       if (dom_info_available_p (CDI_DOMINATORS))
10188   set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
10189       if (dump_enabled_p ())
10190   dump_printf_loc (MSG_NOTE, vect_location,
10191        "Create new block %d to sink vect cond expr",
10192        store_bb->index);
10193       /* Create vector comparison with boolean result.  */
10194       vectype = TREE_TYPE (mask);
10195       zero_vector = build_zero_cst (vectype);
10196
// NOTE(review): COMBINED_MASK is a plain temporary from create_tmp_var, not an SSA name -- confirm this is valid at this point of the pipeline.
10197       tree combined_mask = create_tmp_var(TREE_TYPE(zero_vector), "combined_mask");
10198
10199       gimple *combine_stmt1 = gimple_build_assign(combined_mask, BIT_IOR_EXPR, mask, mask2);
10200
10201       gsi = gsi_for_stmt (mask_def);
10202       gsi_next(&gsi);
10203       gsi_insert_after (&gsi, combine_stmt1, GSI_SAME_STMT);
10204
10205     /*  vec<constructor_elt, va_gc> *ret_ctor_elts_tmp = NULL;
10206       vec_alloc (ret_ctor_elts_tmp, 2);
10207       CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask2); // add the second left subtree
10208       CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask); // add the first left subtree
10209
10210     //  tree signed_boolean_type = build_nonstandard_integer_type(64, 1);
10211       tree signed_boolean_type = build_nonstandard_boolean_type(64);
10212
10213       tree vect_type = build_vector_type(signed_boolean_type, 4);
10214       tree constructor = build_constructor(vect_type, ret_ctor_elts_tmp);
10215
10216       tree new_var_constru = create_tmp_var(vect_type, "mask_array");
10217       gimple *new_stmt_construc = gimple_build_assign(make_ssa_name(new_var_constru), constructor);
10218       gsi_next(&gsi);
10219       gsi_insert_after (&gsi, new_stmt_construc, GSI_SAME_STMT);*/
10220
// Branch condition: COMBINED_MASK == zero vector.
10221       gimple *gcond = gimple_build_cond(EQ_EXPR, combined_mask, zero_vector, NULL, NULL);
10222       gsi_next(&gsi);
10223       gsi_insert_after(&gsi, gcond, GSI_NEW_STMT);
10224
10225
10226       /* Put all masked stores with the same mask to STORE_BB if possible.  */
10227   //    while (true)
10228 //  {
10229     gimple_stmt_iterator gsi_from;
10230     gimple *stmt1 = NULL;
10231
10232     /* Move vec_cond second var def to STORE_BB.  */
10233     last_store = stmt_def;
10234     gsi = gsi_for_stmt (stmt_def);
10235     gsi_from = gsi;
10236     /* Shift GSI to the previous stmt for further traversal.  */
10237     gsi_prev (&gsi);
10238     gsi_to = gsi_start_bb (store_bb);
10239     gsi_move_before (&gsi_from, &gsi_to);
10240     /* Setup GSI_TO to the non-empty block start.  */
10241     gsi_to = gsi_start_bb (store_bb);
// NOTE(review): the dump below prints LAST (the VEC_COND_EXPR), not the statement that was just moved.
10242     if (dump_enabled_p ())
10243       dump_printf_loc (MSG_NOTE, vect_location,
10244            "Move stmt to created bb\n%G", last);
10245     /* Move all stored value producers if possible.  */
10246     while (!gsi_end_p (gsi))
10247       {
10248         tree lhs;
10249         imm_use_iterator imm_iter;
10250         use_operand_p use_p;
10251         bool res;
10252
10253         /* Skip debug statements.  */
10254         if (is_gimple_debug (gsi_stmt (gsi)))
10255     {
10256       gsi_prev (&gsi);
10257       continue;
10258     }
10259         stmt1 = gsi_stmt (gsi);
10260         /* Do not consider statements writing to memory or having
10261      volatile operand.  */
10262         if (gimple_vdef (stmt1)
10263       || gimple_has_volatile_ops (stmt1))
10264     break;
10265         gsi_from = gsi;
10266         gsi_prev (&gsi);
10267         lhs = gimple_get_lhs (stmt1);
10268         if (!lhs)
10269     break;
10270
10271         /* LHS of vectorized stmt must be SSA_NAME.  */
10272         if (TREE_CODE (lhs) != SSA_NAME)
10273     break;
10274
10275         if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
10276     {
10277       /* Remove dead scalar statement.  */
10278       if (has_zero_uses (lhs))
10279         {
10280           gsi_remove (&gsi_from, true);
10281           continue;
10282         }
10283     }
10284
10285         /* Check that LHS does not have uses outside of STORE_BB.  */
// RES is never cleared below, so an outside use no longer stops the move; such uses are patched up instead.
10286         res = true;
10287     //    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
10288         gimple *use_lhs;
10289         FOR_EACH_IMM_USE_STMT (use_lhs, imm_iter, lhs)
10290     {
10291         gimple *use_stmt;
10292         FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) {
10293
10294       //  gimple *use_stmt;
10295         use_stmt = USE_STMT (use_p);
10296         if (is_gimple_debug (use_stmt))
10297         continue;
// A use counts as "outside" when it is neither in STORE_BB nor in the block of the VEC_COND_EXPR.
10298       if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last))
10299     {
10300          // res = false;
10301
10302         if (dump_enabled_p ())
10303     dump_printf_loc (MSG_NOTE, vect_location,
10304          "LHS have use outside of store_BB\n%G", stmt1);
10305         tree new_lhs,new_lhs1,new_lhs2;
10306         tree new_lhs_phi;
10307         gphi *phi;
10308         tree vectype;
10309         tree zero;
10310         gimple *zero_def;
10311
10312         gimple *new_assign_stmt;
10313
10314         if (is_gimple_assign(stmt1) && is_gimple_assign(use_lhs)) {
10315         for (unsigned int i = 1; i < gimple_num_ops(use_stmt); i++) {
10316           tree rhs = gimple_op(use_stmt, i);
10317           if(TREE_CODE (rhs) == SSA_NAME && (rhs == lhs)) {
10318
10319            if (dump_enabled_p ())
10320               dump_printf_loc (MSG_NOTE, vect_location,
10321          "insert new stmt to use out of BB\n");
// Recompute the value right before the outside use instead of adding a PHI node.
// NOTE(review): only rhs1 and rhs2 of STMT1 are copied; a third operand, if STMT1 has one, is not carried over.
10322             new_lhs = create_tmp_var(TREE_TYPE(lhs), "new_tmp_var");
10323             new_lhs1 = make_ssa_name(new_lhs,NULL);
10324             tree rhs1 = gimple_assign_rhs1(stmt1);
10325             tree rhs2 = gimple_assign_rhs2(stmt1);
10326             new_assign_stmt = gimple_build_assign(new_lhs1, gimple_assign_rhs_code(stmt1), rhs1, rhs2);
10327
10328             gimple_stmt_iterator gsi_temp = gsi_for_stmt(use_stmt);
10329             gsi_insert_before (&gsi_temp,new_assign_stmt,GSI_SAME_STMT);
10330             update_stmt(new_assign_stmt);
10331
// Only operand positions 1 and 2 of the using statement are redirected to the copy.
10332             if( i == 1) {
10333
10334               gimple_assign_set_rhs1(use_stmt, new_lhs1);
10335            //   update_stmt(use_stmt);
10336             }
10337             else if (i == 2) {
10338               gimple_assign_set_rhs2(use_stmt, new_lhs1);
10339             //  update_stmt(use_stmt);
10340             }
10341
10342           //  update_stmt(use_stmt);
10343           }
10344         }
10345       }
10346     }
10347         }
10348
10349             update_stmt(use_stmt);
10350     }
10351
10352         /* Can move STMT1 to STORE_BB.  */
10353      /*   if (dump_enabled_p ())
10354     dump_printf_loc (MSG_NOTE, vect_location,
10355          "Move stmt to created bb\n%G", stmt1);*/
10356         gsi_move_before (&gsi_from, &gsi_to);
10357         /* Shift GSI_TO for further insertion.  */
10358         gsi_prev (&gsi_to);
10359       }
// NOTE(review): this pops one more entry that is never processed, since the loop head pops again.
// Presumably it skips the VEC_COND_EXPR of the second vector half sharing this branch -- confirm.
10360     if (!worklist.is_empty ())
10361     last = worklist.pop ();
10362     }
10363
10364 }

对 if-continue 结构的分块(拆分 bb)

10161   /*    if(worklist.length()== 1) {
10162         if (dump_enabled_p ())
10163                dump_printf_loc (MSG_NOTE, vect_location,
10164             " if-continue split bb\n");
10165         tree mask_tmp2 = gimple_assign_rhs2(stmt_mask2);
10166         tree mask_tmp1 = gimple_assign_rhs2(stmt_mask1);
10167
10168         gimple *mask_temp2_def = SSA_NAME_DEF_STMT (mask_tmp2);
10169         gimple *mask_temp1_def = SSA_NAME_DEF_STMT (mask_tmp1);
10170
10171         gassign *stmt_mask_tmp2 = dyn_cast <gassign *> (mask_temp2_def);
10172         gassign *stmt_mask_tmp1 = dyn_cast <gassign *> (mask_temp1_def);
10173
10174         tree temp2_rhs1 = gimple_assign_rhs1(stmt_mask_tmp2);
10175         tree temp1_rhs1 = gimple_assign_rhs1(stmt_mask_tmp1);
10176
10177         tree target_mask3 = gimple_assign_lhs(stmt_mask_tmp2);
10178         tree target_mask4 = gimple_assign_lhs(stmt_mask_tmp1);
10179
10180         tree temp2_rhs2 = gimple_assign_rhs2(stmt_mask_tmp2);
10181         tree temp1_rhs2 = gimple_assign_rhs2(stmt_mask_tmp1);
10182
10183         gimple *target_stmt1 = SSA_NAME_DEF_STMT (temp2_rhs1);
10184         gimple *target_stmt2 = SSA_NAME_DEF_STMT (temp1_rhs1);
10185
10186         gassign *stmt_target_stmt1 = dyn_cast <gassign *> (target_stmt1);
10187         gassign *stmt_target_stmt2 = dyn_cast <gassign *> (target_stmt2);
10188
10189         tree target_mask1 = gimple_assign_lhs(stmt_target_stmt1);
10190         tree target_mask2 = gimple_assign_lhs(stmt_target_stmt2);
10191
10192
10193         gimple *target_stmt3 = SSA_NAME_DEF_STMT (temp2_rhs2);
10194         gimple *target_stmt4 = SSA_NAME_DEF_STMT (temp1_rhs2);
10195
10196         basic_block bb_tmp =  gimple_bb (target_stmt1);
10197         basic_block bb_tmp_next =  gimple_bb (target_stmt4);
10198         edge e_tmp;
10199         gimple_stmt_iterator target_stmt4_gsi = gsi_for_stmt(mask_temp1_def);
10200         gsi_next(&target_stmt4_gsi);
10201         gimple *target_stmt4_next = gsi_stmt(target_stmt4_gsi);
10202
10203         gimple_stmt_iterator target_stmt2_gsi = gsi_for_stmt(target_stmt2);
10204         gsi_next(&target_stmt2_gsi);
10205         gimple *target_stmt2_next = gsi_stmt(target_stmt2_gsi);
10206
10207         e_tmp = split_block (bb_tmp, target_stmt4_next);
10208         class loop *bb_loop_tmp = bb_tmp->loop_father;
10209         gcc_assert (loop == bb_loop_tmp || flow_loop_nested_p (loop, bb_loop_tmp));
10210
10211         basic_block bb_last_tmp = gimple_bb(last);
10212         basic_block join_bb_tmp;
10213         gimple *last_stmt_tmp = last_stmt(bb_last_tmp);
10214         if (last_stmt_tmp && gimple_code(last_stmt_tmp) == GIMPLE_COND) {
10215
10216             edge e_tmp2;
10217             edge_iterator ei_tmp2;
10218              basic_block true_bb;
10219
10220             FOR_EACH_EDGE(e_tmp2, ei_tmp2, bb_last_tmp->succs) {
10221         // 检查是否为 true 分支
10222               if (e_tmp2->flags & EDGE_TRUE_VALUE) {
10223                   true_bb = e_tmp2->dest;
10224               }
10225             }
10226             join_bb_tmp = e_tmp->dest;
10227             basic_block store_bb_tmp = create_empty_bb (bb_tmp);
10228             add_bb_to_loop (store_bb_tmp, bb_loop_tmp);
10229        //     e_tmp->flags = EDGE_TRUE_VALUE;
10230
10231             edge efalse_tmp_true = make_edge (bb_tmp, bb_last_tmp, EDGE_TRUE_VALUE);
10232                /* Put STORE_BB to likely part.  */
10233     /*        efalse_tmp_true->probability = profile_probability::likely ();
10234             store_bb_tmp->count = efalse_tmp_true->count ();
10235
10236             edge efalse_tmp = make_edge (bb_tmp, store_bb_tmp, EDGE_FALSE_VALUE);
10237                /* Put STORE_BB to likely part.  */
10238       /*      efalse_tmp->probability = profile_probability::unlikely ();
10239             store_bb_tmp->count = efalse_tmp->count ();
10240        //     make_single_succ_edge (store_bb_tmp, join_bb_tmp, EDGE_FALLTHRU);
10241
10242             edge efalse_tmp_next = make_edge (store_bb_tmp, join_bb_tmp, EDGE_FALSE_VALUE);
10243             efalse_tmp_next->probability = profile_probability::unlikely ();
10244        //     store_bb_tmp->count = efalse_tmp_true->count ();
10245
10246             edge etrue_tmp_next = make_edge (store_bb_tmp, bb_last_tmp, EDGE_TRUE_VALUE);
10247             etrue_tmp_next->probability = profile_probability::likely ();
10248             store_bb_tmp->count = efalse_tmp_true->count ();
10249         //    true_bb = e_tmp->dest;
10250
10251         //    e_tmp->dest = NULL;
10252          //   e_tmp->flags = EDGE_TRUE_VALUE;
10253
10254             edge e_dele = find_edge(bb_tmp, join_bb_tmp);
10255             if (e_dele) {
10256                   remove_edge(e_dele); // 删除这条边
10257                  }
10258
10259         //    true_bb->preds = chainon(true_bb->preds, e_tmp);
10260             add_to_dominance_info(CDI_DOMINATORS,join_bb_tmp);
10261
10262             if (dom_info_available_p (CDI_DOMINATORS)) {
10263                 set_immediate_dominator (CDI_DOMINATORS, store_bb_tmp, bb_tmp);
10264                 set_immediate_dominator (CDI_DOMINATORS, join_bb_tmp, store_bb_tmp);
10265                 set_immediate_dominator (CDI_DOMINATORS, bb_last_tmp, bb_tmp);
10266            //     free_dominance_info(CDI_DOMINATORS);
10267                 calculate_dominance_info(CDI_DOMINATORS);
10268             }
10269
10270        //     free_dominance_info(CDI_DOMINATORS);
10271     //        calculate_dominance_info(CDI_DOMINATORS);
10272
10273             tree vectype_tmp = TREE_TYPE (mask_tmp1);
10274             tree zero_vector_tmp = build_zero_cst (vectype_tmp);
10275
10276             tree combined_mask_tmp = create_tmp_var(TREE_TYPE(zero_vector_tmp), "combined_mask_ifconti");
10277
10278             tree combined_mask_tmp2 = create_tmp_var(TREE_TYPE(zero_vector_tmp), "combined_mask_ifconti2");
10279
10280             gimple *combine_stmt1_tmp = gimple_build_assign(combined_mask_tmp, BIT_IOR_EXPR, target_mask1, target_mask2);
10281
10282             gimple *combine_stmt1_tmp2 = gimple_build_assign(combined_mask_tmp2, BIT_IOR_EXPR, target_mask3, target_mask4);
10283
10284             gimple_stmt_iterator gsi_tmp = gsi_for_stmt (target_stmt2);
10285             gsi_next(&gsi_tmp);
10286             gsi_insert_after (&gsi_tmp, combine_stmt1_tmp, GSI_SAME_STMT);
10287
10288             gimple_stmt_iterator gsi_tmp_next_if = gsi_last_bb (store_bb_tmp);
10289          //   gsi_prev(&gsi_tmp_next_if);
10290             gsi_insert_before (&gsi_tmp_next_if, combine_stmt1_tmp2, GSI_SAME_STMT);
10291
10292             gimple *gcond_tmp = gimple_build_cond(EQ_EXPR, combined_mask_tmp, zero_vector_tmp, NULL, NULL);
10293             gsi_next(&gsi_tmp);
10294             gsi_insert_after(&gsi_tmp, gcond_tmp, GSI_NEW_STMT);
10295
10296             gimple *gcond_tmp_next = gimple_build_cond(EQ_EXPR, combined_mask_tmp2, zero_vector_tmp, NULL, NULL);
10297          //   gsi_next(&gsi_tmp_next_if);
10298             gsi_insert_before(&gsi_tmp_next_if, gcond_tmp_next, GSI_NEW_STMT);
10299
10300         //    calculate_dominance_info(CDI_DOMINATORS);
10301
10302             gimple_stmt_iterator gsi_from_tmp;
10303             gimple *stmt1 = NULL;
10304
10305     /* Move vec_cond second var def to STORE_BB.  */
10306      /*       gimple *last_store = target_stmt4_next;
10307             gimple_stmt_iterator gsi_tmp4 = gsi_for_stmt (target_stmt4_next);
10308             gsi_from_tmp = gsi_tmp4;
10309     /* Shift GSI to the previous stmt for further traversal.  */
10310     /*        gsi_prev (&gsi_tmp4);
10311             gimple_stmt_iterator gsi_to_tmp = gsi_start_bb (store_bb_tmp);
10312             gsi_move_before (&gsi_from_tmp, &gsi_to_tmp);
10313     /* Setup GSI_TO to the non-empty block start.  */
10314     /*        gsi_to_tmp = gsi_start_bb (store_bb_tmp);
10315             if (dump_enabled_p ())
10316               dump_printf_loc (MSG_NOTE, vect_location,
10317            "Move if-continue stmt to created bb\n%G", last);
10318     /* Move all stored value producers if possible.  */
10319      /*       while (!gsi_end_p (gsi_tmp4)) {
10320
10321                tree lhs;
10322                imm_use_iterator imm_iter;
10323                use_operand_p use_p;
10324                bool res;
10325
10326               /* Skip debug statements.  */
10327        /*        if (is_gimple_debug (gsi_stmt (gsi_tmp4)))
10328                 {
10329                   gsi_prev (&gsi_tmp4);
10330                   continue;
10331                 }
10332                stmt1 = gsi_stmt (gsi_tmp4);
10333         /* Do not consider statements writing to memory or having
10334      volatile operand.  */
10335         /*       if (gimple_vdef (stmt1) || gimple_has_volatile_ops (stmt1))
10336                  break;
10337                gsi_from_tmp = gsi_tmp4;
10338                gsi_prev (&gsi_tmp4);
10339                lhs = gimple_get_lhs (stmt1);
10340                if (!lhs)
10341                  break;
10342
10343         /* LHS of vectorized stmt must be SSA_NAME.  */
10344         /*       if (TREE_CODE (lhs) != SSA_NAME)
10345                  break;
10346
10347                if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
10348                  {
10349                /* Remove dead scalar statement.  */
10350         /*           if (has_zero_uses (lhs))
10351                      {
10352                        gsi_remove (&gsi_from_tmp, true);
10353                        continue;
10354                      }
10355                  }
10356
10357                 gsi_move_before (&gsi_from_tmp, &gsi_to_tmp);
10358                /* Shift GSI_TO for further insertion.  */
10359           /*     gsi_prev (&gsi_to_tmp);
10360              }
10361         }
10362       }*/

当vf 是4的时候,进行mask的合并,以及将合并后的mask加入到数学函数里面

mask合并代码

10410       vec<constructor_elt, va_gc> *ret_ctor_elts_tmp = NULL;
10411       vec_alloc (ret_ctor_elts_tmp, 2);
10412       CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask2); // 添加第二个左子树
10413       CONSTRUCTOR_APPEND_ELT(ret_ctor_elts_tmp, NULL_TREE, mask); // 添加第一个左子树
10414
10415     //  tree signed_boolean_type = build_nonstandard_integer_type(64, 1);
10416       tree signed_boolean_type = build_nonstandard_boolean_type(64);
10417
10418       tree vect_type = build_vector_type(signed_boolean_type, 4);
10419       tree constructor = build_constructor(vect_type, ret_ctor_elts_tmp);
10420
10421       tree new_var_constru = create_tmp_var(vect_type, "mask_array");
10422       gimple *new_stmt_construc = gimple_build_assign(make_ssa_name(new_var_constru), constructor);
10423       gsi_next(&gsi);
10424       gsi_insert_after (&gsi, new_stmt_construc, GSI_SAME_STMT);

将合并后的mask加入到数学函数里面

195                       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mask_operand)
196                       {
197                         gimple *use_stmt;
198                         use_stmt = USE_STMT (use_p);
199                         if(is_gimple_assign(use_stmt)) {
200                           tree rhs1_tmp1 = gimple_assign_rhs1(use_stmt);
201                           if (TREE_CODE(rhs1_tmp1) == CONSTRUCTOR) {
202                             tree lhs_tmp1 = gimple_assign_lhs(use_stmt);
203                             if(stmt_vecmath)
204                             add_mask_to_call(stmt_vecmath,lhs_tmp1);
205                           }
206                         }
207                       }

oneapi的cfg图

在移动的过程中,如果store bb中的LHS在除store bb以外的其他bb中被使用,则需要在使用处重新计算(在use语句之前插入一条新的赋值语句,并让use语句改用新的结果)

10490         /* Check that LHS does not have uses outside of STORE_BB.  */
10491         res = true;
10492     //    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
10493         gimple *use_lhs;
10494         FOR_EACH_IMM_USE_STMT (use_lhs, imm_iter, lhs)
10495     {
10496         gimple *use_stmt;
10497         FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) {
10498
10499       //  gimple *use_stmt;
10500         use_stmt = USE_STMT (use_p);
10501         if (is_gimple_debug (use_stmt))
10502         continue;
10503       if (gimple_bb (use_stmt) != store_bb && gimple_bb (use_stmt) != gimple_bb (last))
10504     {
10505          // res = false;
10506
10507         if (dump_enabled_p ())
10508     dump_printf_loc (MSG_NOTE, vect_location,
10509          "LHS have use outside of store_BB\n%G", stmt1);
10510         tree new_lhs,new_lhs1,new_lhs2;
10511         tree new_lhs_phi;
10512         gphi *phi;
10513         tree vectype;
10514         tree zero;
10515         gimple *zero_def;
10516
10517         gimple *new_assign_stmt;
10518
10519         if (is_gimple_assign(stmt1) && is_gimple_assign(use_lhs)) {
10520         for (unsigned int i = 1; i < gimple_num_ops(use_stmt); i++) {
10521           tree rhs = gimple_op(use_stmt, i);
10522           if(TREE_CODE (rhs) == SSA_NAME && (rhs == lhs)) {
10523
10524            if (dump_enabled_p ())
10525               dump_printf_loc (MSG_NOTE, vect_location,
10526          "insert new stmt to use out of BB\n");
10527             new_lhs = create_tmp_var(TREE_TYPE(lhs), "new_tmp_var");
10528             new_lhs1 = make_ssa_name(new_lhs,NULL);
10529             tree rhs1 = gimple_assign_rhs1(stmt1);
10530             tree rhs2 = gimple_assign_rhs2(stmt1);
10531             new_assign_stmt = gimple_build_assign(new_lhs1, gimple_assign_rhs_code(stmt1), rhs1, rhs2);
10532
10533             gimple_stmt_iterator gsi_temp = gsi_for_stmt(use_stmt);
10534             gsi_insert_before (&gsi_temp,new_assign_stmt,GSI_SAME_STMT);
10535             update_stmt(new_assign_stmt);
10536
10537             if( i == 1) {
10538
10539               gimple_assign_set_rhs1(use_stmt, new_lhs1);
10540            //   update_stmt(use_stmt);
10541             }
10542             else if (i == 2) {
10543               gimple_assign_set_rhs2(use_stmt, new_lhs1);
10544             //  update_stmt(use_stmt);
10545             }
10546
10547           //  update_stmt(use_stmt);
10548           }
10549         }
10550       }
10551     }
10552         }
10553
10554             update_stmt(use_stmt);
10555     } */

消除同一个reduction在loop中被多次使用的情况

        # temp_value.920_2824 = PHI <tmp_var.921_2823(234), 0.0(279)>
48420   # temp_value.923_2821 = PHI <tmp_var.924_2820(234), 0.0(279)>
48421   # temp_value.926_2814 = PHI <tmp_var.927_2813(234), 0.0(279)>
48422   # temp_value.929_2807 = PHI <tmp_var.930_2806(234), 0.0(279)>
48423   # temp_value.932_2800 = PHI <tmp_var.933_2798(234), 0.0(279)>
        _ifc__2843 = _3089 ? _2132 : 0.0;
48574   tmp_var.927_2813 = _ifc__2843 + temp_value.926_2814;
48575   _ifc__2842 = _3084 ? _2145 : 0.0;
48576   tmp_var.930_2806 = _ifc__2842 + temp_value.929_2807;
48577   _ifc__2841 = _3192 ? _2085 : 0.0;
48578   tmp_var.921_2823 = _ifc__2841 + temp_value.920_2824;
48579   _ifc__2840 = _3172 ? _2101 : 0.0;
48580   tmp_var.933_2798 = _ifc__2840 + temp_value.932_2800;
48581   _ifc__2839 = _3161 ? _2113 : 0.0;
48582   tmp_var.924_2820 = _ifc__2839 + temp_value.923_2821;
        # tmp_sumi.922_2822 = PHI <tmp_var.921_2823(83), 0.0(81), 0.0(276)>
48880   # tmp_sumi.925_2816 = PHI <tmp_var.924_2820(83), 0.0(81), 0.0(276)>
48881   # tmp_sumi.928_2809 = PHI <tmp_var.927_2813(83), 0.0(81), 0.0(276)>
48882   # tmp_sumi.931_2805 = PHI <tmp_var.930_2806(83), 0.0(81), 0.0(276)>
48883   # tmp_sumi.934_2793 = PHI <tmp_var.933_2798(83), 0.0(81), 0.0(276)>
        _2752 = tmp_sumi.922_2822 + tmp_sumi.925_2816;
48885   _2750 = _2752 + tmp_sumi.928_2809;
48886   _2747 = _2750 + tmp_sumi.931_2805;
48887   _2746 = _2747 + tmp_sumi.934_2793;
        _2156 = ri1i_2025 + _2746;
48931   _2163 = _2160 * _2746;

1761           for (k = 0; k < lpears[i] + upears[i]; k++) {
1762
1763             if (pearlist[i] == NULL) {
1764                fprintf(nabout,
1765                        "NULL pair list entry in egb loop 1, taskid = %d\n",
1766                        mytaskid);
1767                fflush(nabout);
1768             }
1769             j = pearlist[i][k];
1770
1771             xij = xi - x[dim * j];
1772             yij = yi - x[dim * j + 1];
1773             zij = zi - x[dim * j + 2];
1774             r2 = xij * xij + yij * yij + zij * zij;
1775
1776             if (dim == 4) {                     // delete
1777                wij = wi - x[dim * j + 3];
1778                r2 += wij * wij;
1779             }
1780
1781             if (r2 > rgbmaxpsmax2)      //  %hir.cmp.4310 ule
1782                continue;
1783             dij1i = 1.0 / sqrt(r2);
1784             dij = r2 * dij1i;
1785             sj = fs[j] * (rborn[j] - BOFFSET);   //   select fast
1786             sj2 = sj * sj;
1787
1788             /*
1789              * ---following are from the Appendix of Schaefer and Froemmel,
1790              * JMB 216:1045-1066, 1990;  Taylor series expansion for d>>s
1791              * is by Andreas Svrcek-Seiler; smooth rgbmax idea is from
1792              * Andreas Svrcek-Seiler and Alexey Onufriev.
1793              */
1794
1795             if (dij > rgbmax + sj)      // rgbmax = 20;   %hir.cmp.4333 ule
1796                continue;
1797
1798             if ((dij > rgbmax - sj)) {      //    %hir.cmp.4349  ogt
1799                uij = 1. / (dij - sj);
1800                sumi -= 0.125 * dij1i * (1.0 + 2.0 * dij * uij +
1801                                         rgbmax2i * (r2 -
1802                                                     4.0 * rgbmax *
1803                                                     dij - sj2) +
1804                                         2.0 * log((dij - sj) * rgbmax1i));
1805
1806             } else if (dij > 4.0 * sj) {
1807                dij2i = dij1i * dij1i;
1808                tmpsd = sj2 * dij2i;
1809                dumbo =
1810                    TA + tmpsd * (TB +
1811                                  tmpsd * (TC +
1812                                           tmpsd * (TD + tmpsd * TDD)));
1813                sumi -= sj * tmpsd * dij2i * dumbo;
1814
1815             } else if (dij > ri + sj) {
1816                sumi -= 0.5 * (sj / (r2 - sj2) +
1817                               0.5 * dij1i * log((dij - sj) / (dij + sj)));
1818
1819             } else if (dij > fabs(ri - sj)) {
1820                theta = 0.5 * ri1i * dij1i * (r2 + ri * ri - sj2);
1821                uij = 1. / (dij + sj);
1822                sumi -= 0.25 * (ri1i * (2. - theta) - uij +
1823                                dij1i * log(ri * uij));
1824
1825             } else if (ri < sj) {
1826                sumi -= 0.5 * (sj / (r2 - sj2) + 2. * ri1i +
1827                               0.5 * dij1i * log((sj - dij) / (sj + dij)));
1828
1829             }
1830
1831          }

1:if 分支中的 fprintf 分析不出内存关系,导致无法ifcvt。(lim pass 无法将其外提,也是因为fprintf的内存关系无法分析)

解决:将其外提到最内层循环外面。

2 : dim常量传播  (ipa-cp pass)

mme  → mme34 → egb

dim 作为全局变量无法常量传播,作为函数参数的时候可以传播到。

解决:新建一个pass,识别全局变量(当其没有作为函数传参时)和函数调用关系,在函数调用的地方将变量替换为常量值。(pass 的位置?是否有参数能解决)

根据inline pass debug的信息,发现mme34无法inline进mme 原因是--param early-inlining-insns= 值过小,将此值调大,可以成功inline。

inline 过后

 ;;   basic block 2, loop depth 0, count 27580514 (estimated locally), maybe hot74798 ;;    prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)74799 ;;    pred:       ENTRY [always]  count:27580514 (estimated locally) (FALLTHRU,EXECUTABLE)74800   # .MEM_2325 = VDEF <.MEM_2324(D)>74801   dim.lto_priv.0D.4751 = 3;74802   # VUSE <.MEM_2325>basic block 96, loop depth 2, count 954868629 (estimated locally), maybe hot77095 ;;    prev block 95, next block 97, flags: (NEW, REACHABLE, VISITED)77096 ;;    pred:       94 [82.6% (guessed)]  count:788435027 (estimated locally) (FALSE_VALUE,EXECUTABLE)77097 ;;                95 [always]  count:166433602 (estimated locally) (FALLTHRU,EXECUTABLE)_698 = dim.lto_priv.0D.4751;77112     _699 = j_697 * _698;if (_698 == 4)77146      goto <bb 97>; [34.00%]77147       else77148       goto <bb 98>; [66.00%]

怀疑是mme34函数中其他部分的代码影响了常量传播的分析;注释掉mme34函数中的部分代码后,发现dim = 3能够作为常量被传播。

 _77 = j_76 * 3;

但是需要同时注释掉的内容较多,无法准确找到哪部分代码影响了传播,以及这部分代码的特性。

写了一个例子发现其静态全局变量可以成功作为常量计算,怀疑是mme34函数中的其他部分,影响到dim的常量传播。

 1     #include<stdio.h>
 2     #include<math.h>
 3     #include<stdlib.h>
 4
 5
 6     static int threshold = 5;
 7
 8     static inline int check_value1(int x) {
 9       if(threshold < 20)
10       return x*threshold;
11       else return threshold;
12     }
13
14     static inline int check_value2(int x) {
15       if(threshold < 5)
16       return x+threshold;
17       else return threshold;
18     }
19     static inline int check_value3(int x) {
20       threshold = 10;
21       return check_value1(x);
22     }
23     static inline int check_value4(int x) {
24       threshold = 50;
25       return check_value2(x);
26     }
27
28     int use_threshold(int threshold) {
29
30       return 10 + threshold;
31     }
32     int main()
33     {
34       int num = 30;
35       int num2 = 5;
36       int ans3 = use_threshold(threshold);
37       int ans1 = check_value3(num);
38       int ans2 = check_value4(num2);
39       int ans = ans1 + ans2 +ans3;
40       printf("ans is %d\n",ans);
41       return 0;
42     }

查看ccp pass 中的debug的信息

39040 Visiting statement:
39041 # VUSE <.MEM_2279>
39042 _698 = dim.lto_priv.0D.4751;
39043 which is likely CONSTANT
39044 Lattice value changed to VARYING.  Adding SSA edges to worklist.

在这里进行gdb 调试,

69046 Substituting values and folding statements
69048 Folding statement: dim = 3;
69049 Not folded

1761          for (k = 0; k < lpears[i] + upears[i]; k++) {
1762
1763             if (pearlist[i] == NULL) {
1764                fprintf(nabout,
1765                        "NULL pair list entry in egb loop 1, taskid = %d\n",
1766                        mytaskid);
1767                fflush(nabout);abort();
1768             }
1769             j = pearlist[i][k];
1770

在ifcvt pass中查看,if并没有被外提,无法ifcvt

插入abort需要识别的pattern

14044   <bb 148> [local count: 919275880]:
14045   _2044 = _127 + _2039;
14046   _2045 = *_2044;
14047   if (_2045 == 0B)
14048     goto <bb 149>; [17.43%]
14049   else
14050     goto <bb 150>; [82.57%]
14051
14052   <bb 149> [local count: 160229786]:
14053   _2046 = 0;
14054   _2047 = nabout;
14055   fprintf (_2047, "NULL pair list entry in egb loop 1, taskid = %d\n", _2046);
14056   _2048 = nabout;
14057   fflush (_2048);
14058
14059   <bb 150> [local count: 919275880]:
14060   _2049 = *_2044;
14061   _2051 = (long unsigned int) k_2050;
14062   _2052 = _2051 * 4;
14063   _2053 = _2049 + _2052;
14064   j_2054 = *_2053;

Eff.c:3282

build_base_HygonGCC_Spec2017_rate_perf-test.cfg-64.0000

build_base_HygonGCC_Spec2017_rate_perf.cfg-64.0001

加上一个参数使mme34内联进mme中,但是dim = 3的常量传播仍无法做到。写了一个静态全局变量的例子,发现其能够传播到,怀疑是函数中的其他代码影响了对常量的分析,导致无法传播;通过注释原题中的部分代码来验证。

加上if -continue 107

不加 106

Base 99.6

相关文章:

544 eff.c:1761处loop vect 分析

2.6 带有mask的向量数学函数 gcc 支持的svml向量数学函数 32652 GCC currently emits calls to code{vmldExp2}, 32653 code{vmldLn2}, code{vmldLog102}, code{vmldPow2}, 32654 code{vmldTanh2}, code{vmldTan2}, code{vmldAtan2}, code{vmldAtanh2}, 32655 code{vmldCbrt2}…...

搜狗拼音输入法纯净优化版:去广告,更流畅输入体验15.2.0.1758

前言 搜狗输入法电脑版无疑是装机必备的神器。它打字精准&#xff0c;词库丰富全面&#xff0c;功能强大&#xff0c;极大地提升了输入效率。最新版的搜狗拼音输入法更是借助AI技术&#xff0c;让打字变得既准确又高效。而搜狗输入法的去广告精简优化版&#xff0c;通过移除广…...

YOLOv11改进 | YOLOv11引入MobileNetV4

前言&#xff1a; 主要是对该文章YOLOv11改进 | YOLOv11引入MobileNetV4进行复现&#xff0c;以及对一些问题进行解答 1、mobilenetv4核心代码 from typing import Optional import torch import torch.nn as nn import torch.nn.functional as F__all__ [MobileNetV4ConvLa…...

Java中的ArrayList方法

1. 创建 ArrayList 实例 你可以通过多种方式创建 ArrayList 实例&#xff1a; <JAVA> ArrayList<String> list new ArrayList<>(); // 创建一个空的 ArrayList ArrayList<String> list new ArrayList<>(10); // 创建容量为 10 的 ArrayList …...

wordpress 利用 All-in-One WP Migration全站转移

导出导入站点 在插件中查询 All-in-One WP Migration备份并导出全站数据 导入 注意事项&#xff1a; 1.导入部分限制50MB 宝塔解决方案&#xff0c;其他类似&#xff0c;修改php.ini配置文件即可 2. 全站转移需要修改域名 3. 大文件版本&#xff0c;大于1G的可以参考我的…...

零基础教程:Windows电脑安装Linux系统(双系统/虚拟机)全攻略

一、安装方式选择 方案对比表 特性双系统安装虚拟机安装性能原生硬件性能依赖宿主机资源分配磁盘空间需要独立分区&#xff08;建议50GB&#xff09;动态分配&#xff08;默认20GB起&#xff09;内存占用独占全部内存需手动分配&#xff08;建议4GB&#xff09;启动方式开机选…...

聚焦AI与大模型创新,紫光云如何引领云计算行业快速演进?

【全球云观察 &#xff5c; 科技热点关注】 随着近年来AI与大模型的兴起&#xff0c;云计算行业正在发生着一场大变局。 “在2025年春节期间&#xff0c;DeepSeek两周火爆全球&#xff0c;如何进行私域部署成了企业关心的问题。”紫光云公司总裁王燕平强调指出&#xff0c;AI与…...

mapreduce 过程中,maptask的partitioner是在map阶段中具体什么阶段分区的?

在MapReduce的Map阶段中&#xff0c;Partitioner&#xff08;分区器&#xff09;的作用发生在map函数输出键值对之后&#xff0c;但在数据被写入磁盘&#xff08;spill到本地文件&#xff09;之前。具体流程如下&#xff1a; 分区发生的具体阶段&#xff1a; Map函数处理完成 当…...

找到字符串中所以字母异位词 --- 滑动窗口

目录 一&#xff1a;题目 二&#xff1a;算法原理 三&#xff1a;代码实现 一&#xff1a;题目 题目链接&#xff1a;438. 找到字符串中所有字母异位词 - 力扣&#xff08;LeetCode&#xff09; 二&#xff1a;算法原理 三&#xff1a;代码实现 版本一&#xff1a;无co…...

密码破解工具

1. 引言 密码是信息安全的核心之一,而攻击者往往利用各种工具和技术来破解密码。密码破解工具可以分为 离线破解(Offline Cracking) 和 在线破解(Online Cracking) 两大类: 离线破解:攻击者已经获取了加密的密码哈希(hash),可以在本地进行破解,无需与目标系统交互。…...

路由策略在双点双向路由重发布的应用

一、背景叙述 路由重发布通常是解决两个不同路由协议之间的互通问题&#xff0c;也就是路由双向引入。有时候&#xff0c;单点路由重发布在大规模网络中压力较大&#xff0c;缺乏冗余性&#xff0c;于是就有了双点双向路由重发布 问题&#xff1a;但是双点双向路由重发布也会…...

在Python软件中集成智能体:以百度文心一言和阿里通义千问为例

摘要 本文旨在探讨如何在Python软件中集成智能体&#xff0c;具体以百度文心一言和阿里通义千问等大模型生成的智能体为例。文章详细介绍了集成这些智能体的方法&#xff0c;包括环境准备、API调用、代码实现等步骤&#xff0c;并提供了相关的示例代码。通过集成这些智能体&…...

day22 学习笔记

文章目录 前言一、遍历1.行遍历2.列遍历3.直接遍历 二、排序三、去重四、分组 前言 通过今天的学习&#xff0c;我掌握了对Pandas的数据类型进行基本操作&#xff0c;包括遍历&#xff0c;去重&#xff0c;排序&#xff0c;分组 一、遍历 1.行遍历 intertuples方法用于遍历D…...

谈Linux之磁盘管理——万字详解

—— 小 峰 编 程 目录 一、硬盘的基本知识 1.了解硬盘的接口类型 2. 硬盘命名方式 3. 磁盘设备的命名 4. HP服务器硬盘 5. 硬盘的分区方式 二、 基本分区管理 1. 磁盘划分思路 2. 分区 2.1 MBR分区 2.2GPT分区 3.格式化—命令&#xff1a;mkfs 4.挂载 4.1手动挂…...

做好一个测试开发工程师第二阶段:java入门:idea新建一个project后默认生成的.idea/src/out文件文件夹代表什么意思?

时间&#xff1a;2025.4.8 一、前言 关于Java与idea工具安装不再展开&#xff0c;网上很多教程&#xff0c;可以自己去看 二、project建立后默认各文件夹代表意思 1、首先new---->project后会得到文件如图 其中&#xff1a; .idea文件代表&#xff1a;存储这个项目的历史…...

伪代码的定义与应用场景

李升伟 整理 伪代码&#xff08;Pseudocode&#xff09;是一种用近似自然语言&#xff08;通常是英语或开发者熟悉的语言&#xff09;和简单语法描述的算法逻辑工具。它介于自然语言和编程语言之间&#xff0c;不依赖具体语法规则&#xff0c;专注于表达思路&#xff0c;是编程…...

/sys/fs/cgroup/memory/memory.stat 关键指标说明

目录 1. **total_rss**2. **total_inactive_file**3. **total_active_file**4. **shmem**5. **其他相关指标**总结 以下是/sys/fs/cgroup/memory/memory.stat文件中一些关键指标的详细介绍&#xff0c;特别是与PostgreSQL相关的指标&#xff1a; 1. total_rss 定义&#xff1…...

机器学习中的聚类分析算法:原理与应用

一、什么是聚类分析&#xff1f; 聚类分析(Clustering Analysis)是机器学习中一种重要的无监督学习技术&#xff0c;它的目标是将数据集中的样本划分为若干个组(称为"簇")&#xff0c;使得同一簇内的样本彼此相似&#xff0c;而不同簇的样本差异较大。与分类不同&am…...

VUE中的路由处理

1.引入,预处理main.ts import {} from vue-router import { createRouter, createWebHistory } from vue-router import HomePages from @/pages/HomePages.vue import AboutPage from @/pages/AboutPage.vue import NewsPage from @/pages/NewsPage.vue //1. 配置路由规…...

MATLAB学习笔记(二) 控制工程会用到的

MATLAB中 控制工程会用到的 基础传递函数表达传递函数 零极点式 状态空间表达式 相互转化画响应图线根轨迹Nyquist图和bode图现控部分求约旦判能控能观极点配置和状态观测 基础 传递函数表达 % 拉普拉斯变换 syms t s a f exp(a*t) %e的a次方 l laplace(f) …...

Python: 实现数据可视化分析系统

后端基于Python 开源的 Web 框架 Flask&#xff0c;前端页面采用 LayUI 框架以及 Echarts 图表&#xff0c;数据库为sqlite。系统的功能模块分为数据采集和存储模块、数据处理和分析模块、可视化展示模块和系统管理模块。情感分析方面使用LDA等主题建模技术&#xff0c;结合领域…...

VectorBT量化入门系列:第一章 VectorBT基础与环境搭建

VectorBT量化入门系列&#xff1a;第一章 VectorBT基础与环境搭建 本教程专为中高级开发者设计&#xff0c;系统讲解VectorBT技术在量化交易中的应用。通过结合Tushare数据源和TA-Lib技术指标&#xff0c;深度探索策略开发、回测优化与风险评估的核心方法。从数据获取到策略部署…...

典型反模式深度解析及重构方案

反模式 1&#xff1a;魔法数字/字符串&#xff08;Magic Numbers/Strings&#xff09; ▐ 问题场景 // 订单状态校验 if (order.getStatus() 3) { // 3代表已发货&#xff1f;sendNotification(); }// 折扣计算 double discount price * 0.15; // 0.15是什么&#xff1f;…...

神经探针与价值蓝海:AI重构需求挖掘的认知拓扑学

当产品经理的决策边界遭遇量子态的用户需求&#xff0c;传统需求分析工具已显露出经典物理般的局限性。Gartner 2024报告揭示&#xff1a;全球Top 500企业中有83%遭遇需求洞察的"测不准困境"——用户声称的需求与行为数据偏差率达47%&#xff0c;而未被表达的潜在需求…...

Tomcat 负载均衡

目录 二、Tomcat Web Server 2.1 Tomcat 部署 2.1.1 Tomcat 介绍 2.1.2 Tomcat 安装 2.2 Tomcat 服务管理 2.2.1 Tomcat 启停 2.2.2 目录说明 2.2.3编辑主页 2.3 Tomcat管理控制台 2.3.1开启远程管理 2.3.2 配置远程管理密码 三、负载均衡 3.1 重新编译Nginx 3.1.1 确…...

CSS >子元素选择器和空格

在 CSS 中&#xff0c;> 符号是 子元素选择器&#xff08;Child Combinator&#xff09;&#xff0c;它用于选择某个元素的直接子元素&#xff08;仅限第一层嵌套的子元素&#xff0c;不包含更深层的后代元素&#xff09;。 语法 父元素 > 子元素 {样式规则; } 示例 …...

duckdb源码阅读学习路径图

🧭 DuckDB 最小内存源码阅读路径图 1️⃣ 数据流入口与批处理:DataChunk 项目内容✅ 目标理解 DuckDB 向量化执行的数据载体结构,如何影响内存📁 路径src/common/types/data_chunk.cpp/hpp🔍 入口函数DataChunk::Initialize, DataChunk::SetCardinality, Reset📌 优化…...

C#二叉树

C#二叉树 二叉树是一种常见的数据结构&#xff0c;它是由节点组成的一种树形结构&#xff0c;其中每个节点最多有两个子节点。二叉树的一个节点通常包含三部分&#xff1a;存储数据的变量、指向左子节点的指针和指向右子节点的指针。二叉树可以用于多种算法和操作&#xff0c;…...

BT-Basic函数之首字母W

BT-Basic函数之首字母W 文章目录 BT-Basic函数之首字母Wwaitwait for start wait wait函数使程序在执行下一个功能之前暂停指定的秒数。 语法 wait <数值表达式>参数 <数值表达式> 等待时长&#xff0c;以秒为单位。该值必须大于或等于0。小于25毫秒的正值会被…...

如何避免论文内容被误认为是 AI 生成的?

AIGC 检测的原理 AIGC 检测主要基于自然语言处理&#xff08;NLP&#xff09;和机器学习技术&#xff0c;通过深度分析文本内容来识别其中的 AI 生成痕迹。具体原理如下&#xff1a; 基础学习算法&#xff1a;利用机器学习算法对文本信息进行特征提取和表示&#xff0c;以便计…...

node.js之path常用方法

node.js之path常用方法 1.path.join([…paths]) 用于将多个路径片段拼接成一个路径&#xff0c;会自动处理路径分隔符&#xff0c;避免手动拼接时可能出现的问题 const joinedPath path.join(folder1, folder2, file.txt); console.log(joinedPath); // 输出: folder1/fol…...

【面试】C++与C override的报错阶段 RAII

文章目录 C 相对于 C 语言的主要区别**1. 面向对象编程&#xff08;OOP&#xff09;****2. 函数增强****3. 内存管理****4. 引用&#xff08;Reference&#xff09;****5. 标准模板库&#xff08;STL&#xff09;****6. 异常处理****7. 类型安全增强****8. 其他特性****9. 兼容…...

LeetCode 3396.使数组元素互不相同所需的最少操作次数:O(n)一次倒序遍历

【LetMeFly】3396.使数组元素互不相同所需的最少操作次数&#xff1a;O(n)一次倒序遍历 力扣题目链接&#xff1a;https://leetcode.cn/problems/minimum-number-of-operations-to-make-elements-in-array-distinct/ 给你一个整数数组 nums&#xff0c;你需要确保数组中的元素…...

机器学习课堂7用scikit-learn库训练SVM模型

1.用scikit-learn库训练SVM模型 代码 # 2-11用scikit-learn库训练SVM模型 import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn import svm # 导入sklearn# 参数设置 m_train 250 # 训练样本数量 svm_C 100 # SVM的C值 svm_kernel …...

模拟考试系统(ssm+vue+mysql5.x)

模拟考试系统(ssmvuemysql5.x) 模拟考试系统是一个为考试准备和管理提供全面支持的平台。系统提供了丰富的功能模块&#xff0c;包括个人中心、科目管理、复习资料管理、参考文献管理、用户管理、留言板管理、试题管理、试卷管理、系统管理和考试管理。用户可以在个人中心修改…...

【计网】作业4

一. 单选题&#xff08;共22题&#xff0c;64分&#xff09; 1. (单选题)主机甲采用停止-等待协议向主机乙发送数据&#xff0c;数据传输速率是4kb/s&#xff0c;单向传播时延为30ms&#xff0c;忽略确认帧的发送时延。当信道利用率等于80%时&#xff0c;数据帧的长度为&#…...

MYSQL数据库语法补充

一&#xff0c;DQL基础查询 DQL&#xff08;Data Query Language&#xff09;数据查询语言&#xff0c;可以单表查询&#xff0c;也可以多表查询 语法&#xff1a; select 查询结果 from 表名 where 条件&#xff1b; 特点&#xff1a; 查询结果可以是&#xff1a;表中的字段…...

Java基础编程练习第38题-除法器

题目&#xff1a;编写一个除法器&#xff0c;输入被除数和除数&#xff0c;并将结果输出。 这道题看似很简单&#xff0c;实则也不难。 就是假如用户输入的类型不同怎么办呢&#xff1f;用户输入int或者double类型应该怎么解决。这里我们就需要用到函数的重载。 代码如下&am…...

【基于Vue3组合式API的互斥输入模式实现与实践分享】

基于Vue3组合式API的互斥输入模式实现与实践分享 目录 背景与痛点设计思路技术实现使用场景与案例遇到的问题与解决方案最佳实践总结 1. 背景与痛点 在表单交互设计中&#xff0c;我们经常面临这样的场景&#xff1a;多种输入方式互斥。例如&#xff0c;在评分系统中&#…...

Linux进程概念及理解

目录 冯诺依曼体系结构 操作系统(Operator System) 概念 设计OS的目的 定位 如何理解 "管理" 总结 系统调用和库函数概念 进程 基本概念 描述进程-PCB task_struct-PCB的一种 task_ struct内容分类 组织进程 查看进程 通过系统调用获取进程标示符 通过系统调用创建进…...

苹果签名是否安全

苹果开发者与运营商都对苹果签名有一定了解&#xff0c;那么苹果签名安全吗&#xff1f;下面我来跟大家聊一聊。 苹果签名能验证应用的来源&#xff0c;但存在一些风险&#xff0c;有开发者伪造签名&#xff0c;让用户认为此产品是可信的&#xff0c;这样就安装到了恶意应用&am…...

STM32在裸机(无RTOS)环境下,需要手动实现队列机制来替代FreeRTOS的CAN发送接收函数

xQueueSendToBackFromISR(ecuCanRxQueue, hcan->pRxMsg, &xHigherPriorityTaskWoken)&#xff0c;xQueueReceive(mscCanRxQueue,&mscRxMsg,0)和xQueueSendToBack(mscCanTxQueue, &TxMessageTemp, 0 )这3个函数&#xff0c;在裸机下实现&#xff1a; 在裸机&…...

无法看到新安装的 JDK 17

在 Linux 系统中使用 update-alternatives --config java 无法看到新安装的 JDK 17&#xff0c;可能是由于 JDK 未正确注册到系统备选列表中。 一、原因分析 JDK 未注册到 update-alternatives update-alternatives 工具需要手动注册 JDK 路径后才能识别新版本。如果仅安装 JDK…...

JavaEE——线程的状态

目录 前言1. NEW2. TERMINATED3. RUNNABLE4. 三种阻塞状态总结 前言 本篇文章来讲解线程的几种状态。在Java中&#xff0c;线程的状态是一个枚举类型&#xff0c;Thread.State。其中一共分为了六个状态。分别为&#xff1a;NEW,RUNNABLE,BLOCKED,WAITING,TIMED_WAITING, TERMI…...

数据结构与算法-数学-(同余,线性同余方程,中国剩余定理,卡特兰数,斯特林数)

同余方程&#xff1a; 1.1 线性同余方程 & 乘法逆元 线性同余方程是形如 ax≡b(mod m) 的方程&#xff0c;可转化为 axmyb 的线性不定方程&#xff0c;利用扩展欧几里得算法求解。当 b1 时&#xff0c;x 就是 a 在模 m 意义下的乘法逆元。 代码&#xff1a; #include &…...

RAG 系统中的偏差是什么?

检索增强生成 (RAG) 在减少模型幻觉和增强大型语言模型 (LLM)的领域特定知识库方面已获得广泛认可。通过外部数据源佐证大型语言模型生成的信息&#xff0c;有助于保持模型输出的新鲜度和真实性。然而&#xff0c;最近在 RAG系统中的发现&#xff0c;突显了基于 RAG 的大型语言…...

[创业之路-362]:用确定性的团队、组织、产品开发流程和方法,应对客户、市场、竞争和商业模式的不确定性。

在充满不确定性的商业环境中&#xff0c;通过确定性的团队、组织、产品开发流程和方法构建核心竞争力&#xff0c;是应对客户、市场、竞争和商业模式变化的核心策略。以下从团队韧性、组织敏捷、产品开发闭环三个维度&#xff0c;结合实战方法论&#xff0c;提供可落地的解决方…...

系统与网络安全------网络通信原理(1)

资料整理于网络资料、书本资料、AI&#xff0c;仅供个人学习参考。 文章目录 网络通信模型协议分层计算机网络发展计算机网络功能什么是协议为什么分层邮局实例 OSI模型OSI协议模型OSI七层模型OSI七层的功能简介 TCP/IP模型OSI模型与TCP/IP模型TCP/IP协议族的组成各层PDU设备与…...

ArkTS语言基础之函数

前言 臭宝们终于来到了ArkTS基础之函数&#xff0c;今天我们来学习一下ArkTS的函数的相关知识&#xff0c;上一节中也有一些函数的基础知识。 函数声明 函数声明引入一个函数&#xff0c;包含其名称、参数列表、返回类型和函数体,在下面的例子中&#xff0c;我们声明了一个名…...

synchronized锁升级的锁对象和Mark Word

在讨论synchronized锁升级和Mark Word时&#xff0c;提到的"对象"通常指的是锁对象&#xff0c;也就是被用作synchronized同步锁的那个Java对象。 1. 什么是锁对象&#xff1f; 锁对象是指被用于synchronized同步代码块或方法的对象实例。例如&#xff1a; // 这个…...