
    ^h2d                         d dl Z d dlmZ d dlZd dlZd dlZ	 d dlmZ n# e	$ r dZY nw xY wddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d	 d
          Z G d d          ZdS )    N)Any)to_array_extended   )
TensorData)	ONNXModel)DEQUANT_OP_NAMEONNX_TYPE_TO_NP_TYPEQUANT_OP_NAMETENSOR_NAME_QUANT_SUFFIXfind_by_nameget_opset_versionmodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   J    e Zd Zdeeef         fdZd	dZd Zd Z	d Z
d ZdS )
QuantizationParamsdatac                 t   i | _         |                                D ]\  }}t          |t                    s#t	          dt          |           d|d          |dk    rPt          |t          t          t          j        t          f          s#t	          dt          |           d|d          |dk    r7t          |t                    s"| t	          dt          |           d          |dk    r9|j
        t          j        t          j        fvrt          d|j
         d|          || j         |<   d S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarrayfloatdtypefloat32float16
ValueError)selfr   kvs       g/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/onnxruntime/quantization/base_quantizer.py__init__zQuantizationParams.__init__(   sB   	JJLL 		 		DAqa%% T RDGG R RA R R RSSSF{{:a#sBJ1N#O#O{ jTXYZT[T[ j jde j j jkkkF{{:a#5#5{!- T$q'' T T TUUUG||
BJ/G G G !nYZY`!n!nij!n!noooDIaLL		 		    Nc                 8    | j                             ||          S N)r   get)r*   keydefault_values      r-   r2   zQuantizationParams.get5   s    y}}S-000r/   c              #   $   K   | j         E d {V  d S r1   r   r*   s    r-   __iter__zQuantizationParams.__iter__8   s&      9r/   c                     | j         |         S r1   r6   )r*   r3   s     r-   __getitem__zQuantizationParams.__getitem__;   s    y~r/   c                     || j         |<   d S r1   r6   )r*   r3   values      r-   __setitem__zQuantizationParams.__setitem__>   s    	#r/   c                 *    t          | j                  S r1   )lenr   r7   s    r-   __len__zQuantizationParams.__len__A   s    49~~r/   r1   )__name__
__module____qualname__dictr   r   r.   r2   r8   r:   r=   r@    r/   r-   r   r   '   s        tCH~    1 1 1 1          r/   r   c                   |    e Zd Z	 ddZdej        j        defdZd Z	d Z
d Zd	 Zd
 ZddZddZ	 	 ddZd ZdS )BaseQuantizerNc                    t          |          st          |          }d |j        j        D             | _        | j                            d |j        j        D                        | j                            d |j        j        D                        t          |          | _	        t          |          | _        || _        || _        |
r|
ni | _        d| j        v o| j        d         | _        d | _        d| j        v o| j        d         | _        | j                            dd           | _        | j                            dd          | _        | j                            d	          | _        t-          |d
|          | _        t-          |d
|          | _        	 |Zt3          d |                                D                       r/t7          dd |                                D              d          || _        || _        || _        |	| _        tA          | j                            di                     | _!        d | j	        "                                D             | _#        | j!        $                    | j#        | j        %                                |          \  }}|stM          |          | j!        '                                | _(        d S )Nc                     i | ]
}|j         |S rE   name).0vis     r-   
<dictcomp>z*BaseQuantizer.__init__.<locals>.<dictcomp>U   s    IIIBBGRIIIr/   c                     i | ]
}|j         |S rE   rJ   )rL   ots     r-   rN   z*BaseQuantizer.__init__.<locals>.<dictcomp>V   s     J J J" J J Jr/   c                     i | ]
}|j         |S rE   rJ   )rL   its     r-   rN   z*BaseQuantizer.__init__.<locals>.<dictcomp>W   s     I I I" I I Ir/   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec              3   B   K   | ]}t          |t                     V  d S r1   )r   r   )rL   ts     r-   	<genexpr>z)BaseQuantizer.__init__.<locals>.<genexpr>{   s/      ,k,kqAz1J1J-J,k,k,k,k,k,kr/   z(tensors_range contains unexpected types c                 ,    h | ]}t          |          S rE   )r!   )rL   r,   s     r-   	<setcomp>z)BaseQuantizer.__init__.<locals>.<setcomp>}   s    <e<e<eT!WW<e<e<er/   z, not TensorData.TensorQuantOverridesc                     i | ]
}|j         |S rE   rJ   )rL   initzers     r-   rN   z*BaseQuantizer.__init__.<locals>.<dictcomp>   s    [[[wW\7[[[r/   ))r   r   graph
value_infovalue_infosupdateoutputinputr   modelr   opset_versionper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr2   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanyvaluesr    tensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer   tensor_quant_overridesinitializerinitializersis_validkeysr)   get_quant_typestensor_quant_override_qtypes)r*   rg   ri   rj   rt   rs   rw   rx   ry   rz   rk   overrides_validoverrides_errs                r-   r.   zBaseQuantizer.__init__F   s    (.. 	B:5AAEII%+2HIII J Ju{7I J J JKKK I Iu{7H I I IJJJu%%
.u55&(.;C]] 22[t7IJZ7[ 	) '4+==q$BTUpBq 	* 261C1G1GHY[_1`1`!'+'9'='=>SUZ'['[$"0445GHH '(8-IY Z Z#L-NN
	 $,k,kTaThThTjTj,k,k,k)k)k$y<e<emNbNbNdNd<e<e<eyyy   +!2 0$8! 'AASAWAWXnprAsAs&t&t#[[$*BXBXBZBZ[[[)-)D)M)Mt/44668H*
 *
&  	,]+++,0,G,W,W,Y,Y)))r/   weight_quant_typereturnc                     | j         | j         S |t          j        j        t          j        j        t          j        j        t          j        j        fv S r1   )ro   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r*   r   s     r-   is_weight_symmetricz!BaseQuantizer.is_weight_symmetric   sJ    $0,, !!")	%
 
 	
r/   c                     t           r1   )NotImplementedErrorr7   s    r-   quantize_modelzBaseQuantizer.quantize_model   s    !!r/   c                 X    t          || j                                                  }|d uS r1   )r   rg   r|   )r*   
input_namer|   s      r-   is_input_a_initializerz$BaseQuantizer.is_input_a_initializer   s*    ":tz/E/E/G/GHH$&&r/   c                     | j         S r1   )ri   r7   s    r-   is_per_channelzBaseQuantizer.is_per_channel   s    r/   c                     t          || j                                                  }|)|j        t          j        j        t          j        j        fv S | j        r| j	        dS | j	        
                    |          S )NF)r   rg   r|   	data_typer   r   FLOATFLOAT16rl   rm   is_valid_quantize_weight)r*   weight_nameweights      r-   r   z&BaseQuantizer.is_valid_quantize_weight   sq    k4:+A+A+C+CDD#(8(>@P@X'YYY1 	t{7J5{33K@@@r/   c                     | j         (t          | j                   dk    r|j        | j         vrdS |j        | j        vrdS |j        t
          t          fv rdS | j        |j        | j        v rdS dS )Nr   FT)rx   r?   rK   op_typerz   r   r
   ry   )r*   nodes     r-   should_quantize_nodez"BaseQuantizer.should_quantize_node   s    ".D*++q00	!7775<t8885<O];;;5 ,d>S1S1S5tr/         ?c                 n	   t          || j                                                  }t          |          }|t          z   }| j        t          j        j        k    r	t          j
        |          }|j        t          j        k    rt          j        j        }	n?|j        t          j        k    rt          j        j        }	nt!          d|j         d          |                    t          j                  }
t          j        dg|
j                  }|                    d          }t          j                            |
|          }| j                            |g           d}n||z  |z  }t          j
        |t          j                  t          j
        |t          j                  z  }
|
                                }
t          j        t          j        t          j                  j                  }t          j        t          j        t          j                  j                  }t          j        |
|k               st          j        |
|k              rt=          j        d| d           t          j         |
||                              t          j                  }
t          j
        |
t          j                                      |j!                  }t          j                            ||          }| j                            |g           t          j
        ||j                                      d          }d	}| j        }	|d
z   }t          j                            ||          }| j                            |g           | j        t          j        j        k    r| j        }nt          j        j"        }|dz   }| j        t          j        j        k    r*t          j#        $                    || j        dgdg          }n|j%        dk    rYt          j&        |j'        t          j                                      d          }t          j                            ||          }n#t          j#        $                    ||g dg          }| j                            |g           ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r&   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   rg   r|   r   r   rt   r   r   r   r#   asarrayr&   r(   r   r'   r   r    astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxru   loggingwarningclipdimsINT32helpermake_tensorsizezerosshape)r*   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrX   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datas                           r-   quantize_bias_static_implz'BaseQuantizer.quantize_bias_static_impl   s-    (	4:3I3I3K3KLL)*:;;	'*BB  0 ===:i((DzRZ''!-5

rz))!-3

 uhlhr u u uvvv![[44N1#^-ABBBJ(0044O&*&7&B&B>Sf&g&g#J))+B*CDDDII %|3d:J  Z	DDDrzR\dfdnGoGoGooN+1133N 
28BH#5#5#9::I
28BH#5#5#9::Ivny011 RVNY<V5W5W nynnn    W^Y	JJQQRTRZ[[N :nBHEEEMMN^NcddL&*&7&B&B<Qd&e&e#J))+B*CDDD !j9?KKKSSTVWWO*I*J %8($B!(,(9(D(D_Vo(p(p%
%%'D&EFFF  0 ===+KK*0K!4}!D 0 ===)-)@)@AWY]Yjmnloruqv)w)w&&_q  8J$4BHEEEMMbQQL)-):)E)ElTj)k)k&&)-)@)@AWYdfhkljm)n)n&
%%'A&BCCC  %"
 	
r/   Fc                 2   |j         t          z   }|j         dz   }|j         dz   }t          |          }| j                            |j         i           }	d|	v r|	d         j        }d|	v rd|	v rt          j        |	d         t          |                   }
t          j        |	d                   }t          ||
                                ||
          }t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       nQ|| j        k    r|                     |          n| j        }t)          |
                                ||	                    d
|          |	                    d| j        o|          | j        |	                    d          |	                    d                    \  }
}}t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       |j        }t2          j                            ||g |                    d                                                    }t2          j                            ||g |
                    d                                                    }| j                            ||g           |s| j        t2          j         j!        k    rFt3          j                     }| j        |_        |j"        #                    |j"                   ||_         |
                                $                                %                                |_&        tN          tO          |          }|j(        |j(        k    s*|%                                |%                                k    rrtS          d|j(         d|%                                dd          d|%                                dd          d|j(         dtU          |          dd          d          n|t2          j         j+        t2          j         j,        fv r|j        t          j-        t          j.        fvrtS          d| d          t_          ta          |%                                                    }t2          j                            |||j"        |d          }nkt          j1        |t2          j        2                    |                                        |j"                  }t2          j3        4                    ||          }| j                            |g           |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricrj   rminrmaxrj   rq   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rK   r   r   r{   get_per_tensor_overridesrX   r#   r   r	   r   flattenr   r$   r!   r&   r'   r(   rt   r   rp   r   r2   rj   rq   r   r   r   r   r   tolistrg   r   r   r   r   extendcopytobytesraw_datar   r   RuntimeErrorr   r   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r*   r   qTyperj   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datas                       r-   quantize_initializer_implz'BaseQuantizer.quantize_initializer_impl  s    &>>+-[8+
 ,F335NNv{hjNkk?**#L1=Eo%%,/*I*I/,"?G[\aGbcccJH_W566E,UK4G4G4I4I5R\]]Mj"*55\\7\$zJZJZ7\7\\\5#rz11j6F"*6T6T6T6J$466 7U6TT eRZ00RR2RT%[[2R2RRR0R <ADDU;U;U00777[_[wI/<##%%##K;;,00ARAcWcdd#2-11&99-11&990 0 0,J} j"*55\\7\$zJZJZ7\7\\\5#rz11j6F"*6T6T6T6J$466 7U6TT eRZ00RR2RT%[[2R2RRR0& K33JRQVQ^Q^_dQeQeQlQlQnQnoo;227E2zGYGYZ_G`G`GgGgGiGijj
%%'8:J&KLLL  #	B D$4$AAA'+'7'9'9$151B$.$)00===,9$)0=0E0E0G0G0L0L0N0N0V0V0X0X$-$0 ..BCCE{k&7775==??mNcNcNeNe;e;e*H8I H H,4466ss;H HCH==??SVTVSVCWH HdjdpH H%()=%>%>tt%DH H H  
 4+0$2B2HIII &rw.AAA&nnnn   $$6}7L7L7N7N$O$OPP (,{'>'>}eU[U`bmsw'>'x'x$$ "
=@d@dej@k@k l l l t tK! ! (,'8'C'CMS`'a'a$J))+?*@AAAgz11r/   Tc                 @   t          || j                                                  }|t          d|          t	          |          }t          |j                  }t          ||          \  }	}
|	st          d| d| d|           |
}|j        |         }| j        	                    |d|ig          }t          |          }|dk    r||k    rt          d| d	| d
          t          |d         d         |          \  }}|r||k    r%t          d| d| d|d         d          d          d|d         v r|d         d         j
        }|d                             d|                     |                    }|d                             d| j        o|          }g }g }g }t          |j                  }t          |          }d||<   t          |          D ]}|                    ||          }||k     r|nd}||         }d|v rDd|v r?t#          j        |d         t&          |                   }t#          j        |d                   }t)          ||                                ||          }t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       n/t9          |                                |||| j        |                    d          |                    d                    \  }}}t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       |                    |           |                    |           |                    t#          j        |                               |                     	t#          j!        ||          }|tD          z   }|dz   }|dz   } |j#        |         g}!tH          j%        &                    | |j'        |!t#          j(        |          )                                          }"tH          j%        &                    |||!t#          j(        |          )                                          }#| j        *                    |"|#g           |sJ|tH          j+        j,        tH          j+        j-        fv r|j        t"          j.        t"          j/        fvrta          d| d          tc          te          |3                                                    }$tH          j%        &                    ||||$d           }%| j        *                    |%g           nt#          j        |tH          j%        4                    |                                         |j#                  }tH          j5        6                    ||          }%| j        *                    |%g           ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rj   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   rg   r|   r)   r   r?   r   r   r{   get_per_channel_overridesrX   r2   r   rj   listrangetaker#   r   r	   r   r   r   r$   r!   r&   r'   r(   r   rq   appendr   r   concatenater   r   r   r   r   r   hstackr   r   r   r   r   r   r   r   r   r   r   r   r   r   )&r*   r   rt   channel_axisrj   r   r|   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr   r   r   zero_scale_shaper   r  r  r  s&                                         r-    quantize_weight_per_channel_implz.BaseQuantizer.quantize_weight_per_channel_impln  s    #;
0F0F0H0HII7EEE'447=))#1,#M#M y 	9+ 9 9, 9 9*69 9  
 !l3'+'B'\'\v|&<%= (] (
 (
$ !$$@ A A A%%*?=*P*PU U U,U U U  
 1??[\]?^_e?fht0u0u-% 	,)F)F_[ _ _(_ _4PQR4STZ4[_ _ _   7:::7:<HTL0377TE]E]^jEkEkll	3A6::>4K\Kmamnn
*,'W]++M**%&\"}%% *	q *	qA&||A|<<*+.C*C*CQQ"&BCY&Z#111lF]6]6]X&=l&KSghtSuvvv
!8!ABB-= "2":":"<"<eZ. .* "*bj99``;`dS]N^N^;`;```9!'2:55*:Jbj:X:X:X:
(8:: ;Y:XX "%44VV6Ve6V6VVV4!"<bjII  It,F'G'GII I 
 AN$,,.. !-#'#6"9"="=f"E"E"9"="=f"E"EA A A=
E#= "*bj99``;`dS]N^N^;`;```9!'2:55*:Jbj:X:X:X:
(8:: ;Y:XX "%44VV6Ve6V6VVV4!"<bjII  It,F'G'GII I "":...e$$$+222:>X3Y3Y3a3abn3o3opppp N+JLYY#&>>- 8+
 (,\:; K33-/?:AVAVA]A]A_A_
 
  ;22\#3RY5O5O5V5V5X5X
 
 	
%%'8:J&KLLL  	F 0 5t7G7MNNN$*27BH2EEE&nnnn   $$67H7P7P7R7R$S$STT (,{'>'>!<QU (? ( ($ 
--/C.DEEEE$&J%+>>|LL% % % '+*++ " (,'8'C'CDUWd'e'e$
--/C.DEEEgz11r/   c                    | j         d S | j                                        D ]b}|j        dv r|                     |          s"t          | j                                        |j        d                            dk    r^|j        d         | j         vs|j        d         | j         vr| j         |j        d                  }t          |t                    s.t          dt          |           d|j        d         d          || j         |j        d         <   |j        dk    r_|                     |          st          t          j        d          t          j        d	          
          | j         |j        d         <   dd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rw   rg   nodesr   r   r?   input_name_to_nodesrf   re   r   r   r    r!   r#   r'   )r*   r   tds      r-   adjust_tensor_rangesz"BaseQuantizer.adjust_tensor_ranges  s   %FJ$$&& 	q 	qD|///0066 tz5577
1FGG1LL:a=(:::dk!nTXTf>f>f'A7!"j11 [#$YtBxx$Y$YdkRSn$Y$Y$YZZZ46"4:a=11**0066 5?rzRU`b`jkn`o`o5p5p5p"4;q>2#	q 	qr/   r1   )r   )FF)TF)rA   rB   rC   r.   r   r   DataTypeboolr   r   r   r   r   r   r   r  r$  r.  rE   r/   r-   rG   rG   E   s        HZ HZ HZ HZT
T5E5N 
SW 
 
 
 
" " "' ' '     A A A  &R
 R
 R
 R
hY2 Y2 Y2 Y2@ L2 L2 L2 L2\q q q q qr/   rG   ) r   typingr   numpyr#   r   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r{   r   r   rG   rE   r/   r-   <module>r9     s                  7777777    " ! ! ! ! ! ! ! ! ! ! !                              ? > > > > >       <Lq Lq Lq Lq Lq Lq Lq Lq Lq Lqs    ))