
    ^hF                         d dl Z d dlZd dlZd dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddl m!Z!  G d	 d
e          Z"dS )    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domainquantize_onnx_initializer&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                   ^   e Zd Z	 d*dZd Zd Zd Zd Zd Zd Z	d	 Z
d+dZd Zd Zd Zd Zd,dZ	 d-dZd Z	 d*dZdej        dej        dedej        dedeeej        dz  f         fdZdedej        ddfdZd.dZd Zd+d Z	 	 	 	 d/d"Z 	 	 	 	 	 d0d$Z!d1d%Z"	 	 d2d&Z#d' Z$d( Z%d) Z&dS )3ONNXQuantizerNc                    t          j        | |||||||	|
||           |s| j                                         t	          | j        j                  }d |j        j        D             | _        | j                            d |j        j	        D                        | j                            d |j        j
        D                        t          |          | _        || _        || _        | j        dk    | _        d| j        v o| j        d         | _        g | _        d| _        i | _        | j                            d |j        j	        D                        | j                            d |j        j
        D                        | j        j        j        j        D ]:}| j                            t,                              |j	        d	                     ;| j        t0          vrt3          d
| j                   |                                 | _        d| _        d| _        d| _        d| _        i | _         | j        !                                | _"        d S )Nc                     i | ]
}|j         |S  name).0vis     g/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/onnxruntime/quantization/onnx_quantizer.py
<dictcomp>z*ONNXQuantizer.__init__.<locals>.<dictcomp>H   s    MMMMMM    c                     i | ]
}|j         |S r   r    r"   ots     r$   r%   z*ONNXQuantizer.__init__.<locals>.<dictcomp>I   s    $N$N$NRRWb$N$N$Nr&   c                     i | ]
}|j         |S r   r    r"   its     r$   r%   z*ONNXQuantizer.__init__.<locals>.<dictcomp>J   s    $M$M$MRRWb$M$M$Mr&   
   MatMulConstBOnly/c                     i | ]
}|j         d S r   r    r(   s     r$   r%   z*ONNXQuantizer.__init__.<locals>.<dictcomp>V   s    !J!J!J"'1!J!J!Jr&   c                     i | ]
}|j         d S r1   r    r+   s     r$   r%   z*ONNXQuantizer.__init__.<locals>.<dictcomp>W   s    !I!I!I"'1!I!I!Ir&   r   zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)#r   __init__modelreplace_gemm_with_matmulr   graph
value_infovalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnodedictfromkeysr
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr8   per_channelreduce_ranger@   rA   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizerD   rI   s                 r$   r7   zONNXQuantizer.__init__'   s`    	 	
 	
 	
  	*J//111:4:;KLLEMMek6LMMMD##$N$N5;;M$N$N$NOOO##$M$M5;;L$M$M$MNNN"5))DJ	"&"4r"9%74;M%M%xRVRdewRx"  !J!Ju{7I!J!J!JKKK  !I!Iu{7H!I!I!IJJJJ$*/ 	D 	DD$$T]]4;%B%BCCCC9,,,IdiIIJJJ#'#E#E#G#G  (H$&E#+"1 $&  &*Z%J%J%L%L"""r&   c                    t           j                            |d| j        j        j                  }t          |           t          || j        | j        | j	        | j
        | j        | j        | j        | j        | j        | j        | j                  }| |_        | j         | d|_        |                                 |j        j        j        S )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        onnx-quantizer)producer_nameopset_importsr/   )onnxhelper
make_modelr8   opset_importr   r   rW   rX   r@   rA   rY   rZ   r[   r\   r]   r^   rD   parentrG   quantize_modelr:   )rV   subgraph	graph_keywarped_modelsub_quantizers        r$   quantize_subgraphzONNXQuantizer.quantize_subgraphp   s     {--***7 . 
 

 	<(((%IK!"!%
 
  $'+'7$E$E$E$E!$$&&&"(..r&   c                    d |j         D             }t          |          dk    r|S |j        r|j        n|j         dt          | j                   }i }|j         D ]}|j        t          j        j        k    r-|j        | 	                    |j
        | d|j                   i}n|j        t          j        j        k    rZg }|j        D ]F}|                    | 	                    || d|j         dt          |                     g           G|j        |i}nt          |          }|                    |           t          j        j        |j        |j        |j        fd|j        i|S )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        c                 z    g | ]8}|j         t          j        j        k    s|j         t          j        j        k    6|9S r   )typerc   AttributeProtoGRAPHGRAPHS)r"   attrs     r$   
<listcomp>z>ONNXQuantizer.quantize_node_with_sub_graph.<locals>.<listcomp>   sG     
 
 
yD/555dFYF`9`9` 9`9`9`r&   r   _node_count_:r!   )	attributelenr!   op_typerF   rp   rc   rq   rr   rm   grs   graphsextendr   r=   rd   	make_noder?   r>   )	rV   rI   graph_attrs	node_namekwargsrt   kvvalueri   s	            r$   quantize_node_with_sub_graphz*ONNXQuantizer.quantize_node_with_sub_graph   s   

 

 
 

 {q  K!%bDII4<0b0bSQUQ_M`M`0b0b	N 	 	DyD/555i!7!79@Z@Zty@Z@Z![![\d1888 $  HLL 22 (#, G Gty G G3u:: G G     i''--MM"{$T\4:t{eeQUQZe^deeer&   c                 b    t          d | j                                        D                       S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c              3   B   K   | ]}|j         d k    p
|j         dk    V  dS )QuantizeLinearDequantizeLinearN)rz   r"   rI   s     r$   	<genexpr>z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   sH       
 
W[DL,,R@R0R
 
 
 
 
 
r&   )anyr8   nodes)rV   s    r$   has_QDQ_nodeszONNXQuantizer.has_QDQ_nodes   sA      
 
_c_i_o_o_q_q
 
 
 
 
 	
r&   c                     t          || j                                                  dS | j        | j                            |          S dS )NTF)r   r8   initializerrg   find_initializer_in_path)rV   initializer_names     r$   r   z&ONNXQuantizer.find_initializer_in_path   sJ    ($**@*@*B*BCCO4;";778HIIIur&   c                     | j                             |           |D ]&}|j        D ]}| j                            |           'd S N)rF   r}   r>   rU   add)rV   r   rI   output_names       r$   add_new_nodeszONNXQuantizer.add_new_nodes   sa    e$$$ 	< 	<D#{ < <*..{;;;;<	< 	<r&   c                    |                                  rt          j        d           | j                                        D ]}| j        r|                     |          }t          | j                  }t          | |          }|
                                 t          |t          | j                            D ]1}| j        |         j        D ]}| j                            |           2|                                  | j                                                            d           | j                                        j                            | j                   | j        N| j                                        \  }}t          |          dk    rt-          dt/          |          z             t0          | j        j        _        t4          | j        j        _        d | j        j        j        D             }|sId | j        D             }	|	r6| j        j        j                                        }
d|
_        t<          |
_        | j        j        S )NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.rI   r   z0Invalid model with unknown initializers/tensors.c                 2    g | ]}|j         t          k    |S r   )domainr   )r"   opsets     r$   ru   z0ONNXQuantizer.quantize_model.<locals>.<listcomp>   s%    bbbeXaHaHaEHaHaHar&   c                 (    g | ]}|j         d k    |S )zcom.microsoft)r   r   s     r$   ru   z0ONNXQuantizer.quantize_model.<locals>.<listcomp>   s$    ZZZ4;/;Y;Y;Y;Y;Yr&   r   ) r   loggingwarningr8   r   enable_subgraph_quantizationr   ry   rF   r   quantizeranger>   rU   r   _dequantize_outputsr:   
ClearFieldrI   r}   rg   clean_initializersRuntimeErrorstrr   ra   r   producer_versionrf   versionr   r   )rV   rI   number_of_existing_new_nodesop_quantizerir   _initializers_not_foundms_opsetms_nodesr   s              r$   rh   zONNXQuantizer.quantize_model   sG    	On  
 J$$&& 
	@ 
	@D0 ?88>>+.t~+>+>(,T488L!!###7T^9L9LMM @ @#'>!#4#; @ @K.22;????@@ 	  """ 	
%%f---
&&t~666 ;(,
(E(E(G(G%A%)**Q.."#UX[\rXsXs#sttt)5
&,7
)bbtz'7'Dbbb 	)ZZZZZH )
(599;; !(zr&   c                     d| j         v r.t          j        d|| j         d                    | j         d         S t          d|d          )NDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)rD   r   infor   rV   tensor_names     r$   _get_default_tensor_typez&ONNXQuantizer._get_default_tensor_type   ss    $"444LV"#67  
 %&9::J J J J
 
 	
r&   Fc                 P   t          || j                                                  }||j        S || j        v rd| j        |         }|j                            d          r=|r*|j        j        j        dk    r| 	                    |          S |j        j        j        S | j
        r| j        |r| 	                    |          S d S | j                            |          }||S | j
        r%| j        r| j                            |          }||S |r| 	                    |          S d S )Ntensor_typer   )r   r8   r   	data_typer<   rp   HasFieldr   	elem_typer   r   rg   is_valid_quantize_weightget_tensor_type)rV   r   	mandatoryweightr#   otyperess          r$   r   zONNXQuantizer.get_tensor_type  sF   k4:+A+A+C+CDD##$***!+.Bw.. 5 F!4!>!!C!C88EEEw*441 	t{7J B44[AAA444[AAL, 	 	+--k::C
 	>00===tr&   c                    |                      |          r|                     |          S || j        v r~| j        |         }|j                            d          r5|j        j        j        t          j        j	        t          j        j
        fv rdS t          j        d|d|j         d           dS | j        r!| j        r| j                            |          S t          j        d|d           dS )	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r<   rp   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   rg   is_float_tensor)rV   r   r#   s      r$   r   zONNXQuantizer.is_float_tensor  s"   &&{33 	>00===$***!+.Bw.. 273F3P&,&.U 4 4 tOr{rrhjhorrr   5, 	< 	<;..{;;;7K 7 7 7	
 	
 	
 ur&   c                     |t           j        j        k    r|                     |||          S |t           j        j        k    r|                     |||          S t          d| d          )a  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            parameter initial_type: type to quantize from
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8rL   )rV   
input_name
nodes_listqTypeinitial_types        r$   &_get_dynamic_input_quantization_paramsz4ONNXQuantizer._get_dynamic_input_quantization_params6  st     J*///CCJPZ\hiiiJ*000DDZQ[]ijjj?u???@@@r&   c                    t           j        j        }|dz   }|dz   }t          j                            d|g|dz   g|d          }|                    |           |dz   }t          j                            d|g|dz   g|d          }	|                    |	           |d	z   }
t          j                            d
|j        d         g|
dz   g|
          }|                    |           |d	z   }t          j                            d
|	j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         |j        d         g|dz   g|          }|                    |           t          j                            | j	        |g t          |          dz  g          }| j                            |           |dz   }t          j                            d|j        d         | j	        g|g|          }|                    |           t          j                            | j        |g dg          }| j                            |           || j        g g fS )az  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLOAT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMax       @	scale_DivDiv)r   r   r   rc   rd   r~   appendr>   make_tensorrP   r   r8   add_initializerrR   )rV   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps                       r$   r   z9ONNXQuantizer._get_dynamic_input_quantization_params_int8E  s    &+ &0$|3+//Lt#$ 0 
 
 	/***$|3+//Lt#$ 0 
 
 	/*** .6"k33#A&' 4'(	
 
 	-...-6"k33#A&' 4'(	
 
 	-...!J.{,, '*,?,Fq,IJD !	
 
 	,'''+11'!%((3./	
 
 	
""?333#k1.. #T%@A	
 
 	.))) 001H%QSVWUXYY
"">222!8"b@@r&   c                    t           j        j        }|dz   }|dz   }|dz   }t          j                            d|g|dz   g|d          }|                    |           |dz   }	t          j                            d	|g|	dz   g|	d          }
|                    |
           t          j                            | j        |g t          |          g          }| j
                            |           t          j                            | j        |g d
g          }| j
                            |           |dz   }t          j                            d|
j        d         |j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         | j        g|g|          }|                    |           |dz   }t          j                            d| j        |j        d         g|dz   g|          }|                    |           |dz   }t          j                            d|j        d         |g|dz   g|          }|                    |           |dz   }t          j                            d|j        |dz   g|          }|                    |           |dz   }t          j                            d|j        |g||          }|                    |           ||g g fS )a{  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLAOT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r           
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rc   rd   r~   r   r   rO   r   r8   r   rQ   r>   )rV   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                            r$   r   z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8  sb    &,%0"]2$|3+//Lt#$ 0 
 
 	/***$|3+//Lt#$ 0 
 
 	/*** "[44(!%(()	
 
 	
""#5666![44T5I<Y[^a]bcc
""#5666 $l2..#A&(>q(ABd"#	
 
 	.)))#l2.."1%t'CD	
 
 	.))) !#44k++!?#9!#<=4 	
 
 	+&&& #44k++"$454 	
 
 	+&&&"%88--g{7IM\`L`Kacpqq-(((!$66{,,V]5IM?\hmr,ss,'''B66r&   c                 "   | j         }||R| j        	|| j        vrt          j        d| d           dS | j        |         }t	          |t
                    s#t          dt          |           d|d          |t          |          dk    rt          d	| d
|           t          j        |d         g          }t          |d         d          r%|d         j        t          j        t          j        fvr(t          dt          |d                    d|          t          j        |d         g          }|j        t          j        k    sJ |d         }ntt          j        |g          }t          j        |g          }| j        |         }d|v r"|d         j        }|                    |          }|j        t          j        k    sJ g }	|dz   }
g }|dz   }t$          j                            |
||	|                                                                          }| j                            |           |j        t          j        k    rt2          j        j        }nA|j        t          j        k    rt2          j        j        }nt          d|j         d|          t$          j                            ||||                    d                                                    }| j                            |           d||
||	fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        Nz$Quantization parameters for tensor:"z" not specified)F r  r  r  Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rZ   rN   r   r   
isinstancer   	TypeErrorrp   ry   rL   nparrayhasattrr  float32float16float64astyperc   rd   r   raveltolistr8   r   r   r   r   r   reshape)rV   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr  zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales                   r$   _get_quantization_paramsz&ONNXQuantizer._get_quantization_params  s5    / 5'/:TE]3]3]_J___```,,-j9Ff&899 W U4<< U Uj U U UVVV~V!1!1 J3=J JAGJ J  
 !#&*>)? @ @6'?G44 kw8MVXV`bdblUm8m8m !iD4I4I!i!i[e!i!ijjj8VG_$566L%3333$\2OO "- 9 98YK00L-j9F&  w-+22599%3333$}4(*
 +))_.>@Q@W@W@Y@Y@`@`@b@b
 
 	
""7+++++#/5JJ2:--#/7JJc1CccU_ccddd[,,Z[R^RfRfglRmRmRtRtRvRvww

"":...Z+?OOOr&   c           	         |j         |         }|dk    s
J d            |t          z   }|dz   }	|	|d||}}}
n|                     |          \  }
}}}}g }|
r't          j                            d|||g|g|	          }n| j        rdS | j        rF|t          j	        j
        k    r1|dz   }|dz   }t          j                            d	|g|||g|	          }n\|J d
|d| d| d|             |                     ||||          \  }}}}t          j                            d|||g|g|	          }t          |||||          | j        |<   g ||S )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :param initial_type: type of the weight to quantize
        :return: List of newly created nodes in NodeProto format.
        r  z*Cannot access undefined variable in graph._QuantizeLinearNTr   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r?   r	   r2  rc   rd   r~   rA   rC   r   r   r   r   r   rS   )rV   rI   input_indexr   given_scale_namegiven_zp_namer   r   r   ql_node_name
data_foundr.  zp_namer   r   qlinear_noder-  zp_shapes                     r$   _get_quantize_input_nodesz'ONNXQuantizer._get_quantize_input_nodes2  s
   " Z,
R!M #;;!$55(}/H/35E}G
JJ484Q4QR\4]4]1J
GQ %	;00 Z1	 LL { t & 5J4J4P+P+P'(2
$}4#{44+L *g6 	    $//h",h h>Ih hSXh haeh h 0// ??
ESXgs?tt#{44$W5 M 	    0>j+Wacjlq/r/r ,%%%%r&   c                 t    || j         v r| j         |         S | j        | j                            |          S d S r   )rS   rg   find_quantized_value)rV   r   s     r$   rA  z"ONNXQuantizer.find_quantized_valuex  sA    111+J77;";33J???tr&   c
                 &   t          j        |          }
|d|
z  z  |z  }t          j        |                                t           j                  }t          j        |                                t           j                  }||z  }||k     r|dk    rz||z  }||z  }|	6t          j        d| d| d| d           d	t          j        ||          fS t          j        d
|	 d| d| d| d	           d	|                    |          fS d|fS )zHAdjust a single weight scale to ensure the int32 bias does not overflow.r   r  r   NzIncreasing scale for weight `z` by the ratio z to ensure bias `z` has a valid scale.TzIncreased scale[z] for weight `z` by ratio F)r  absr  itemr  r   r   r   )rV   bias_valinput_scaleweight_scaleweight_scale_dtypeweight_name	bias_nameqrangemultiplicative_epsilonidxabsmaxbias_smallest_valid_scaleinput_scale_fp64weight_scale_fp64bias_candidate_scaleratio	new_scales                    r$   $adjust_single_weight_scale_if_neededz2ONNXQuantizer.adjust_single_weight_scale_if_needed  s    !!$:cFl$Kf$T!8K$4$4$6$6bjIIIH\%6%6%8%8
KKK/2CC #<<<CWZ]C]C]-0DDE)E1I{DK D DPU D D$-D D D   RXi7IJJJJJGs G G+ G GRW G G'0G G G   Y--.@AAAAl""r&   rG  rH  rJ  bias_tpis_per_channelreturnc                    |j         sdS t          |          }t          j        t          j                  }d}t          j        |j        t          j                  t          j        |j        dz   t          j                  z
  }	|j	        }
d}|st          j
        |                                t          j        dt          j                            }t          j        |                                t          j        dt          j                            }t          j        t          j        |          t          j        |                    }|                     ||||
||j        |	|          \  }}|r|}d}nw|j        rpt!          |j                  dk    rXt#          |j        d                   D ]=}|                     ||         |||         |
||j        |	||	  	        \  }}|r|||<   d}>||fS )	zOChecks if the bias scale is too small and increases the weight scale if needed.)FNgqh ?rC  r   Fr   T)rN  )sizer   r  iinfoint32r  maxr  minr  minimummaximumrD  rV  r!   shapery   r   )rV   rG  rH  rJ  rW  rX  bias_float_data
int32_inforM  rL  rI  updatedrminrmaxrO  changedrU  r   s                     r$   #_adjust_weight_scale_for_int32_biasz1ONNXQuantizer._adjust_weight_scale_for_int32_bias  s      	;/88Xbh''
!'*.
;;;bhz~XYGYacak>l>l>ll)/  	#:o1133RXarz5R5R5RSSD:o1133RXarz5R5R5RSSDZtbfTll;;F!%!J!J"&	" 	"GY  ( 	#C(:$;$;q$@$@<-a011 # #%)%N%N#A& O&L* &O 
& 
&"  #&/LO"G$$r&   rU  c                    || j         vrdS | j         |         }t          || j                                                  }t          |j        | j                                                  }t          |j        | j                                                  }t          |j        | j                                                  }||||dS | j                            |           | j                            |           t          j	        
                    |          }|j        }	t          j        |t          j                            |j                            }
t          j	                            |
                    |j                  |j                  }| j                            |           t+          || j        ||
|	|j                  }| j                            |           dS )zCRe-quantizes the given weight initializer using the provided scale.NrC  )quant_weight_name)rS   r   r8   r   r.  r<  q_nameremove_initializerrc   numpy_helperto_arrayaxisr  asarrayrd   tensor_dtype_to_np_dtyper   
from_arrayr#  dimsr   r   rY   )rV   rJ  rU  qv	weight_tp
scale_initzp_initq_weight_initweight_zero_pointrp  scale_npnew_scale_initnew_q_weights                r$   _requantize_weightz ONNXQuantizer._requantize_weight  s    d666F%k2 dj.D.D.F.FGG	!"-1G1G1I1IJJ
rz4:+A+A+C+CDD$RY
0F0F0H0HII
 2goI^F
%%j111
%%m444 -66w??w :it{/S/ST]Tg/h/hiii*55h6F6Fz6W6WY[Yfgg
"">222 1 i
 
 
 	
""<00000r&         ?c           
         || j         v r| j         |         j        S | j         |         j        }t          || j                                                  }t          |          }|| j         v r| j         |         j        }n8|| j        v r|                     |          \  }	}}	}	}	nt          d| d          t          || j                                                  }
t          |
          }| j         |         j
        }t          || j                                                  }|t          j                            |          nd}| j        }||j        r|                                sv| j        t$          j        j        fv r]t          || j                                                  }|                     |||||          \  }}|r|                     ||           |}|                     ||||          \  }}}}}}|| j         vsJ t1          ||||t2          j        |j        dk    rdnd||          }|| j         |<   |S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationNr   r   )	node_type
node_qtype)rS   rl  r.  r   r8   r   r   rN   r2  rL   r<  rc   rn  ro  rW   r[  r   rY   r   r   r   ri  r~  quantize_bias_static_implr   r   Initializer)rV   rK  r   rJ  betaweight_scale_nameweight_initializerrH  r   r   inputscale_initializerrG  weight_zp_nameweight_zp_initrz  rX  bias_initializer
did_updatenew_weight_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datar  r  quantized_values                             r$   quantize_bias_staticz"ONNXQuantizer.quantize_bias_static  s    000+I6== !4[AL)*;TZ=S=S=U=UVV,-?@@ 111#7
CN4333+/+H+H+T+T(AAqqjjjjkkk!-.>
@V@V@X@X!Y!Y+,BCC 1+>F%ndj6L6L6N6NOOJXJdD-66~FFFjn))!& *%))++ * !j&<&A%CCC+Itz7M7M7O7OPP+/+S+S , ,(J(  0''5EFFF/ **9k<QUVV	
%"  88888(%"* %))AAt!	
 	
 	
 /> +""r&   c                 8    || j         v p|| j        v p|| j        v S )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )r<   rH   rU   r   s     r$   contains_tensorzONNXQuantizer.contains_tensorJ  s4    
 D,, ;t00;t99	
r&   c           	      :    |                      ||dddd|          S )NFr  rI   indicesinitializer_use_weight_qTyperX   op_level_per_channelrp  from_subgraph_ONNXQuantizer__quantize_inputs)rV   rI   r  r  s       r$   quantize_activationz!ONNXQuantizer.quantize_activationT  s4    %%).!&' & 
 
 	
r&   r  c           	      :    |                      ||d||||          S )NTr  r  )rV   rI   r  rX   r  rp  r  s          r$   quantize_weightzONNXQuantizer.quantize_weighta  s6     %%)-%!5' & 
 
 	
r&   Tc           
         g }g }	g }
g }|D ]4}|j         |         }|| j        v r\| j        |         }|                    |j                   |	                    |j                   |
                    |j                   u|s@|
                    d           |                    d           |	                    d           t          || j                                                  }|| j	        r2|r0| 
                    |j        |r| j        n| j        ||          \  }}}n)|                     ||r| j        n| j        |          \  }}}|
                    |           |	                    |           |                    |           |                     |          r| j                            |dz   | j        | j                                                  }||j         |         }|| j        v rj| j        |         }|                    d          sJ d| d            |j                            d          sJ d| d            |j        j        j        }n$|| j        v sJ d	|d
            | j        |         }|                     ||| j        |          }| dS |r|                     |           n|                    |           |d         }|j        dk    r\|
                    |j                   |                    |j         d                    |	                    |j         d                    9|
                    |j        d                    |                    |j        d                    |	                    |j        d                    | j        z| j                            ||g||||d          \  }}}}|
                    |d                    |                    |d                    |	                    |d                    tA          d| d| j!                   |
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r  Nr4  rp   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r6  )NNNNr  r   r      r   T)r  rX   r  rp  r  z!Invalid tensor name to quantize: z @graph scope)"r?   rS   r   r.  r<  rl  r   r8   r   rW   quantize_weight_per_channelr!   rY   rZ   quantize_initializerr  find_node_by_namerF   r:   r<   r   rp   r   r   rH   r?  r   r}   rz   r>   rg   r  rL   rG   )rV   rI   r  r  rX   r  rp  r  scale_nameszero_point_namesquantized_input_namesr   r7  
node_inputr  r   q_weight_namer<  r.  r=  r   r;   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s                               r$   __quantize_inputszONNXQuantizer.__quantize_inputst  s   .  "" c	r c	rKK0J T555"&"::"F""?#=>>> ''(?@@@%,,_-CDDD %,,R000""2&&& ''+++&z4:3I3I3K3KLLK&# (< 
 88#(-Id))tOd$	 	%"
 :>9R9R#-Id))tOd$: :6M7J &,,];;; ''000"":....%%j11 ;r#z;;!22DNDJDTDTDVDV     '!%K!8J!T%555%)%5j%A
)226::cc<c*<c<c<ccc:)77FFssHsV`HsHsHsssF'1'B'L  *T->>>>,* , , ,  ?>>
 (,'8'D+/+I+Ik4+@| ,J , ,( ,3777$ ;**+?@@@@%9:::#7#;L'+;;;)001DEEE&&|'9!'<===$++L,>q,ABBBB)001DQ1GHHH&&|':1'=>>>$++L,?,BCCCC( K11 M1M!-)="& 2  0+& &,,-I!-LMMM""#5a#8999 ''(?(BCCCC !!pZ!p!p^b^n!p!pqqq$&6UJJr&   c                    |j         | j        v r&| j        |j                  }|j        |j        |j        fS |                     ||||          \  }}}t          |j         |||t          j        d          }|| j        |j         <   |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        N)	r!   rS   rl  r<  r.  quantize_initializer_implr   r   r  )	rV   r   r   rX   keep_float_weightr  r  r<  r.  s	            r$   r  z"ONNXQuantizer.quantize_initializer  s     ;$222"6v{CO&'*  .2-K-KE<):.
 .
*w

 )K*
 
 1@ -gz11r&   c                     || j         v r!| j         |         }|j        |j        |j        fS |                     |||||          \  }}}	t          |||	|t          j        d           }|| j         |<   |||	fS r   )rS   rl  r<  r.   quantize_weight_per_channel_implr   r   r  )
rV   rJ  rY   channel_axisrX   r  r  r  r<  r.  s
             r$   r  z)ONNXQuantizer.quantize_weight_per_channel  s     $222"6{CO&'*  .2-R-R|\CT.
 .
*w
 )*
 
 1@ -gz11r&   c                 f   || j         v r&|| j        vr| j         |         }t          |j        | j                                                  }| j        j        j        dk    s| j        j        j        dk    r.|,|*t          j        	                    |          j
        dk    sJ |dz   }| j                            || j        | j                                                  }|9|j        |j        |j        g}t          j                            d||g|          }|S ||j        d         k    sJ dS )a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32 or float16
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        r`   Nr   _DequantizeLinearr   r   )rS   rU   r   r.  r8   r   ra   rc   rn  ro  r[  r  rF   r:   rl  r<  rd   r~   r>   )rV   
value_namer  rw  dqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes           r$   _dequantize_valuezONNXQuantizer._dequantize_value8  sK    $2224Ke9e9e"6zBO &o&@$*BXBXBZBZ[[J z-1AAA
 .2BBBzG] ")T->-G-G
-S-S-X\]-]-]-]]&)<<M J88X\XbXhXhXjXjkkM$#*#.#+#
 #'+"7"7&*}# # '& "]%9!%<<<<<tr&   c                     | j                                         j        D ]8}|                     |j                  }|| j                            |           9dS )z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)r8   r:   r>   r  r!   rF   r   )rV   r>   r  s      r$   r   z!ONNXQuantizer._dequantize_outputs_  s`     j&&((/ 	7 	7F"44V[AAO*%%o666	7 	7r&   c           	      Z   | j         d S |                                  i }| j         D ]}| j         |         }t          |t                    s#t	          dt          |           d|d          | j                            |i           }| j        }d|v r|d         j	        }d|v rd|v r|d         |d         }}n|t          j        j        k    rt          ||j        d                   \  }}n|                    d	|j        d
                   }|                    d|j        d                   }	|                    d| j                  }
|                    dd          }t%          |||
          \  }}t'          ||	|||
| j                  \  }}t+          |||          ||<   |S )Nr  r  r   )default_valr  r  r  r   rf  r   rg  	symmetricrX   F)rX   r  )r  r  r  )r[   adjust_tensor_rangesr  r   r  rp   tensor_quant_overridesget_per_tensor_overridesrZ   r   rc   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rV   rN   r   tdquant_overridesr  zeror  rf  rg  r  rX   qminqmaxs                 r$   rM   z+ONNXQuantizer.calculate_quantization_paramsk  s   %4!!### - 	w 	wK#K0Bb*-- T R488 R R+ R R RSSS"9RRS^lnRooO.J..,\:F
/))lo.M.M-l;_W=Uet/<<<5j"*Q-PPee&**62>!3DEE&**62>!3DEE+//T=YZZ	.22>5II4Zlfoppp
d.tT4yRVReffe/ATY^ku/v/v/v,,""r&   r   )F)NN)NNN)r  )FFr  F)TFFr  F)FF)TF)'__name__
__module____qualname__r7   rm   r   r   r   r   rh   r   r   r   r   r   r   r2  r?  rA  rV  r  ndarrayr   rc   r   booltupleri  r~  r  r  r  r  r  r  r  r  r   rM   r   r&   r$   r   r   &   s        FM FM FM FMR/ / /> f  f  fD
 
 
  < < <+  +  + Z
 
 
"   2  2A A ARA RA RAh\7 \7 \7|9P 9P 9P 9Px aeD& D& D& D&L  " ## ## ## ##J6%Z6% j6% 	6%
 !6% 6% 
tRZ$&&	'6% 6% 6% 6%p$1c $1bj $1T $1 $1 $1 $1LF# F# F# F#P
 
 
	
 	
 	
 	
" "
 
 
 
. &*"AK AK AK AKF2 2 2 2L 2 2 2 2@% % %N
7 
7 
7 #  #  #  #  #r&   r   )#r   numpyr  rc   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r   r&   r$   <module>r     s             & & & & & & = = = = = = = = ! ! ! ! ! ! ! ! ! ! ! !                                     & ( ' ' ' ' 'e# e# e# e# e#M e# e# e# e# e#r&   