
    \`h                    P   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ 	 d d
lmZ n# e$ r dZY nw xY w	 d dlmZm Z  n# e$ r dZdZ Y nw xY w	 d dl!m"Z" n# e$ r dZ"Y nw xY wdZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,i Z-d  e.e          D             Z/ G d de          Z0 G d de          Z1 G d de          Z2 G d de          Z3ej        j4         e
j5        d           ej        j6         e
j5        d!          ej        j7         e
j5        d"          ej        j8         e
j5        d#          ej        j9        eej        j:        eej        j;        e iZ<ej        j6         e
j=        d e
j>        $           e
j=        d%e
j>        $          fej        j4         e
j=        d&e
j?        $           e
j=        d'e
j?        $          fej        j8         e
j=        d e
j@        $           e
j=        d(e
j@        $          fej        j7         e
j=        d)e
jA        $           e
j=        d*e
jA        $          fej        j;         e
j=        d e $           e
j=        d+e $          fej        j:         e
j=        d,e$           e
j=        d-e$          fiZBej        j6         e
j=        d e
j>        $           e
j=        d.e
j>        $          fej        j4         e
j=        d/e
j?        $           e
j=        d'e
j?        $          fej        j8         e
j=        d e
j@        $           e
j=        d0e
j@        $          fej        j7         e
j=        d1e
jA        $           e
j=        d*e
jA        $          fiZCej        j6         e
j=        d e
j>        $           e
j=        d'e
j>        $          fej        j4         e
j=        d2e
j?        $           e
j=        d3e
j?        $          fej        j8         e
j=        d e
j@        $           e
j=        d*e
j@        $          fej        j7         e
j=        d4e
jA        $           e
j=        d5e
jA        $          fej        j;         e
j=        d e$           e
j=        d-e$          fej        j:         e
j=        d6e$           e
j=        d7e$          fiZDd8d9d:ZEdd;ZFdd=ZGd> ZH	 	 	 	 dddLZI	 dddNZJ	 	 dddWZKddXZLddYZMdd]ZNddaZO G db dc          ZP G dd de          ZQ G df dg          ZRdh ZSdi ZTdj ZUdk ZVddpZWdq ZXddsZYdduZZddwZ[ddyZ\dd|Z]dd}Z^dd~Z_ddZ`ddZaddZbddZcddZdddZeddZfddZgddZhddZiddZjddZkdS )    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                    i | ]@}t          t          t          |          t                    *t          t          |          |AS  )
isinstancegetattrr   int).0ks     j/var/www/html/movieo_spanner_bot/venv/lib/python3.11/site-packages/onnxruntime/quantization/quant_utils.py
<dictcomp>r%   9   sA    qqqq
SZ[fhiSjSjloHpHpqQ''qqq    c                  2    e Zd ZdZdZd Zed             ZdS )QuantizationModer      c                    | j         S Nnameselfs    r$   __str__zQuantizationMode.__str__D   
    yr&   c                V    	 t           |          S # t          $ r t                      w xY wr+   )r(   KeyError
ValueError)modes    r$   from_stringzQuantizationMode.from_stringG   s7    	#D)) 	 	 	,,	    (N)__name__
__module____qualname__
IntegerOps
QLinearOpsr0   staticmethodr6   r   r&   r$   r(   r(   @   sH        JJ     \  r&   r(   c                  2    e Zd ZdZdZd Zed             ZdS )QuantizedValueTyper   r)   c                    | j         S r+   r,   r.   s    r$   r0   zQuantizedValueType.__str__S   r1   r&   c                V    	 t           |          S # t          $ r t                      w xY wr+   )r?   r3   r4   )vs    r$   r6   zQuantizedValueType.from_stringV   s7    	%a(( 	 	 	,,	r7   N)r8   r9   r:   InputInitializerr0   r=   r6   r   r&   r$   r?   r?   O   sH        EK     \  r&   r?   c                  \    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	             Zed
             ZdS )	QuantTyper   r)                  c                    | j         S r+   r,   r.   s    r$   r0   zQuantType.__str__g   r1   r&   c                V    	 t           |          S # t          $ r t                      w xY wr+   )rF   r3   r4   )ts    r$   r6   zQuantType.from_stringj   s6    	Q< 	 	 	,,	r7   c                   | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j	        S | t           j
        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S t!          d| d          )NzUnexpected value qtype=.)rF   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r4   r.   s    r$   tensor_typezQuantType.tensor_typeq   s    9?""##9###$$9$$$%%9###$$9***++9###$$9?""##<4<<<===r&   N)r8   r9   r:   rQ   rS   rY   rW   rU   r]   r[   r0   r=   r6   propertyr_   r   r&   r$   rF   rF   ^   s|        EFMFGEF     \ > > X> > >r&   rF   c                  2    e Zd ZdZdZd Zed             ZdS )QuantFormatr   r)   c                    | j         S r+   r,   r.   s    r$   r0   zQuantFormat.__str__   r1   r&   c                V    	 t           |          S # t          $ r t                      w xY wr+   )rb   r3   r4   )formats    r$   r6   zQuantFormat.from_string   s7    	v&& 	 	 	,,	r7   N)r8   r9   r:   	QOperatorQDQr0   r=   r6   r   r&   r$   rb   rb      sH        I
C     \  r&   rb   int8uint8int16uint16dtype   i   i  i i     i      ii  ii@   i i @  rH   zero_point_indexc                T   g }t          |          D ]\  }}t          j        t          |          t          j                  r(|                    t          j        |                     nEt          |t          j                  r|                    |           nt          d| d|           || k    rI|d         }|j
        t          j        k    s|j
        t          j        k    rt          d|j
                   t          |          dk    rt          |          n|d         S )Nzarg z is not an array: ru   zzero_point cannot be r)   r   )	enumeratenumpy
issubdtypetypenumberappendarrayr   ndarray	TypeErrorrm   float32float16lentuple)rw   argsnew_argsiarB   s         r$   _check_typer      s   H$ 
C 
C1DGGU\22 	=OOEKNN++++5=)) 	=OOA;1;;;;<<<   Aw%-''17em+C+C A A ABBB!(mma//5???Xa[@r&   c                   | t           v sJ d|  d            | t          j        j        t          j        j        t          j        j        t          j        j        fv rF|dk    rt          d|d          |j        t          j
        k    rt          j        }n:|j        t          j        k    rt          j        }nt          d|j         d          t          t!          t#          dg dgt$          j                            d| g dg          	          t#          d
g ddg          gdt+          d|d           t+          d|d           gt+          d| d           g                    }t-          |          }t/          |                    d ||d          d                   S t           |          }	t3          | dd          \  }
}|t5          |
|          n|
}|t7          ||          n|}t          j        |                    t          j
                  |z                                  |z             }t          j        ||||           t/          |                    |	                    S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rP   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rZ   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrm   rz   r   FLOATr   FLOAT16r4   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrm   qminqmaxcliplowcliphigharr_fp32s                  r$   quantize_nparrayr      sv   ((((eeee )(( +-)-	   ??%&j[e&j&j&jkkk9%%#)IIY%-''#+II====>>>"Bdk>U>UVbdikmpqor>s>s   .0L0L0LseTT	 *3	4@@*7ItDD (UD99: 
 

  !,,3774sU)C)CDDQGHHH %U+,URWXXX
d$'O#dC...&*&63tT???D=#**U]";";e"C!J!J!L!Lz!YZZ
8WhH====8??511222r&   Fc           	        |dk    s|dk     rt          d| d|           t          j        | t          j        d| j                            } t          j        |t          j        d|j                            }|,t          || t          j        || j                  z             }|r?t          j        t          j        |           t          j        |                    }| } |
 }||k    sJ d|  d|             t          j        || z
  t          j	                  }t          j        |t          j	                  t          j        |t          j	                  z
  }t          j        ||z            }	|	dk    s
J d            |	t          j
        |j                  j        k     r7t          j        d	|j                  }	t          j        d|j                  }
n|rRt          j        t          j        ||z   t          j        d
t          j	                  z            |j                  }
n3t          j        t          j        || |	z  z
            |j                  }
|	                    |j                  }	|
|	gS )a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rl   Nzqmin=z > qmax=zscale issue      ?g       @)r4   rz   minimumr   rm   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r$   compute_scale_zpr      sQ   ( axx4!88r^brrlprrsss
 =u{1DJ???@@D=u{1DJ???@@D !4nDJ O O OOPP uy	$@@ww4<<<555t55<<<	TD[	6	6	6B	T	/	/	/%+d%-2X2X2X	XBKR  EA:::}:::u{4:&&+++Ctz222[$*555

 
	Y TD[EK5=,Q,Q,QQRRZ^Zd  JJ U[u1D%E%ETZXXXJTZ((r&   c                &  	 d}| t           vr| t          j        k    rTddlm	 ddlm} |}	fdt          d          D             }t          j	        d |D             t          j
                  }nt          d	|  d
          |t           | <   n| t          j        k    rddlm} |}|t          d|  d          t          j        t           |                    }t          j	        d|          }t          j	        ||z  |j                  }||gS )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )float8e4m3_to_float32r   c                &    g | ]} |          S r   r   )r"   r   r   s     r$   
<listcomp>z+compute_scale_zp_float8.<locals>.<listcomp>K  s%    GGGq//22GGGr&      c                b    g | ],}t          j        |          t          j        |          *|-S r   )rz   isnanisinf)r"   fs     r$   r   z+compute_scale_zp_float8.<locals>.<listcomp>M  s3    TTTqek!nnTU[QR^^TTTTr&   rl   zQuantization to element_type=z not implemented.zUnexpected element_type rP   )FLOAT8_DISTRIBUTIONSr   rZ   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangerz   r   r   r4   r   stdrm   )
element_typer   zp_dtyper   
all_valuesvaluesstd_f8zeror   r   s
            @r$   compute_scale_zp_float8r   :  sW    H///;333??????HHHHHH#HGGGGE#JJGGGJ[TTJTTT\a\i  FF \\\\\]]]-3\**	1	1	1DDDDDDB<BBBCCCY+L9::F;q)))DKfCI666E%=r&   datanumpy.ndarray
quant_typeonnx.TensorProto.DataTyper   boolr   r   float | Nonermin_overridermax_overridereturn#tuple[numpy.ndarray, numpy.ndarray]c                   t          | t          j                  s t          dt	          |            d          ||}n%t          |           r|                                 nd}||}n%t          |           r|                                 nd}t          j        || j	                  }t          j        || j	                  }t          j        d| j	                  }	|t          j        k    rJ|rt          d          t          j        |           }
t          ||
          \  }}	t          ||	d	          S |t          j        t          j        t          j        t          j        t          j        t          j        fv rit-          |||
          \  }}t          |           rt/          ||||||          \  }}	nt          j        d|j	                  }t          ||	d	          S t1          d| d          )a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rP   Ng        rl   r   z1Unsupported option reduce_range=True for float 8.r   rv   r   z Unexpected value for quant_type=)r   rz   r   r   r|   r   r   r   r   rm   r   rZ   RuntimeErrorr   r   r   rR   rT   rX   rV   r^   r\   r   r   r4   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                 r$   compute_data_quant_paramsr   _  s   * dEM** OMT

MMMNNN  YY/txxzzzC  YY/txxzzzC;t4:...D;t4:...DK4:...E[--- 	TRSSSioo3JDD
E:uqAAAA   -ZQZ[[[
dt99 	: 0tT4Tb c cJQdj999J:uqAAAA
E
EEE
F
FFr&   2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]c                   t          | ||||||          \  }}|t          j        k    rt          || ||          }	t	          |	                    t          j                                                  dz  dk              rxt          j	        |           }
t          d|
                                 d|
                                 d|	                                 d|	                                 d	          |||	fS |t          j        t          j        t          j        t          j        t          j        t          j        fv rt          || ||          }	|||	fS t'          d| d          )al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    ro   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rP   )r   r   rZ   r   anyr   rz   ri   ravelr   r   r   r   rR   rT   rX   rV   r^   r\   r4   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r$   quantize_datar     s   8 2 J ((()%ujII%%ek2288::S@SHII 	mD))GWgkkmm W Ww{{}} W W&4&8&8&:&:W W>L>P>P>R>RW W W   5.00   *%ujII5.00
;5;;;
<
<<r&   weightonnx.TensorProtor   r   axis
int | Nonequant_weight_name
str | Nonec                (   t          |           }d}|%t          ||                                ||          }n|j        |         }t	          |j                  }	d|	|<   g }
t          |          D ]}|                    ||          }||         }||         }t          ||                                ||          }|
                    t          j	        |          
                    |	                     t          j        |
|          }|r|n| j         t           }|t          j        j        k    rAt          j                    }||_        |j                            | j                   ||_        |                                                                                                |_        t0          t1          |          }|j        |j        k    s*|                                |                                k    rrt3          d|j         d|                                dd          d|                                dd          d| j         dt5          |          dd	          d
          n|t          j        j        t          j        j        fv r|j        t          j        t          j        fvrt3          d| d          tA          tC          |                                                    }t          j"        #                    ||| j        |d          }nmt          j"        $                    |          }t          j	        ||          
                    | j                  }t          j%        &                    ||          }|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr)   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rP   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrl   )'tensor_proto_to_arrayr   r   shapelistr   taker~   rz   r   reshapeconcatenater-   TENSOR_NAME_QUANT_SUFFIXr   r   rZ   	data_typedimsextendflattencopytobytesraw_datar   r   strr^   r\   rm   rh   ri   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)r   r   r   r   r   r   weight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r$   quantize_onnx_initializerr    s   ( (//K*.M|([5F5F5H5H%Q[\\#)$/K-..T&(#}%% 	l 	lA&++At44L!!HM!+A%5L..00-AS& &" (..u}=S/T/T/\/\]i/j/jkkkk)*EtLL):j%%6;@jPh@j@jMT%222#/11)3&!((555$1!(5(=(=(?(?(D(D(F(F(N(N(P(P%( &&:;;E{k///5==??mF[F[F]F]3]3]"@0A @ @$,,..ss3@ @;@==??3B3;O@ @\b\h@ @ !566tt<@ @ @  
 
(-t/?/EF	F	Fuz5;&???uuuuvvv .}/D/D/F/FGGHH  ${66}jRXR]_jpt6uu==jIIm>JJJRRSYS^__#0;;M=YYr&   c                   | t           j        j        k    rt          d          d}|rt                              |           }n3|r| t          v rt          |          }nt                              |           }|st          d|  d          |\  }}|dk    s|dk     r&t          d| d| d|j	         d	| d
| d|            |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rZ   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr4   rm   )r   r   r   qranger   r   s         r$   r   r   )  s#    
&333!"_```F 0,0077	 0u ===.u5$((// xvvvvwwwJD$axx4!8844 4"&4 404
4 4KW4 4"4 4,14 4
 
 	
 Mr&   c                6    t          | ||          \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r$   get_qrange_for_qTyper'  I  s&     )	RRRJD$$;r&   r!   ranktuple[bool, int]c                <    | dk     r| |z   n| }|dk    o||k     }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r   )r   r(  	axis_normis_valids       r$   normalize_axisr-  S  s7      $axxtTIA~2)d"2HYr&   src_8bitr  	bytearrayc                "   t          |           }|dk    rt                      S |dz   dz  }t          |          }d}d}||dz
  k     r3| |dz            dz  dz  | |         dz  z  ||<   |dz  }|dz  }||dz
  k     3||k     r| |         dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r)   rG   rp   rI   )r   r/  )r.  	num_elemsdst_sizedstsrc_idst_is         r$   r  r  _  s     HIA~~{{A!#H
H

CEE )a-

	*S0Q68E?S;PQE


 )a-


 ye_s*E
Jr&   c                       e Zd ZdZg g dfdZdS )QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r+   )	r-   initializerrminsrmaxszero_pointsscalesr   r   r   )
r/   r-   r9  r:  r;  r<  r=  r   r   r   s
             r$   __init__zQuantizedInitializer.__init__  sJ     	&

&	,			r&   r8   r9   r:   __doc__r>  r   r&   r$   r7  r7  }  s=               r&   r7  c                  "    e Zd ZdZ	 	 	 	 ddZdS )QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r+   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r/   r-   new_quantized_namerF  zero_point_namequantized_value_typer   rI  rJ  rK  s
             r$   r>  zQuantizedValue.__init__  sH     "($&.	"$$r&   )NNNNr?  r   r&   r$   rB  rB    s@          % % % % % %r&   rB  c                      e Zd ZdZd ZdS )BiasToQuantizez+
    Represents a bias to be quantized
    c                0    || _         || _        || _        d S r+   )	bias_name
input_nameweight_name)r/   rR  rS  rT  s       r$   r>  zBiasToQuantize.__init__  s    "$&r&   Nr?  r   r&   r$   rP  rP    s-         ' ' ' ' 'r&   rP  c                   | j         dk    rt          d| j         d          | j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         d	k    r| j        }nl| j         d
k    r| j	        }nY| j         dk    r| j
        }nF| j         dk    r| j        }n3| j         dk    r| j        }n t          d| j         d| j          d          | j        |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r)   rG   rH   rI   rJ   rK   rq      	   r   z has unsupported type rP   )r|   r4   r-   r   r   srN   gfloatsintsstringstensorsgraphs)	attributer   s     r$   attribute_to_kwargr`    sB    ~TinTTTUUU ~	1			1			1			1			1		 	1			1		!	1		!	2		 ]in]]IN]]]^^^NE""r&   c                Z      fd|D             }t          |          dk    r|d         ndS )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    c                *    g | ]}|j         k    |S r   r,   )r"   item	item_names     r$   r   z find_by_name.<locals>.<listcomp>  s%    BBBd49	+A+AT+A+A+Ar&   r   N)r   )rd  	item_listitemss   `  r$   find_by_namerg    s;     CBBBiBBBE5zzA~~5884/r&   c                d    d}t          t          |                    D ]}||         | k    r|}|S )zC
    Helper function to return index of an item in a node list
    ru   )r   r   )	elem_name	elem_listelem_idxr   s       r$   get_elem_indexrl    s@     H3y>>""  Q<9$$HOr&   c                H    t           j                            d| |g|          S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr-   s      r$   get_mul_noderq    s"     ;  $???r&   filenamer   
identifierr  c                V    | j                             | j        |z   | j        z             S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)rr  rs  s     r$   generate_identified_filenamery  	  s(     ?##HMJ$>$PQQQr&   c                   dd l }dd lm} dd l} |j        |j                   t          d           t          |            t          d           t          |           |                    | |d           |                    d           |	                    d           |
                    d	           |                                 d S )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrz   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr~  pltrz   s        r$   
apply_plotr    s    JJJ######LLLES[1111	,	$KKK	
	*JJtZdJ+++JJ~JJxII()))HHJJJJJr&   rP   c           	     
   ddl ddl}ddlddlmc mc m} ddlmc mc m} ddl	m
mm t          j        d|              G fddj                  }                    | |          }t#          t$          j                            |d          d	          5 }|                    |           ddd           n# 1 swxY w Y                       d          }|                    d
          }	g }
t1          |                                           D ]:}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}t=          t?          |                    }|	                     |          }|	                     |          }|!                    |	           |"                    |	|           |#                    |	|           |$                    |	          }|
%                    |           <|&                    |	tO          |
                     |
D ]}|	(                    |           |	)                                }|*                    |	           |+                    |	|           |,                    |	          }|	-                    |           |	.                                }t#          t$          j                            |d          d          5 }|                    |           ddd           n# 1 swxY w Y   t$          j/                            dd          dv r|j        0                    |d          }|1                                }te          |          D ]c}|3                    |          }t          j        |4                                           t          j        |5                                           dt#          t$          j                            |d          d	          5 }t1          |                                           D ]}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}|dz   t=          t?          |                    z   }|                    |           |                    d           	 ddd           dS # 1 swxY w Y   dS )z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  "    e Zd Z fdZdS )*write_calibration_table.<locals>.MyEncoderc                \   t          |f          r|                                S t          |j                  r*|                                t	          |j                  ddS t          |          r|j        j        t	          |          dS j        	                    | |          S )Nznumpy.array)r   rm   CLS)r  r   )
r   to_dictr   tolistr  rm   	__class__r8   JSONEncoderdefault)r/   objr  r  r  jsonnps     r$   r  z2write_calibration_table.<locals>.MyEncoder.default3  s    #
K899 %{{}}$#rz** ] #

s39~~m\\\#011 J"}5CIII#++D#666r&   N)r8   r9   r:   r  )r  r  r  r  r  s   r$   	MyEncoderr  2  sB        	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7r&   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r)   1zcalibration.cache 
)6r  flatbuffersrz   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwriter   Buildersortedkeysr  floatr"  rc  r  r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndr~   TrtTableStartDictVectorr   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrZ  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  r  r  r  r  s                           @@@@@r$   write_calibration_tabler  "  s!   
 KKKLLLLLLLLLLLLLLLLLLLLLLLL]]]]]]]]]]L:'8::;;;7 7 7 7 7 7 7 7 7 7 7D$ 7 7 7 

,)
<<I	bgll3 233S	9	9 T

9               88A;;D!!$''GN',,..// ) )"3'>>##(,,y$//446677(,,x..335566
 CKK  '',,))%00
w'''222!!':666((11	i(((($$Wc..A.ABBB# 3 3	''	2222!!##I7###Wi000$$W--INN9
..

C	bgll3 9::D	A	A T

3               
z~~*C00H<<%77Q??	''))x 	, 	,A!q))IL)))L**++++ 
bgll3 344c	:	: 
d+002233 		 		C&s+F~~''Hhll9d3388::;;hll8T227799::F #ICKK 0 00EJJuJJt		
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s7   )CCCM))M-0M-&C7U++U/2U/-C6?c                   | dk                         t          j                  }| dk                         t          j                  }|                                }| j        |z
  }|sdS |t          |          z  t          |          z  }|dk     sJ d| d| d|             |                      t          j                  }|||z  | |z  z   z  }|dk                                    dk    sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   rz   r   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s           r$   smooth_distributionr  ~  s     Qu}--H6//%-00KllnnG'!J tw%
"3"33D#:::Q'QQ
QQ4QQ:::88EM""DC(Nte{222DAI??!!!!Kr&   
model_pathc                    t          j        |                                 d          }t          d |j        j        D                       S )NF)load_external_datac              3  >   K   | ]}t          j        |          V  d S r+   )r   uses_external_data)r"   
intializers     r$   	<genexpr>z*model_has_external_data.<locals>.<genexpr>  s.      mmz#6zBBmmmmmmr&   )r   loadas_posixr   graphr9  )r  models     r$   model_has_external_datar    sF    Ij))++FFFEmmUZU`Ulmmmmmmr&   opt_model_pathc                    t                      }|                                |_        t          j        |_        i }dg|d<   t          |                                 |fddgi|}dS )z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  r  sess_optionkwargs_s        r$   optimize_modelr    sr     !""K+9+B+B+D+DK(+A+RK(F%6$7F !,,..jjH^G_jcijjAAAr&   r  r   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           dS )z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r$   add_pre_process_metadatar    sh    .0CDN :( 	: 	:D!!48TZ"89999K~66666r&   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r$   model_has_pre_process_metadatar    sG     ( 	 	Dx333
FY8Y8Ytt5r&   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           d S )N
onnx.inferr  r  )r  r  r  s      r$   add_infer_metadatar    sh    "$78N 4% 	4 	4A!!15!'"23333K~66666r&   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )Nr  r  TFr  )r  r  s     r$   model_has_infer_metadatar    sF     % 	 	Au$$4G)G)Gtt5r&   c                    d | j         D             }t          |          dk    rt          d          |d         j        }|S )Nc                6    g | ]}|j         r|j         d k    |S )r   )domain)r"   opsets     r$   r   z%get_opset_version.<locals>.<listcomp>  s.    mmm5<mSXS_clSlSleSlSlSlr&   r)   z$Failed to find proper ai.onnx domainr   )opset_importr   r4   version)r  ai_onnx_domainopset_versions      r$   get_opset_versionr    sM    mm);mmmN
>a?@@@"1%-Mr&   weight_typec                   t          |           }|}t          |d|          }|dk     r0|t          j        j        k    rt          j        d| d           d}n?|dk    rt          j        d| d           n |dk     rt          j        d| d           d}||k    r/t          j                            | |          } t          |           } | S )	Nr_      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  r    r   r   rZ   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  r  r  target_opset_versionweight_quant_types        r$   update_opset_versionr(    s-   %e,,M(]KHHr/43C3PPP1= 1 1 1	
 	
 	

  "	"		M= M M M	
 	
 	
 	

 
		1= 1 1 1	
 	
 	

  "},,&66u>RSS 7u==Lr&   c                ,   t          | d          }t          j                            t	          |           t	          |                     t          j        |                                          }t          |           |                                 |S )Nz	-inferred)	ry  r   shape_inferenceinfer_shapes_pathr  r  r  r  unlink)r  inferred_model_pathr  s      r$   load_model_with_shape_inferr.    s{    6z;OO**3z??C@S<T<TUUUI)224455Eu   Lr&   c                <   t          j        d          5 }t          j        |           }t	          |                              d          }t          j        ||                                d           t          |          cd d d            S # 1 swxY w Y   d S )Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr  deepcopyr   rv  r   
save_modelr  r.  )r  quant_tmp_dir
model_copyr  s       r$   r%  r%    s    		$L	9	9	9 7]]5))
-((11,??

J$7$7$9$9QUVVVV*:66	7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   A.BBBr9  r   c                    | j         t          j        j        t          j        j        fv rt
          j                            |           S t          d| j	         dt          | j                             )Nz&Only float type is supported. Weights z is )r  r   r   r   r   r   r  to_arrayr4   r-   type_to_name)r9  s    r$   r   r     si    !7!=z?U?] ^^^ ))+666
l1All|T_TiGjll  r&   tensor_namec                    | dz   S )N_QuantizeLinearr   r;  s    r$   add_quant_suffixr?    s    ***r&   c                    | t           z   S r+   )QUANT_INPUT_SUFFIXr>  s    r$   add_quant_input_suffixrB    s    +++r&   c                    | dz   S )N_QuantizeLinear_Outputr   r>  s    r$   add_quant_output_suffixrE    s    111r&   c                    | dz   S )N_DequantizeLinearr   r>  s    r$   add_dequant_suffixrH  !  s    ,,,r&   c                    | dz   S )N_DequantizeLinear_Inputr   r>  s    r$   add_dequant_input_suffixrK  %  s    222r&   c                    | t           z   S r+   )DEQUANT_OUTPUT_SUFFIXr>  s    r$   add_dequant_output_suffixrN  )  s    ...r&   )NN)FN)FNNN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )FF)r   r!   r(  r!   r   r)  )r.  r  r   r/  )rr  r   rs  r  r   r   )rP   )r  )r  r   )r  r   r  r   )r  r   )r  r   r   r   )r  r   r   r!   )r  r   r  rF   r   r   )r  r   r   r   )r  r   r   r   )r9  r   r   r   )r;  r  r   r  )r   r  )l
__future__r   r  r  r  r2  enumr   pathlibr   rz   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMErA  DEQUANT_OP_NAMErM  r  MODEL_SIZE_THRESHOLDr   r  r:  r(   r?   rF   rb   rR   rm   rT   rX   rV   rZ   r^   r\   r   r   ri   rh   rk   rj   r$  r#  r!  r   r   r   r   r   r   r  r   r'  r-  r  r7  rB  rP  r`  rg  rl  rq  ry  r  r  r  r  r  r  r  r  r  r  r(  r.  r%  r   r?  rB  rE  rH  rK  rN  r   r&   r$   <module>r^     sv
   # " " " " "   				                > > > > > > > > > > & & & & & & Q Q Q Q Q Q Q Q Q Q Q Q - - - - - - P P P P P P P P P P@@@@@@@   LLL
?????????   DEEE7777777   
 	 , $2 ' !  qqCC4D4Dqqq    t          #> #> #> #> #> #> #> #>L    $     V!4!4 +%+g"6"6 +%+g"6"6!;5;x#8#8' %   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+d%*"E"E"E{u{SV^c^hGiGiGi!j!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#>#>#>BV[@\@\@\"]+%+b"="="={u{1TX?Y?Y?Y!Z   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+d%*"E"E"E{u{SV^c^hGiGiGi!j!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q	!   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+c"D"D"DkekRT\a\fFgFgFg!h!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#=#=#={u{1TX?Y?Y?Y"Z+%+b"="="={u{1TX?Y?Y?Y!Z  )+ A A A A A 13 13 13 13h< < < <~" " "R #'"&"&;G ;G ;G ;G ;G~ hl:= := := := :=D $(L  L  L  L  L ^   @   	 	 	 	   <       >% % % % % % % %8' ' ' ' ' ' ' '"# "# "#J0 0 0  @ @ @R R R R  $Y Y Y Yx   2n n n n
k k k k 7 7 7 7   7 7 7 7      ! ! ! !H   7 7 7 7   + + + +, , , ,2 2 2 2- - - -3 3 3 3/ / / / / /s6   A A)(A)-A6 6	BBB BB