
"""[Experimental] Text Only Local Tokenizer."""

import logging
from typing import Any, Iterable
from typing import Optional, Union

from sentencepiece import sentencepiece_model_pb2

from . import _common
from . import _local_tokenizer_loader as loader
from . import _transformers as t
from . import types

logger = logging.getLogger('google_genai.local_tokenizer')


class _TextsAccumulator:
  """Accumulates countable texts from `Content` and `Tool` objects.

  This class is responsible for traversing complex `Content` and `Tool`
  objects and extracting all the text content that should be included when
  calculating token counts.

  A key feature of this class is its ability to detect unsupported fields in
  `Content` objects. If a user provides a `Content` object with fields that
  this local tokenizer doesn't recognize (e.g., new fields added in a future
  API update), this class will log a warning.

  The detection mechanism for `Content` objects works by recursively building
  a "counted" version of the input object. This "counted" object only
  contains the data that was successfully processed and added to the text
  list for tokenization. After traversing the input, the original `Content`
  object is compared to the "counted" object. If they don't match, it
  signifies the presence of unsupported fields, and a warning is logged.
  """

  def __init__(self) -> None:
    self._texts: list[str] = []

  def get_texts(self) -> Iterable[str]:
    return self._texts

  def add_contents(self, contents: Iterable[types.Content]) -> None:
    for content in contents:
      self.add_content(content)

  def add_content(self, content: types.Content) -> None:
    # Build a "counted" copy holding only the fields this tokenizer
    # understands; it is compared against the input below to detect
    # unsupported fields.
    counted_content = types.Content(parts=[], role=content.role)
    if content.parts:
      for part in content.parts:
        assert counted_content.parts is not None
        counted_part = types.Part()
        if part.file_data is not None or part.inline_data is not None:
          raise ValueError(
              'LocalTokenizers do not support non-text content types.'
          )
        if part.video_metadata is not None:
          counted_part.video_metadata = part.video_metadata
        if part.function_call is not None:
          self.add_function_call(part.function_call)
          counted_part.function_call = part.function_call
        if part.function_response is not None:
          self.add_function_response(part.function_response)
          counted_part.function_response = part.function_response
        if part.text is not None:
          counted_part.text = part.text
          self._texts.append(part.text)
        counted_content.parts.append(counted_part)
    if counted_content.model_dump(exclude_none=True) != content.model_dump(
        exclude_none=True
    ):
      logger.warning(
          'Content contains unsupported types for token counting. Supported'
          f' fields {counted_content}. Got {content}.'
      )

  def add_function_call(self, function_call: types.FunctionCall) -> None:
    """Processes a function call and adds relevant text to the accumulator.

    Args:
        function_call: The function call to process.
    """
    if function_call.name:
      self._texts.append(function_call.name)
    counted_function_call = types.FunctionCall(name=function_call.name)
    if function_call.args:
      counted_args = self._dict_traverse(function_call.args)
      counted_function_call.args = counted_args

  def add_tool(self, tool: types.Tool) -> types.Tool:
    counted_tool = types.Tool(function_declarations=[])
    if tool.function_declarations:
      for function_declaration in tool.function_declarations:
        counted_function_declaration = self._function_declaration_traverse(
            function_declaration
        )
        if counted_tool.function_declarations is None:
          counted_tool.function_declarations = []
        counted_tool.function_declarations.append(counted_function_declaration)
    return counted_tool

  def add_tools(self, tools: Iterable[types.Tool]) -> None:
    for tool in tools:
      self.add_tool(tool)

  def add_function_responses(
      self, function_responses: Iterable[types.FunctionResponse]
  ) -> None:
    for function_response in function_responses:
      self.add_function_response(function_response)

  def add_function_response(
      self, function_response: types.FunctionResponse
  ) -> None:
    counted_function_response = types.FunctionResponse()
    if function_response.name:
      self._texts.append(function_response.name)
      counted_function_response.name = function_response.name
    if function_response.response:
      counted_response = self._dict_traverse(function_response.response)
      counted_function_response.response = counted_response

  def _function_declaration_traverse(
      self, function_declaration: types.FunctionDeclaration
  ) -> types.FunctionDeclaration:
    counted_function_declaration = types.FunctionDeclaration()
    if function_declaration.name:
      self._texts.append(function_declaration.name)
      counted_function_declaration.name = function_declaration.name
    if function_declaration.description:
      self._texts.append(function_declaration.description)
      counted_function_declaration.description = (
          function_declaration.description
      )
    if function_declaration.parameters:
      counted_parameters = self.add_schema(function_declaration.parameters)
      counted_function_declaration.parameters = counted_parameters
    if function_declaration.response:
      counted_response = self.add_schema(function_declaration.response)
      counted_function_declaration.response = counted_response
    return counted_function_declaration

  def add_schema(self, schema: types.Schema) -> types.Schema:
    """Processes a schema and adds relevant text to the accumulator.

    Args:
        schema: The schema to process.

    Returns:
        The new schema object with only countable fields.
    """
    counted_schema = types.Schema()
    if schema.type:
      counted_schema.type = schema.type
    if schema.title:
      counted_schema.title = schema.title
    if schema.default is not None:
      counted_schema.default = schema.default
    if schema.format:
      counted_schema.format = schema.format
    if schema.description:
      self._texts.append(schema.description)
      counted_schema.description = schema.description
    if schema.enum:
      self._texts.extend(schema.enum)
      counted_schema.enum = schema.enum
    if schema.required:
      self._texts.extend(schema.required)
      counted_schema.required = schema.required
    if schema.property_ordering:
      self._texts.extend(schema.property_ordering)
      counted_schema.property_ordering = schema.property_ordering
    if schema.items:
      counted_schema_items = self.add_schema(schema.items)
      counted_schema.items = counted_schema_items
    if schema.properties:
      counted_properties = {}
      for key, value in schema.properties.items():
        self._texts.append(key)
        counted_value = self.add_schema(value)
        counted_properties[key] = counted_value
      counted_schema.properties = counted_properties
    if schema.example:
      counted_schema_example = self._any_traverse(schema.example)
      counted_schema.example = counted_schema_example
    return counted_schema

  def _dict_traverse(self, d: dict[str, Any]) -> dict[str, Any]:
    """Processes a dict and adds relevant text to the accumulator.

    Args:
        d: The dict to process.

    Returns:
        The new dict object with only countable fields.
    """
    counted_dict = {}
    self._texts.extend(list(d.keys()))
    for key, val in d.items():
      counted_dict[key] = self._any_traverse(val)
    return counted_dict

  def _any_traverse(self, value: Any) -> Any:
    """Processes a value and adds relevant text to the accumulator.

    Args:
        value: The value to process.

    Returns:
        The new value with only countable fields.
    """
    if isinstance(value, str):
      self._texts.append(value)
      return value
    elif isinstance(value, dict):
      return self._dict_traverse(value)
    elif isinstance(value, list):
      return [self._any_traverse(item) for item in value]
    else:
      return value


def _token_str_to_bytes(
    token: str, type: sentencepiece_model_pb2.ModelProto.SentencePiece.Type
) -> bytes:
  if type == sentencepiece_model_pb2.ModelProto.SentencePiece.Type.BYTE:
    return _parse_hex_byte(token).to_bytes(length=1, byteorder='big')
  else:
    return token.replace('▁', ' ').encode('utf-8')


def _parse_hex_byte(token: str) -> int:
  """Parses a hex byte string of the form '<0xXX>' and returns the integer value.

  Raises ValueError if the input is malformed or the byte value is invalid.
  """
  if len(token) != 6:
    raise ValueError(f'Invalid byte length: {token}')
  if not token.startswith('<0x') or not token.endswith('>'):
    raise ValueError(f'Invalid byte format: {token}')
  try:
    val = int(token[3:5], 16)
  except ValueError:
    raise ValueError(f'Invalid hex value: {token}')
  if val >= 256:
    raise ValueError(f'Byte value out of range: {token}')
  return val


class LocalTokenizer:
  """[Experimental] Text Only Local Tokenizer.

  This class provides a local tokenizer for text only token counting.

  LIMITATIONS:
  - Only supports text based tokenization and no multimodal tokenization.
  - Forward compatibility depends on the open-source tokenizer models for
    future Gemini versions.
  - For token counting of tools and response schemas, the `LocalTokenizer`
    only supports `types.Tool` and `types.Schema` objects. Python functions
    or Pydantic models cannot be passed directly.
  """

  def __init__(self, model_name: str):
    self._tokenizer_name = loader.get_tokenizer_name(model_name)
    self._model_proto = loader.load_model_proto(self._tokenizer_name)
    self._tokenizer = loader.get_sentencepiece(self._tokenizer_name)

  @_common.experimental_warning(
      "The SDK's local tokenizer implementation is experimental and may"
      ' change in the future. It only supports text based tokenization.'
  )
  def count_tokens(
      self,
      contents: Union[types.ContentListUnion, types.ContentListUnionDict],
      config: Optional[types.CountTokensConfigOrDict] = None,
  ) -> types.CountTokensResult:
    """Counts the number of tokens in a given text.

    Args:
      contents: The contents to tokenize.
      config: The configuration for counting tokens.

    Returns:
      A `CountTokensResult` containing the total number of tokens.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.count_tokens("What is your name?")
      print(result)
      # total_tokens=5
    """
    processed_contents = t.t_contents(contents)
    text_accumulator = _TextsAccumulator()
    config = types.CountTokensConfig.model_validate(config or {})
    text_accumulator.add_contents(processed_contents)
    if config.tools:
      text_accumulator.add_tools(config.tools)
    if config.generation_config and config.generation_config.response_schema:
      text_accumulator.add_schema(config.generation_config.response_schema)
    if config.system_instruction:
      text_accumulator.add_contents(t.t_contents([config.system_instruction]))
    tokens_list = self._tokenizer.encode(list(text_accumulator.get_texts()))
    return types.CountTokensResult(
        total_tokens=sum(len(tokens) for tokens in tokens_list)
    )

  @_common.experimental_warning(
      "The SDK's local tokenizer implementation is experimental and may"
      ' change in the future. It only supports text based tokenization.'
  )
  def compute_tokens(
      self,
      contents: Union[types.ContentListUnion, types.ContentListUnionDict],
  ) -> types.ComputeTokensResult:
    """Computes the token ids and string pieces in the input.

    Args:
      contents: The contents to tokenize.

    Returns:
      A `ComputeTokensResult` containing the token information.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.compute_tokens("What is your name?")
      print(result)
      # tokens_info=[TokensInfo(token_ids=[279, 329, 1313, 2508, 13], tokens=[b' What', b' is', b' your', b' name', b'?'], role='user')]
    """
    processed_contents = t.t_contents(contents)
    text_accumulator = _TextsAccumulator()
    for content in processed_contents:
      text_accumulator.add_content(content)
    tokens_protos = self._tokenizer.EncodeAsImmutableProto(
        text_accumulator.get_texts()
    )
    # One role is recorded per part so each encoded text can be mapped back
    # to the role of the content it came from.
    roles = []
    for content in processed_contents:
      if not content.parts:
        continue
      for part in content.parts:
        roles.append(content.role)
    token_infos = []
    for tokens_proto, role in zip(tokens_protos, roles):
      token_infos.append(
          types.TokensInfo(
              token_ids=[piece.id for piece in tokens_proto.pieces],
              tokens=[
                  _token_str_to_bytes(
                      piece.piece, self._model_proto.pieces[piece.id].type
                  )
                  for piece in tokens_proto.pieces
              ],
              role=role,
          )
      )
    return types.ComputeTokensResult(tokens_info=token_infos)