
    @h.                         d Z ddlZddlZddlZddlZddlZddlmZmZm	Z	 ddl
m
Z
  G d d          Zd Zedk    r e             dS dS )	z
Bowling Image to CSV Extractor

This script extracts bowler information and scores from bowling sheet images
using Amazon Textract and converts the data to CSV format.
    N)ListDictOptional)datetimec                       e Zd ZddZdedee         fdZdee         dee         fdZdee         defdZ	dd
ee         defdZ
dd
ee         dedefdZdd
ee         dedefdZddedefdZdS )BowlingImageExtractor	us-east-1c                 <    t          j        d|          | _        dS )zInitialize the Textract client.textract)region_nameN)boto3clienttextract_client)self
aws_regions     ;/Users/mbp14/git/itopsa/if-lab/ai/image_to_csv_extractor.py__init__zBowlingImageExtractor.__init__   s    $|JJOOO    
image_pathreturnc                    	 t          |d          5 }t          |                                          }ddd           n# 1 swxY w Y   | j                            d|i          }g }|d         D ])}|d         dk    r|                    |d                    *|S # t          $ r t          d	| d
           g cY S t          $ r}t          d|            g cY d}~S d}~ww xY w)z
        Extract text from an image using Amazon Textract.
        
        Args:
            image_path: Path to the image file
            
        Returns:
            List of text lines extracted from the image
        rbNBytes)DocumentBlocks	BlockTypeLINETextzError: Image file 'z' not found.z"Error extracting text from image: )	open	bytearrayreadr   detect_document_textappendFileNotFoundErrorprint	Exception)r   r   documentimage_bytesresponse
text_linesblockes           r   extract_text_from_imagez-BowlingImageExtractor.extract_text_from_image   sj   	j$'' 98'889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 +@@!;/ A  H
 J!(+ 5 5%//%%eFm444  	 	 	@
@@@AAAIII 	 	 	:q::;;;IIIIII	sF   B "A B  AB AAB C&?	C&C!C&!C&r*   c           	      >   g }d}|t          |          k     r||                                         }|r|dv r|dz  }:t          j        d|          r|dz   t          |          k     r	 t	          |          }||dz                                            }t	          ||dz                                                      }t	          ||dz                                                      }t	          ||dz                                                      }	t	          ||d	z                                                      }
t	          ||d
z                                                      }t	          ||dz                                                      }t          j        d|          r8||                                |||	|
||d}|                    |           |dz  }n|dz  }n!# t          t          f$ r |dz  }Y n	w xY w|dz  }|t          |          k     |S )z
        Parse bowler information from text lines.
        
        Args:
            text_lines: List of text lines from Textract
            
        Returns:
            List of dictionaries containing bowler information
        r   )zLane #zBowler NameAvgHDCPzGame 1zGame 2zGame 3Total   z^\d+$                  z
^[A-Z\s]+$lane_numbernameaveragehandicapgame1_scoregame2_scoregame3_scoretotal_score   )lenstriprematchintr#   
ValueError
IndexError)r   r*   bowlersilinelane_numbowler_nameavghdcpgame1game2game3totalbowler_infos                 r   parse_bowler_dataz'BowlingImageExtractor.parse_bowler_data9   s:     #j//!!a=&&((D  4#rrrQ x$'' AEC
OO,C,C"4yyH",QU"3"9"9";";KjQ/557788Cz!a%0668899D
1q5 1 7 7 9 9::E
1q5 1 7 7 9 9::E
1q5 1 7 7 9 9::E
1q5 1 7 7 9 9::E x{;; +3$/$5$5$7$7'*(,+0+0+0+0	' 	'  {333QQ"J/   FAAA QO #j//!!R s   -E:G( (H Hc                    ddddd}t          |          D ]\  }}|                                }|dk    r7|dz   t          |          k     r ||dz                                            |d<   V|dk    r7|dz   t          |          k     r ||dz                                            |d<   |dk    r7|dz   t          |          k     r ||dz                                            |d	<   d
|v rd
|d<   d|v rd|d<   |S )z
        Extract match information from text lines.
        
        Args:
            text_lines: List of text lines from Textract
            
        Returns:
            Dictionary containing match information
         )
team1_name
team2_namedate
match_typezTeam 1r2   rY   zTeam 2rZ   Dater[   zScratch Pairr\   Handicap)	enumeraterD   rC   )r   r*   
match_inforK   rL   s        r   extract_match_infoz(BowlingImageExtractor.extract_match_infor   s:    	
 

 !,, 	6 	6GAt::<<D xq53z??**/9!a%/@/F/F/H/HJ|,!!q53z??**/9!a%/@/F/F/H/HJ|,q53z??**)3AE):)@)@)B)BJv&4''+9
<((t##+5
<(r   bowlers.csvrJ   output_filec                    t          |ddd          5 }g d}t          j        ||          }|                                 |D ]}|                    |           	 ddd           n# 1 swxY w Y   t          d|            dS )	z
        Create CSV file with bowler information.
        
        Args:
            bowlers: List of bowler dictionaries
            output_file: Output CSV filename
        wrX   utf-8newlineencodingr9   
fieldnamesNzBowlers CSV created: r   csv
DictWriterwriteheaderwriterowr%   )r   rJ   rc   csvfilerk   writerbowlers          r   create_bowlers_csvz(BowlingImageExtractor.create_bowlers_csv   s     +sBAAA 	(W D  D  DJ^G
CCCF   ! ( (''''(	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	3k3344444s   A	A**A.1A.database_import.csvr`   c                 ^   t          |ddd          5 }g d}t          j        ||          }|                                 |D ]=}|                    |d         d|d	         |d
         |d         |d         dd           >	 ddd           n# 1 swxY w Y   t          d|            dS )z
        Create CSV file formatted for database import.
        
        Args:
            bowlers: List of bowler dictionaries
            match_info: Match information dictionary
            output_file: Output CSV filename
        re   rX   rf   rg   )bowler_nicknamelocation_name
event_dater>   r?   r@   series_typerj   r;   zUnknown Locationr[   r>   r?   r@   z	Tour StopNzDatabase import CSV created: rl   r   rJ   r`   rc   rq   rk   rr   rs   s           r   create_database_import_csvz0BowlingImageExtractor.create_database_import_csv   s,    +sBAAA 	W H  H  HJ^G
CCCF   ! 	 	'-f~%7",V"4#)-#8#)-#8#)-#8#.! !    		 	 	 	 	 	 	 	 	 	 	 	 	 	 	  	;k;;<<<<<s   A/BBBdetailed_scores.csvc                    t          |ddd          5 }g d}t          j        ||          }|                                 |D ]l}|                    |d         |d         |d	         |d
         |d         |d         |d         |d         |d         |d         |d         |d         d           m	 ddd           n# 1 swxY w Y   t          d|            dS )z
        Create detailed CSV with all information.
        
        Args:
            bowlers: List of bowler dictionaries
            match_info: Match information dictionary
            output_file: Output CSV filename
        re   rX   rf   rg   )
match_dateteam1team2r\   r:   rN   r<   r=   r>   r?   r@   rA   rj   r[   rY   rZ   r\   r:   r;   r<   r=   r>   r?   r@   rA   NzDetailed scores CSV created: rl   r{   s           r   create_detailed_csvz)BowlingImageExtractor.create_detailed_csv   s]    +sBAAA 	W  J
 ^G
CCCF   !  ",V"4'5'5",\":#)-#8#)&>%i0 &z 2#)-#8#)-#8#)-#8#)-#8! !    	 	 	 	 	 	 	 	 	 	 	 	 	 	 	2 	;k;;<<<<<s   BB??CCbowlingoutput_prefixc                 f   t          d|            |                     |          }|st          d           dS t          dt          |           d           |                     |          }|st          d           dS t          dt          |           d           |                     |          }|                     || d	           |                     ||| d
           |                     ||| d           t          d           t          d|d          d|d                     t          d|d                     t          d|d                     t          dt          |                      t          d |D             d          }|rt          d           t          d|d                     t          d|d                     t          d|d                     t          d|d                     t          d |d!                     t          d"|d#                     t          d$|d%                     ||d&}t          | d'd(          5 }t          j        ||d)*           ddd           n# 1 swxY w Y   t          d+| d'           dS ),z
        Process an image and create CSV files.
        
        Args:
            image_path: Path to the image file
            output_prefix: Prefix for output CSV files
        zProcessing image: zNo text extracted from image.Nz
Extracted z text lineszNo bowler data found in image.zFound z bowlersz_bowlers.csvz_database_import.csvz_detailed_scores.csvz
=== MATCH SUMMARY ===zMatch: rY   z vs rZ   zDate: r[   zType: r\   zTotal Bowlers: c              3   .   K   | ]}d |d         v |V  dS )zANTHONY ESCALONAr;   N ).0bs     r   	<genexpr>z6BowlingImageExtractor.process_image.<locals>.<genexpr>  s1      NNa.@AfI.M.M.M.M.M.MNNr   z
=== ANTHONY ESCALONA ===zLane: r:   z	Average: r<   z
Handicap: r=   zGame 1: r>   zGame 2: r?   zGame 3: r@   zTotal: rA   )r`   rJ   z
_data.jsonre   r4   )indentz
JSON data saved: )r%   r-   rC   rV   ra   rt   r|   r   nextr   jsondump)	r   r   r   r*   rJ   r`   anthonyresultfs	            r   process_imagez#BowlingImageExtractor.process_image   sz    	/://000 11*==
 	1222F73z??777888 ((44 	2333F-s7||---... ,,Z88
 	M)G)G)GHHH''=c=c=cddd  *6\6\6\]]] 	()))P
<0PPj6NPPQQQ+z&)++,,,1z,/11222.G../// NN7NNNPTUU 	6/00037=1334442gi0223334wz2445555W]3556665W]3556665W]3556664GM244555 %
 

 ]...44 	+Ifa****	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	=M===>>>>>s   /JJJN)r	   )rb   )ru   )r}   )r   )__name__
__module____qualname__r   strr   r-   r   rV   ra   rt   r|   r   r   r   r   r   r   r      s^       P P P P!# !$s) ! ! ! !F7DI 7$t* 7 7 7 7r#T#Y #4 # # # #J5 5$t* 53 5 5 5 5$= =$t* =$ =]` = = = =6"= "=4: "=4 "=VY "= "= "= "=H>? >? >?C >? >? >? >? >? >?r   r   c                  r   t          t          j                  dk     r2t          d           t          d           t          j        d           t          j        d         } t          t          j                  dk    rt          j        d         nd}t                      }|                    | |           dS )z#Main function to run the extractor.r4   zDUsage: python image_to_csv_extractor.py <image_path> [output_prefix]zDExample: python image_to_csv_extractor.py bowling_sheet.jpg my_matchr2   r   N)rC   sysargvr%   exitr   r   )r   r   	extractors      r   mainr   (  s    
38}}qTUUUTUUU!J#&sx==1#4#4CHQKK)M &''IJ66666r   __main__)__doc__r   rm   rE   r   r   typingr   r   r   r   r   r   r   r   r   r   <module>r      s      



 				  



 ' ' ' ' ' ' ' ' ' '      U? U? U? U? U? U? U? U?n7 7 7 zDFFFFF r   