2023-11-11 10:50:33 +01:00
# Copyright 2023 Nick Brassel (@tzarc)
# SPDX-License-Identifier: GPL-2.0-or-later
import re
from milc import cli
from qmk . constants import LICENSE_TEXTS
2024-04-23 11:38:47 +02:00
from qmk . path import normpath
2023-11-11 10:50:33 +01:00
# Patterns used by _simplify_text() to normalise text before it is compared.
#
# Any single opening bracket: (, [, { or <. This must be a character class;
# written as a plain sequence the pattern would only match the literal text
# "([{<" and bracket styles would never be unified.
L_PAREN = re.compile(r'[\(\[\{\<]')
# Any single closing bracket: ), ], } or >. Character class for the same reason.
R_PAREN = re.compile(r'[\)\]\}\>]')
# Runs of '.', ',', ';' and ':'.
PUNCTUATION = re.compile(r'[\.,;:]+')
# Leading run of whitespace, '/', '*' and '#' (comment decoration) on a line.
TRASH_PREFIX = re.compile(r'^(\s|/|\*|#)+')
# Trailing run of whitespace, '/', '*', '#' and backslashes on a line.
TRASH_SUFFIX = re.compile(r'(\s|/|\*|#|\\)+$')
# Runs of whitespace.
SPACE = re.compile(r'\s+')
# File suffixes that are checked when no extension override is supplied.
SUFFIXES = ['.c', '.h', '.cpp', '.cxx', '.hpp', '.hxx']
def _simplify_text(input):
    """Reduce a block of text to a single normalised line for comparison.

    The text is lower-cased and split on newlines. Each line then has, in
    order: PUNCTUATION matches removed, the TRASH_PREFIX and TRASH_SUFFIX
    matches removed, SPACE matches replaced by one space, L_PAREN / R_PAREN
    matches replaced by '(' / ')', and surrounding whitespace stripped.
    Lines that end up empty are dropped.

    Args:
        input: The raw text.

    Returns:
        The surviving lines joined with single spaces.
    """
    # (pattern, replacement) pairs, applied to every line in this order
    substitutions = (
        (PUNCTUATION, ''),
        (TRASH_PREFIX, ''),
        (TRASH_SUFFIX, ''),
        (SPACE, ' '),
        (L_PAREN, '('),
        (R_PAREN, ')'),
    )

    simplified = []
    for text in input.lower().split('\n'):
        for pattern, replacement in substitutions:
            text = pattern.sub(replacement, text)
        text = text.strip()
        # Skip lines that contained nothing but decoration/whitespace
        if text:
            simplified.append(text)

    return ' '.join(simplified)
2024-04-23 11:38:47 +02:00
def _preformat_license_texts():
    """Run every long-form text in LICENSE_TEXTS through _simplify_text().

    The lists held by LICENSE_TEXTS are modified in place: each entry is
    overwritten with its simplified form.
    """
    for _, long_licenses in LICENSE_TEXTS:
        # Assign by index so the list object itself is updated
        for index, license_text in enumerate(long_licenses):
            long_licenses[index] = _simplify_text(license_text)
def _determine_suffix_condition ( extensions ) :
def _default_suffix_condition ( s ) :
return s in SUFFIXES
conditional = _default_suffix_condition
if extensions is not None and len ( extensions ) > 0 :
suffixes = [ f ' . { s } ' if not s . startswith ( ' . ' ) else s for s in extensions ]
def _specific_suffix_condition ( s ) :
return s in suffixes
conditional = _specific_suffix_condition
return conditional
def _determine_file_list ( inputs , conditional ) :
check_list = set ( )
for filename in inputs :
if filename . is_dir ( ) :
for file in sorted ( filename . rglob ( ' * ' ) ) :
if file . is_file ( ) and conditional ( file . suffix ) :
check_list . add ( file )
elif filename . is_file ( ) :
if conditional ( filename . suffix ) :
check_list . add ( filename )
return list ( sorted ( check_list ) )
def _detect_license_from_file_contents(filename, absolute=False, short=False):
    """Detect and report the license of a single file.

    The file is read as UTF-8, ignoring undecodable bytes. If it contains an
    'SPDX-License-Identifier: ' tag, the first token after the first tag is
    compared case-insensitively with the short names in LICENSE_TEXTS; an
    unrecognised identifier is reported as unknown without falling back to
    the full-text search. Without a tag, the simplified file text is searched
    for each long-form license text in LICENSE_TEXTS.

    Args:
        filename: Path object of the file to inspect.
        absolute: Report the absolute path instead of the path as given.
        short: Print '<path> <license>' (or '<path> UNKNOWN') instead of
            logging through cli.log.

    Returns:
        True if a known license was detected, False otherwise.
    """
    spdx_tag = 'SPDX-License-Identifier: '

    data = filename.read_text(encoding='utf-8', errors='ignore')
    filename_out = str(filename.absolute()) if absolute else str(filename)

    def _report_unknown():
        # Shared by the SPDX and the full-text paths
        if short:
            print(f'{filename_out} UNKNOWN')
        else:
            cli.log.error(f'{{fg_cyan}}{filename_out}{{fg_reset}} -- unknown license, or no license detected!')
        return False

    if spdx_tag in data:
        # Text following the first tag, cut at whitespace or a comment marker
        after_tag = data.split(spdx_tag)[1].strip()
        declared = re.split(r'\s|//|\*', after_tag)[0].strip()

        # Map the declared identifier onto the canonical spelling, if known
        wanted = declared.lower()
        canonical = next((name for name, _ in LICENSE_TEXTS if name.lower() == wanted), None)
        if canonical is None:
            return _report_unknown()

        if short:
            print(f'{filename_out} {canonical}')
        else:
            cli.log.info(f'{{fg_cyan}}{filename_out}{{fg_reset}} -- license detected: {canonical} (SPDX License Identifier)')
        return True

    # No SPDX tag: look for a known license body inside the simplified text
    simple_text = _simplify_text(data)
    for short_license, long_licenses in LICENSE_TEXTS:
        if any(long_license in simple_text for long_license in long_licenses):
            if short:
                print(f'{filename_out} {short_license}')
            else:
                cli.log.info(f'{{fg_cyan}}{filename_out}{{fg_reset}} -- license detected: {short_license} (Full text)')
            return True

    return _report_unknown()
2024-04-23 11:38:47 +02:00
@cli.argument('inputs', nargs='*', arg_only=True, type=normpath, help='List of input files or directories.')
@cli.argument('-s', '--short', action='store_true', help='Short output.')
@cli.argument('-a', '--absolute', action='store_true', help='Print absolute paths.')
@cli.argument('-e', '--extension', arg_only=True, action='append', default=[], help='Override list of extensions. Can be specified multiple times for multiple extensions.')
@cli.subcommand('File license check.', hidden=False if cli.config.user.developer else True)
def license_check(cli):
    """Check the license of every matching file under the given inputs.

    Returns False if any file has an unknown or undetectable license;
    otherwise falls off the end (returns None).
    """
    _preformat_license_texts()

    suffix_filter = _determine_suffix_condition(cli.args.extension)
    candidates = _determine_file_list(cli.args.inputs, suffix_filter)

    # Build the full list (no short-circuit) so every file gets reported
    outcomes = [_detect_license_from_file_contents(path, absolute=cli.args.absolute, short=cli.args.short) for path in sorted(candidates)]

    if not all(outcomes):
        return False