Source code for reactonite.Transpiler

import glob
import os
from distutils.file_util import copy_file
from html.parser import HTMLParser

from bs4 import BeautifulSoup, Comment

from .NodeWrapper import NodeWrapper


class AttributesParser(HTMLParser):
    """Extends HTMLParser to extract tags with attributes from a given
    HTML string

    Call the feed method of HTMLParser to generate data and then retrieve
    it from the object of the class. Here's a usage example:

        attributes_parser = AttributesParser()
        attributes_parser.feed("YOUR_HTML_STRING")
        tag_with_attributes = attributes_parser.data
        print(tag_with_attributes)

    Attributes
    ----------
    data : list
        Stores the tags with their attributes. Each entry is a single-key
        dict ``{tag_name: {attr_name: attr_value}}``, appended in the order
        the start tags are encountered. Empty until a start tag is parsed.
    """

    def __init__(self, *args, **kwargs):
        """Creates the parser and an empty ``data`` list.

        All arguments are forwarded unchanged to ``HTMLParser.__init__``.
        """

        super().__init__(*args, **kwargs)
        # Initialised eagerly so that ``data`` can be read even when the fed
        # markup contains no start tag at all.
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Overrides the original handler for start tag and appends the
        tags to data.

        Parameters
        ----------
        tag : str
            Name of tag being parsed
        attrs : list
            List of (name, value) pairs corresponding to the current tag;
            when a name is repeated the last value wins.
        """

        self.data.append({tag: dict(attrs)})
class ReactCodeMapper:
    """Class to convert tags and props from HTML to React

    Call getReactMap method for converting tags fed for HTML and get
    corresponding React Mapping. Here's a usage example:

        reactCodeMapper = ReactCodeMapper(source_dir,
                                          destination_dir,
                                          props_map)
        react_map = reactCodeMapper.getReactMap(tag_with_attributes,
                                                filepath_from_src)
        print(react_map)

    Attributes
    ----------
    CUSTOM_TAG_HANDLERS : dict
        Stores mapping corresponding to tags which are handled separately.
    src_dir : str
        Source directory for the HTML codebase.
    dest_dir : str
        Destination directory for the React codebase.
    props_map : dict
        Mapping of attrs for HTML to React from props_map.py
    add_to_import : list
        Stores imports corresponding to variables created during
        transpilation. Each import statement is stored only once.
    add_variables : list
        Stores newly created variables during transpilation. Each variable
        name is stored only once.
    router_link_imported : bool, optional
        Saves whether Link tag needs to be imported for current page.
    """

    def __init__(self, src_dir, dest_dir, props_map):
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.props_map = props_map
        self.add_to_import = []
        self.add_variables = []
        self.router_link_imported = False

        self.__A_TAG_HANDLER = 'A_TAG_HANDLER'
        self.__IMAGE_TAG_HANDLER = 'IMAGE_TAG_HANDLER'
        self.__SCRIPT_TAG_HANDLER = 'SCRIPT_TAG_HANDLER'
        self.__STYLE_TAG_HANDLER = "STYLE_TAG_HANDLER"
        self.__LINK_TAG_HANDLER = 'LINK_TAG_HANDLER'

        self.CUSTOM_TAG_HANDLERS = {
            'a': self.__A_TAG_HANDLER,
            'img': self.__IMAGE_TAG_HANDLER,
            'script': self.__SCRIPT_TAG_HANDLER,
            'style': self.__STYLE_TAG_HANDLER,
            'link': self.__LINK_TAG_HANDLER
        }

    def __getSafeName(self, link):
        """Generates safe name for variable from path to file.

        Every non-alphanumeric character is replaced by an underscore. A
        leading underscore is added when the result would start with a
        digit, since such a name is not a valid JavaScript identifier.

        Parameters
        ----------
        link : str
            Path to file for which variable is created.

        Returns
        -------
        str
            Variable name generated from link
        """

        varName = "".join(ch if ch.isalnum() else '_' for ch in link)
        if varName[:1].isdigit():
            varName = '_' + varName
        return varName

    def __addImport(self, statement):
        """Records an import statement unless it was already recorded.

        Parameters
        ----------
        statement : str
            Complete JavaScript import statement.
        """

        # The same asset referenced twice must not be imported twice: a
        # repeated default import re-declares the same binding.
        if statement not in self.add_to_import:
            self.add_to_import.append(statement)

    def __getLinkInfo(self, link, filepath_from_src, no_var=False):
        """Generates link information.

        If link is internal corresponding variable name is generated, for
        external link it is returned.

        Parameters
        ----------
        link : str
            Link for filepath or external link.
        filepath_from_src : str
            Path to file from src.
        no_var : bool, optional
            To generate import variable or just import file, default is
            False i.e. generate variable

        Returns
        -------
        str
            Variable name wrapped in braces for an internal link, or the
            link itself in the external case. None when the link is empty
            or when no_var is set for an internal link.
        """

        if not link:
            return None

        pathToLink = os.path.join(self.src_dir, filepath_from_src, link)
        pathToIndexLink = os.path.join(pathToLink, 'index.html')
        is_internal = (
            os.path.isfile(pathToLink) or os.path.isfile(pathToIndexLink)
        )
        if not is_internal:
            return link

        if no_var:
            self.__addImport("import '{link}';".format(link=link))
            return None

        var = self.__getSafeName(link)
        self.__addImport(
            "import {var} from '{link}';".format(var=var, link=link)
        )
        if var not in self.add_variables:
            self.add_variables.append(var)
        return "{" + var + "}"

    def __getAttrsWithLink(
        self, attrs, linkAttr, filepath_from_src, no_var=False
    ):
        """Generates attrs for tags having links to other files.

        If link is internal corresponding variable name is generated, for
        external link it is returned.

        Parameters
        ----------
        attrs : dict
            Attributes of tag to be worked upon.
        linkAttr : str
            Name of attr that correspond to link of file, example 'src' in
            case of script tag
        filepath_from_src : str
            Path to file from src directory.
        no_var : bool, optional
            To generate import variable or just import file, default is
            False i.e. generate variable

        Returns
        -------
        dict
            Final dictionary of attributes with link handled, or None when
            no link information could be produced for linkAttr (the caller
            treats None as "delete the tag").
        """

        final_attrs = {}
        for attrKey, attrValue in attrs.items():
            if attrKey != linkAttr:
                final_attrs[attrKey] = attrValue
                continue
            link_info = self.__getLinkInfo(
                attrValue, filepath_from_src, no_var=no_var
            )
            if link_info is None:
                return None
            final_attrs[linkAttr] = link_info
        return final_attrs

    def __getAttrsForRouterLink(self, attrs, filepath_from_src):
        """Generates attrs for A tag having links to other files.

        If the href points to a file (or a directory holding an index.html)
        inside the source directory, it is replaced by a ``to`` attribute
        holding the route; otherwise the href is kept as is.

        Parameters
        ----------
        attrs : dict
            Attributes of tag to be worked upon.
        filepath_from_src : str
            Path to file from src directory.

        Returns
        -------
        tuple
            Tuple of final dictionary of attributes with link handled and
            information about internal link
        """

        final_attrs = {}
        is_internal = False
        for attrKey, attrValue in attrs.items():
            if attrKey != "href":
                final_attrs[attrKey] = attrValue
                continue

            href_info = attrValue
            if href_info is None:
                # Valueless href (``<a href>``): nothing to resolve, and
                # os.path.join would raise on None.
                final_attrs["href"] = href_info
                continue

            pathRef = os.path.join(
                self.src_dir, filepath_from_src, href_info
            )
            pathRefIndex = os.path.join(pathRef, "index.html")
            if os.path.isfile(pathRef) or os.path.isfile(pathRefIndex):
                htmlPath = os.path.normpath(
                    os.path.join(filepath_from_src, href_info)
                )
                jsPath = '/'.join(htmlPath.split(os.path.sep))
                jsPath = jsPath.replace(".html", "")
                if jsPath == "index":
                    jsPath = "/"
                is_internal = True
                final_attrs["to"] = jsPath
            else:
                final_attrs["href"] = href_info
        return final_attrs, is_internal

    def __customTagAttrsHandler(self, attrs, tag_handler, filepath_from_src):
        """Custom tag and attributes handler for parsing attrs from
        CUSTOM_TAG_HANDLERS

        Parameters
        ----------
        attrs : dict
            Attributes for corresponding tag needed to be handled
        tag_handler : str
            Tag handler type to be used in mapping
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        dict
            Final attributes for that tag, if None is returned delete the
            tag
        """

        final_attrs = {}
        if tag_handler == self.__A_TAG_HANDLER:
            final_attrs, is_internal_link = self.__getAttrsForRouterLink(
                attrs, filepath_from_src
            )
            if not self.router_link_imported and is_internal_link:
                self.add_to_import.append(
                    'import {Link} from "react-router-dom";'
                )
                self.router_link_imported = True
        elif tag_handler == self.__IMAGE_TAG_HANDLER:
            final_attrs = self.__getAttrsWithLink(
                attrs, 'src', filepath_from_src
            )
        elif tag_handler == self.__SCRIPT_TAG_HANDLER:
            if 'src' not in attrs:
                return None
            final_attrs = self.__getAttrsWithLink(
                attrs, 'src', filepath_from_src
            )
        elif tag_handler == self.__STYLE_TAG_HANDLER:
            return None
        elif tag_handler == self.__LINK_TAG_HANDLER:
            # A stylesheet is turned into a bare import (called only for
            # that side effect); every link tag is then deleted. ``get`` is
            # used because a link tag is not required to carry ``rel``.
            if attrs.get("rel") == "stylesheet":
                self.__getAttrsWithLink(
                    attrs, 'href', filepath_from_src, no_var=True
                )
            return None
        return final_attrs

    def __getReactAttrs(self, attrs):
        """Generates renamed attributes corresponding to React, and removes
        inline style attributes and attributes starting with on like
        onclick etc.

        Parameters
        ----------
        attrs : dict
            Attributes in HTML format

        Returns
        -------
        dict
            Attributes in React format
        """

        return {
            self.props_map.get(attrKey, attrKey): attrValue
            for attrKey, attrValue in attrs.items()
            if attrKey != "style" and not attrKey.startswith("on")
        }

    def getReactMap(self, tags, filepath_from_src):
        """Wrapper to generate React Map object comprising of all data
        needed to convert HTML to React

        Parameters
        ----------
        tags : list
            Tags with their HTML attributes as extracted using
            AttributesParser, i.e. a list of single-key dicts
            ``{tag_name: attrs}``
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        dict
            Final mapping with keys 'tags' (list parallel to the input),
            'imports' (newline-joined import statements) and 'variables'
            (list of variable names) for React; if the attributes of a tag
            are None then the tag needs to be deleted
        """

        final_map = {
            'imports': [],
            'tags': [],
            'variables': [],
        }

        for tag in tags:
            tag_name = list(tag.keys())[0]
            attrs = self.__getReactAttrs(tag[tag_name])
            if tag_name in self.CUSTOM_TAG_HANDLERS:
                attrs = self.__customTagAttrsHandler(
                    attrs,
                    self.CUSTOM_TAG_HANDLERS[tag_name],
                    filepath_from_src
                )
            final_map['tags'].append({tag_name: attrs})

        final_map['imports'] = "\n".join(self.add_to_import)
        final_map['variables'] = self.add_variables
        return final_map
class Transpiler:
    """Transpiler responsible for translating HTML code to React

    Attributes
    ----------
    project_name : str
        Name of the project as stored in config
    src_dir : str
        Path of the source directory within the project directory
    dest_dir : str
        Path to the transpiled React app within the project directory
    props_map : dict
        Mapping of props for HTML to React used during transpilation
    index_routes : dict
        Stores Routes data corresponding to different pages for index.js,
        mapping route (without leading slash) to module import path
    parser : str, optional
        Specify which parser to use for reading HTML files, defaults
        to "html.parser"
    verbose : bool, optional
        Specify the verbosity of the transpiler, defaults to False
    """

    def __init__(self,
                 config_settings,
                 props_map,
                 verbose=False,
                 create_project=False):
        """Transpiler initiator takes config settings and unpacks variables.

        Creates the React app through NodeWrapper when the destination
        directory does not exist yet, then installs the react-helmet and
        react-router-dom packages into it.

        Parameters
        ----------
        config_settings : dict
            project_name, src_dir, dest_dir as dict object stored
            in config.json
        props_map : dict
            Mapping of props for HTML to React used during transpilation
        verbose : bool, optional
            Specify the verbosity of the transpiler, defaults to False
        create_project : bool, optional
            Set to True if create project is calling method,
            defaults to False

        Raises
        ------
        RuntimeError
            Raised if the config_settings point to non existing dirs.
        """

        self.project_name = config_settings["project_name"]
        self.src_dir = config_settings["src_dir"]
        self.dest_dir = config_settings["dest_dir"]
        self.props_map = props_map
        self.index_routes = {}
        self.parser = "html.parser"
        self.verbose = verbose

        if create_project:
            # During project creation the working directory is the parent
            # of the project folder, so both paths are re-rooted under it.
            self.src_dir = os.path.join('.', self.project_name, self.src_dir)
            self.dest_dir = os.path.join('.',
                                         self.project_name,
                                         self.dest_dir)

        npm = NodeWrapper()

        if not os.path.exists(os.path.join(".", self.src_dir)):
            raise RuntimeError(
                "Source directory doesn't exist at " +
                str(self.src_dir)
            )

        if not os.path.exists(os.path.join(".", self.dest_dir)):
            if create_project:
                project_dir = os.path.join(".", self.project_name)
                npm.create_react_app(
                    project_name=self.project_name,
                    working_dir=project_dir,
                    rename_to=self.dest_dir
                )
            else:
                npm.create_react_app(
                    project_name=self.project_name,
                    rename_to=self.dest_dir
                )

        # Install NPM packages
        npm.install(package_name='react-helmet', working_dir=self.dest_dir)
        npm.install(
            package_name='react-router-dom',
            working_dir=self.dest_dir
        )

    def __findTag(self, soup, tag_name, attrs):
        """Finds the first tag matching tag_name and attrs in soup.

        The attribute values are tried as given, then upper-cased, then
        lower-cased.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup to search in.
        tag_name : str
            Name of tag being looked for.
        attrs : dict
            Attributes the tag has to carry.

        Returns
        -------
        bs4.element.Tag or None
            The matching tag, None if there is no match.
        """

        htmlTag = soup.find(tag_name, attrs=attrs)
        if htmlTag is not None:
            return htmlTag

        for change_case in (str.upper, str.lower):
            # Only string values can be re-cased; anything else is passed
            # through untouched instead of raising.
            casedAttrs = {
                attr: change_case(value) if isinstance(value, str) else value
                for attr, value in attrs.items()
            }
            htmlTag = soup.find(tag_name, attrs=casedAttrs)
            if htmlTag is not None:
                return htmlTag
        return None

    def __replaceAttrs(self, soup, tag_name, or_attrs, f_attrs):
        """Replaces the attrs for updated tags comparing original and
        final attrs.

        Nothing happens when both dicts are equal or when no matching tag
        is found. An ``a`` tag whose final attrs carry ``to`` is renamed to
        ``Link``.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup passed by reference.
        tag_name : str
            Name of tag being worked upon.
        or_attrs : dict
            Dictionary consisting of original attributes of HTML.
        f_attrs : dict
            Dictionary consisting of final attributes for React.
        """

        if or_attrs == f_attrs:
            return

        htmlTag = self.__findTag(soup, tag_name, or_attrs)
        if htmlTag is None:
            return

        htmlTag.attrs = f_attrs
        if tag_name == "a" and "to" in f_attrs:
            htmlTag.name = "Link"

    def __deleteTag(self, soup, tag_name, attrs):
        """Deletes the tag corresponding to given tag_name and attrs.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup passed by reference.
        tag_name : str
            Name of tag being worked upon.
        attrs : dict
            Dictionary consisting of original attributes of HTML.
        """

        htmlTag = self.__findTag(soup, tag_name, attrs)
        if htmlTag is not None:
            htmlTag.decompose()

    def __generateReactFileContent(
        self, soup, function_name, filepath_from_src
    ):
        """Generates React code from HTML soup object.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup with HTML code to be transpiled.
        function_name : str
            Function name to be used from filename without extension with
            first letter capitalized
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        str
            Content for React file.

        Raises
        ------
        RuntimeWarning
            Raised if the original and the mapped tags get out of step.
        """

        # Style tags and inline scripts are pulled out of the tree first:
        # styles are re-attached under Helmet, scripts go into useEffect.
        styleTags = [style.extract() for style in soup.find_all('style')]
        scriptTags = [
            script.extract()
            for script in soup.find_all('script', src=False)
        ]

        attributes_parser = AttributesParser()
        attributes_parser.feed(soup.prettify())
        tag_with_attributes = attributes_parser.data

        reactCodeMapper = ReactCodeMapper(
            self.src_dir, self.dest_dir, self.props_map
        )
        react_map = reactCodeMapper.getReactMap(
            tag_with_attributes, filepath_from_src
        )

        final_tags = react_map['tags']
        react_variables = react_map['variables']

        for original_tag, final_tag in zip(tag_with_attributes, final_tags):
            or_tag_name = list(original_tag.keys())[0]
            or_attrs = original_tag[or_tag_name]
            f_tag_name = list(final_tag.keys())[0]
            f_attrs = final_tag[f_tag_name]

            if or_tag_name != f_tag_name:
                raise RuntimeWarning(
                    "There's an error in processing " + or_tag_name
                )
            if f_attrs is None:
                self.__deleteTag(soup, or_tag_name, or_attrs)
            else:
                self.__replaceAttrs(soup, or_tag_name, or_attrs, f_attrs)

        reactHead = None
        if soup.head:
            soup.head.name = 'Helmet'
            reactHead = soup.Helmet
        elif styleTags:
            reactHead = soup.new_tag('Helmet')

        # reactHead is always set when styleTags is non-empty.
        for style in styleTags:
            reactHead.append(style)

        # NOTE: a document without a <body> fails on the next line.
        contents = soup.body.contents
        body_contents = [
            x.encode('utf-8').decode("utf-8").strip() for x in contents
        ]
        body_str = "".join(body_contents)

        if reactHead:
            content_str = reactHead.prettify() + body_str
            react_map['imports'] += "import Helmet from 'react-helmet';"
        else:
            content_str = body_str

        # Imported assets were written as quoted "{var}" attribute values;
        # drop the quotes so JSX evaluates them as expressions.
        for variable in react_variables:
            content_str = content_str.replace(
                '"{' + variable + '}"', '{' + variable + '}'
            )

        if scriptTags:
            react_map['imports'] += \
                "import React, { useEffect } from 'react';"
            scriptContent = "".join(
                "".join(script.contents) for script in scriptTags
            )
            useEffect = "useEffect(() => {" + scriptContent + "}, []);"
        else:
            react_map['imports'] += "import React from 'react';"
            useEffect = ""

        if styleTags:
            # CSS braces would be read as JSX expressions, so the style
            # body is wrapped in a template literal.
            content_str = content_str.replace("<style>", "<style>{`")
            content_str = content_str.replace("</style>", "`}</style>")

        react_function = "function " + function_name + "() { " + \
            useEffect + " return (<>" + content_str + "</>);}"

        return """
{imports}

{function}

export default {function_name};
""".format(
            function_name=function_name,
            function=react_function,
            imports=react_map['imports']
        )

    def __getReactComponentName(self, link):
        """Generates safe name for React components from path to file.

        Parameters
        ----------
        link : str
            Path to file for which variable is created.

        Returns
        -------
        str
            "REACTONITE" followed by the upper-cased link in which every
            non-alphanumeric character is replaced by an underscore
        """

        varName = "".join(ch if ch.isalnum() else '_' for ch in link)
        return "REACTONITE" + varName.upper()

    def __generateIndexJsContent(self):
        """Generates content for index.js file in React codebase
        with handled routes

        Routes with more path segments are emitted first: Switch renders
        the first Route whose path matches, and a non-exact path also
        matches everything nested below it, so a parent route placed
        first would shadow its children.

        Returns
        -------
        str
            Content for index.js file in React codebase
        """

        router = 'import { BrowserRouter as Router, Switch, Route } ' \
            'from "react-router-dom";'

        # sorted() is stable, so equally deep routes keep insertion order.
        ordered_routes = sorted(
            self.index_routes.items(),
            key=lambda route: route[0].count('/'),
            reverse=True
        )

        imports = []
        routes = []
        for link, path in ordered_routes:
            componentName = self.__getReactComponentName(path)
            imports.append(
                'import ' + componentName + ' from "' + path + '";'
            )
            routes.append("""
        <Route path="/{link}">
          <{componentName} />
        </Route>
""".format(link=link, componentName=componentName))

        imports = '\n'.join(imports)
        routes = '\n'.join(routes)

        return """
import React from "react";
import ReactDOM from "react-dom";
import * as serviceWorkerRegistration from "./serviceWorkerRegistration";
import reportWebVitals from "./reportWebVitals";
{router}
import App from "./App";
{imports}

ReactDOM.render(
  <Router>
    <Switch>
      {routes}
      <Route path="/">
        <App />
      </Route>
    </Switch>
  </Router>,
  document.getElementById("root")
);

// If you don't want your app to work offline, you can change
// register() to unregister() below. Note this comes with some
// pitfalls. Learn more about service workers: https://cra.link/PWA
serviceWorkerRegistration.register();

// If you want to start measuring performance in your app, pass a
// function to log results (for example: reportWebVitals(console.log))
// or send to analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();
""".format(imports=imports, routes=routes, router=router)

    def __rebuildIndexJs(self):
        """Generates the index.js for React apps entry point, needed to
        handle links to pages

        Raises
        ------
        RuntimeError
            Raised if the index.js file is not found in dest_dir
        """

        pathToIndexJs = os.path.join(self.dest_dir, 'src', 'index.js')
        if not os.path.isfile(pathToIndexJs):
            raise RuntimeError(
                "Looks like you are missing index.js file in "
                "React directory! It seems to be an NPM/React issue rather."
            )

        # Build the content before opening the file: opening with 'w'
        # truncates it, and a failure must not leave index.js empty.
        file_content = self.__generateIndexJsContent()
        with open(pathToIndexJs, 'w') as outfile:
            outfile.write(file_content)

        NodeWrapper().prettify(path=pathToIndexJs)

    def __addRoutesToIndexLinkArray(self, filePathFromSrc, filenameNoExt):
        """Adds links to self.index_routes to be used in index.js generation

        An index page is routed at its folder, any other page at
        folder/filename.

        Parameters
        ----------
        filePathFromSrc : str
            Path to the folder where file is in dest_dir folder from src
        filenameNoExt : str
            Filename with no extension
        """

        if filenameNoExt == "index":
            htmlPath = os.path.normpath(filePathFromSrc)
            jsPath = '/'.join(htmlPath.split(os.path.sep))
            self.index_routes[jsPath] = "./" + jsPath + "/index"
        else:
            htmlPath = os.path.normpath(
                os.path.join(filePathFromSrc, filenameNoExt)
            )
            jsPath = '/'.join(htmlPath.split(os.path.sep))
            self.index_routes[jsPath] = "./" + jsPath

    def transpileFile(self, filepath):
        """Transpiles the source HTML file given at the given filepath
        to a React code, which is then copied over to the React build
        directory, if not HTML file then gets copied directly.

        The entry point (index.html directly inside src_dir) becomes
        App.js; every other page is additionally registered as a route.

        Parameters
        ----------
        filepath : str
            Path to the source HTML file which is to be transpiled

        Raises
        ------
        RuntimeError
            Raised if the source html file is not found, or if the React
            project has no src folder
        """

        # Folder of the file relative to the source directory. relpath is
        # used rather than searching for the text "src", which breaks when
        # another path component contains it.
        filePathFromSrc = os.path.dirname(
            os.path.relpath(filepath, self.src_dir)
        )
        _, filename = os.path.split(filepath)
        filenameWithNoExtension, file_extension = os.path.splitext(filename)

        if file_extension != ".html":
            dest_filepath = os.path.join(
                self.dest_dir, 'src', filePathFromSrc, filename
            )
            if self.verbose:
                print(
                    "Copying file " + str(filepath) + " -> " +
                    str(dest_filepath)
                )
            os.makedirs(os.path.dirname(dest_filepath), exist_ok=True)
            copy_file(filepath, dest_filepath)
            return

        if not os.path.isfile(filepath):
            raise RuntimeError("{} file not found".format(filepath))

        # Compared as absolute paths so that equivalent spellings such as
        # "./src/index.html" and "src/index.html" are recognised.
        entry_point_html = os.path.join(self.src_dir, 'index.html')
        is_entry_point = (
            os.path.abspath(entry_point_html) == os.path.abspath(filepath)
        )
        if is_entry_point:
            filenameWithNoExtension = "App"

        filename = filenameWithNoExtension + ".js"
        if not os.path.isdir(os.path.join(self.dest_dir, 'src')):
            raise RuntimeError(
                "Looks like your React project didn't get created "
                "please check your " + self.dest_dir + " for a src folder"
            )

        dest_filepath = os.path.join(
            self.dest_dir, 'src', filePathFromSrc, filename
        )
        if self.verbose:
            print(
                "Transpiling file " + str(filepath) + " -> " +
                str(dest_filepath)
            )

        with open(filepath, 'r') as index:
            soup = BeautifulSoup(index, self.parser)

        # Remove all comments
        for comment in soup.find_all(
            string=lambda text: isinstance(text, Comment)
        ):
            comment.extract()

        # Generate before touching the destination so that a failure does
        # not leave a truncated output file behind.
        file_content = self.__generateReactFileContent(
            soup, filenameWithNoExtension.capitalize(), filePathFromSrc
        )

        os.makedirs(os.path.dirname(dest_filepath), exist_ok=True)
        with open(dest_filepath, 'w') as outfile:
            outfile.write(file_content)

        NodeWrapper().prettify(path=dest_filepath)

        if not is_entry_point:
            self.__addRoutesToIndexLinkArray(
                filePathFromSrc, filenameWithNoExtension
            )

    def transpile_project(self, copy_static=True):
        """Checks that the entry point exists, then walks the source
        directory, copying non html files and transpiling html files, and
        finally regenerates index.js.

        Parameters
        ----------
        copy_static : bool, optional
            Will copy non .html files if True, only .html files will be
            transpiled if False, default True

        Raises
        ------
        RuntimeError
            Raised if the source html file is missing.
        """

        entry_point_html = os.path.join(self.src_dir, 'index.html')
        if not os.path.isfile(entry_point_html):
            raise RuntimeError(
                "Entry point file doesn't exist at " +
                str(entry_point_html)
            )

        if self.verbose:
            print("Transpiling files...")

        # "**" must be a path component of its own to recurse; gluing it
        # to the directory name would also match sibling directories that
        # merely start with the same name.
        pattern = os.path.join(self.src_dir, '**')
        for filepath in glob.iglob(pattern, recursive=True):
            if not os.path.isfile(filepath):
                continue
            _, file_extension = os.path.splitext(filepath)
            if file_extension == ".html" or copy_static:
                self.transpileFile(filepath)

        self.__rebuildIndexJs()