import glob
import os
from distutils.file_util import copy_file
from html.parser import HTMLParser
from bs4 import BeautifulSoup, Comment
from .NodeWrapper import NodeWrapper
class AttributesParser(HTMLParser):
    """Extends HTMLParser to extract tags with attributes from an HTML string.

    Call the feed method of HTMLParser to generate the data and then
    retrieve it from the object. Here's a usage example:

        attributes_parser = AttributesParser()
        attributes_parser.feed("YOUR_HTML_STRING")
        tag_with_attributes = attributes_parser.data
        print(tag_with_attributes)

    Attributes
    ----------
    data : list
        Stores the tags with their attributes, one single-key dict
        ``{tag: {attr: value}}`` per start tag, in document order.
    """

    def __init__(self, *args, **kwargs):
        """Creates the parser with an empty ``data`` list.

        All arguments are forwarded unchanged to HTMLParser.
        """
        # Initialised eagerly so that ``data`` is readable even when the fed
        # markup contains no start tag at all.
        self.data = []
        super().__init__(*args, **kwargs)

    def handle_starttag(self, tag, attrs):
        """Overrides the original handler for start tag and appends the tags to data.

        Parameters
        ----------
        tag : str
            Name of tag being parsed
        attrs : list
            List of (name, value) pairs corresponding to the current tag
        """
        # dict() keeps the last value when an attribute name is repeated.
        self.data.append({tag: dict(attrs)})
class ReactCodeMapper:
    """Class to convert tags and props from HTML to React.

    Call the getReactMap method with the tags extracted from an HTML page
    to get the corresponding React mapping. Here's a usage example:

        reactCodeMapper = ReactCodeMapper(source_dir, destination_dir, props_map)
        react_map = reactCodeMapper.getReactMap(
            tag_with_attributes, filepath_from_src
        )
        print(react_map)

    Attributes
    ----------
    CUSTOM_TAG_HANDLERS : dict
        Stores mapping corresponding to tags which are handled separately.
    src_dir : str
        Source directory for the HTML codebase.
    dest_dir : str
        Destination directory for the React codebase.
    props_map : dict
        Mapping of attrs for HTML to React from props_map.py
    add_to_import : list
        Stores imports corresponding to variables created during transpilation.
    add_variables : list
        Stores newly created variables during transpilation.
    router_link_imported : bool, optional
        Saves whether Link tag needs to be imported for current page.
    """

    def __init__(self, src_dir, dest_dir, props_map):
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.props_map = props_map
        self.add_to_import = []
        self.add_variables = []
        self.router_link_imported = False

        self.__A_TAG_HANDLER = 'A_TAG_HANDLER'
        self.__IMAGE_TAG_HANDLER = 'IMAGE_TAG_HANDLER'
        self.__SCRIPT_TAG_HANDLER = 'SCRIPT_TAG_HANDLER'
        self.__STYLE_TAG_HANDLER = "STYLE_TAG_HANDLER"
        self.__LINK_TAG_HANDLER = 'LINK_TAG_HANDLER'

        self.CUSTOM_TAG_HANDLERS = {
            'a': self.__A_TAG_HANDLER,
            'img': self.__IMAGE_TAG_HANDLER,
            'script': self.__SCRIPT_TAG_HANDLER,
            'style': self.__STYLE_TAG_HANDLER,
            'link': self.__LINK_TAG_HANDLER
        }

    def __addImport(self, statement):
        """Registers an import statement once.

        The same asset is often referenced by several tags of a page;
        emitting the same ``import x from '...'`` twice would redeclare the
        identifier in the generated module.

        Parameters
        ----------
        statement : str
            Complete JS import statement.
        """
        if statement not in self.add_to_import:
            self.add_to_import.append(statement)

    def __getSafeName(self, link):
        """Generates safe name for variable from path to file.

        Parameters
        ----------
        link : str
            Path to file for which variable is created.

        Returns
        -------
        str
            Variable name generated from link
        """
        var_name = "".join(ch if ch.isalnum() else "_" for ch in link)
        # A JS identifier must not start with a digit (e.g. "1.png").
        if var_name[:1].isdigit():
            var_name = "_" + var_name
        return var_name

    def __getLinkInfo(self, link, filepath_from_src, no_var=False):
        """Generates link information.

        If link is internal the corresponding import is registered and the
        variable name is generated, an external link is returned as is.

        Parameters
        ----------
        link : str
            Link for filepath or external link.
        filepath_from_src : str
            Path to file from src.
        no_var : bool, optional
            To generate import variable or just import file, default is False
            i.e. generate variable

        Returns
        -------
        str
            ``"{variable}"`` for an internal link, the link itself for an
            external one. None for an empty link and for an internal link
            imported with ``no_var=True``.
        """
        if not link:
            return None

        path_to_link = os.path.join(self.src_dir, filepath_from_src, link)
        path_to_index = os.path.join(path_to_link, 'index.html')
        is_internal = (
            os.path.isfile(path_to_link) or os.path.isfile(path_to_index)
        )
        if not is_internal:
            return link

        if no_var:
            self.__addImport("import '{link}';".format(link=link))
            return None

        var = self.__getSafeName(link)
        self.__addImport(
            "import {var} from '{link}';".format(var=var, link=link)
        )
        if var not in self.add_variables:
            self.add_variables.append(var)
        return "{" + var + "}"

    def __getAttrsWithLink(
        self, attrs, linkAttr, filepath_from_src, no_var=False
    ):
        """Generates attrs for tags having links to other files.

        If link is internal corresponding variable name is generated, for
        external link it is returned.

        Parameters
        ----------
        attrs : dict
            Attributes of tag to be worked upon.
        linkAttr : str
            Name of attr that correspond to link of file, example 'src' in
            case of script tag
        filepath_from_src : str
            Path to file from src directory.
        no_var : bool, optional
            To generate import variable or just import file, default is False
            i.e. generate variable

        Returns
        -------
        dict
            Final dictionary of attributes with link handled, or None when
            no link information could be generated for ``linkAttr``.
        """
        final_attrs = {}
        for attr_key, attr_value in attrs.items():
            if attr_key != linkAttr:
                final_attrs[attr_key] = attr_value
                continue
            link_info = self.__getLinkInfo(
                attr_value, filepath_from_src, no_var=no_var
            )
            if link_info is None:
                return None
            final_attrs[linkAttr] = link_info
        return final_attrs

    def __getAttrsForRouterLink(self, attrs, filepath_from_src):
        """Generates attrs for A tag having links to other files.

        An href pointing to a file (or to a directory holding an index.html)
        inside the source directory is replaced by a ``to`` attribute, any
        other href is kept untouched.

        Parameters
        ----------
        attrs : dict
            Attributes of tag to be worked upon.
        filepath_from_src : str
            Path to file from src directory.

        Returns
        -------
        tuple
            Tuple of final dictionary of attributes with link handled and
            information about internal link
        """
        final_attrs = {}
        is_internal = False
        for attr_key, attr_value in attrs.items():
            if attr_key != "href":
                final_attrs[attr_key] = attr_value
                continue

            href_info = attr_value
            # HTMLParser reports a value-less attribute (<a href>) as None,
            # which cannot be joined into a path.
            if href_info is None:
                final_attrs["href"] = href_info
                continue

            path_ref = os.path.join(
                self.src_dir, filepath_from_src, href_info
            )
            path_ref_index = os.path.join(path_ref, "index.html")
            if os.path.isfile(path_ref) or os.path.isfile(path_ref_index):
                html_path = os.path.normpath(
                    os.path.join(filepath_from_src, href_info)
                )
                js_path = '/'.join(html_path.split(os.path.sep))
                js_path = js_path.replace(".html", "")
                if js_path == "index":
                    js_path = "/"
                is_internal = True
                final_attrs["to"] = js_path
            else:
                final_attrs["href"] = href_info
        return final_attrs, is_internal

    def __customTagAttrsHandler(self, attrs, tag_handler, filepath_from_src):
        """Custom tag and attributes handler for parsing attrs from CUSTOM_TAG_HANDLERS

        Parameters
        ----------
        attrs : dict
            Attributes for corresponding tag needed to be handled
        tag_handler : str
            Tag handler type to be used in mapping
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        dict
            Final attributes for that tag, if None is returned delete the tag
        """
        if tag_handler == self.__A_TAG_HANDLER:
            final_attrs, is_internal_link = self.__getAttrsForRouterLink(
                attrs, filepath_from_src
            )
            if is_internal_link and not self.router_link_imported:
                self.add_to_import.append(
                    'import {Link} from "react-router-dom";'
                )
                self.router_link_imported = True
            return final_attrs

        if tag_handler == self.__IMAGE_TAG_HANDLER:
            return self.__getAttrsWithLink(attrs, 'src', filepath_from_src)

        if tag_handler == self.__SCRIPT_TAG_HANDLER:
            # Inline scripts carry no src and are dropped from the markup.
            if 'src' not in attrs:
                return None
            return self.__getAttrsWithLink(attrs, 'src', filepath_from_src)

        if tag_handler == self.__STYLE_TAG_HANDLER:
            return None

        if tag_handler == self.__LINK_TAG_HANDLER:
            # Stylesheets are turned into a plain import; every link tag is
            # then removed from the markup. ``get`` is used because a link
            # tag is not guaranteed to carry a rel attribute.
            if attrs.get("rel") == "stylesheet":
                self.__getAttrsWithLink(
                    attrs,
                    'href',
                    filepath_from_src,
                    no_var=True
                )
            return None

        return {}

    def __getReactAttrs(self, attrs):
        """Generates renamed attributes corresponding to React, and removes
        inline style attributes and attributes starting with on like
        onclick etc.

        Parameters
        ----------
        attrs : dict
            Attributes in HTML format

        Returns
        -------
        dict
            Attributes in React format
        """
        final_attrs = {}
        for attr_key, attr_value in attrs.items():
            if attr_key == "style" or attr_key.startswith("on"):
                continue
            final_attrs[self.props_map.get(attr_key, attr_key)] = attr_value
        return final_attrs

    def getReactMap(self, tags, filepath_from_src):
        """Wrapper to generate React Map object comprising of all data needed
        to convert HTML to React

        Parameters
        ----------
        tags : list
            Tags with their HTML attributes as extracted using
            AttributesParser, one single-key dict per tag
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        dict
            Final mapping of tags with imports and variables for React, if
            the attributes of a tag are None then the tag needs to be deleted
        """
        react_tags = []
        for tag in tags:
            tag_name = list(tag.keys())[0]
            attrs = self.__getReactAttrs(tag[tag_name])
            if tag_name in self.CUSTOM_TAG_HANDLERS:
                attrs = self.__customTagAttrsHandler(
                    attrs,
                    self.CUSTOM_TAG_HANDLERS[tag_name],
                    filepath_from_src
                )
            react_tags.append({tag_name: attrs})

        return {
            'imports': "\n".join(self.add_to_import),
            'tags': react_tags,
            'variables': self.add_variables,
        }
class Transpiler:
    """Transpiler responsible for translating HTML code to React

    Attributes
    ----------
    project_name : str
        Name of the project as stored in config
    src_dir : str
        Path of the source directory within the project directory
    dest_dir : str
        Path to the transpiled React app within the project directory
    props_map : dict
        Mapping of props for HTML to React used during transpilation
    index_routes : dict
        Stores Routes data corresponding to different pages for index.js
    parser : str, optional
        Specify which parser to use for reading HTML files, defaults
        to "html.parser"
    verbose : bool, optional
        Specify the verbosity of the transpiler, defaults to False
    """

    def __init__(self,
                 config_settings,
                 props_map,
                 verbose=False,
                 create_project=False):
        """Transpiler initiator takes config settings and unpacks variables.

        Parameters
        ----------
        config_settings : dict
            project_name, src_dir, dest_dir as dict object stored
            in config.json
        props_map : dict
            Mapping of props for HTML to React used during transpilation
        verbose : bool, optional
            Specify the verbosity of the transpiler, defaults to False
        create_project : bool, optional
            Set to True if create project is calling method, defaults to False

        Raises
        ------
        RuntimeError
            Raised if the config_settings point to non existing dirs.
        """
        self.project_name = config_settings["project_name"]
        self.src_dir = config_settings["src_dir"]
        self.dest_dir = config_settings["dest_dir"]
        self.props_map = props_map
        self.index_routes = {}
        self.parser = "html.parser"
        self.verbose = verbose

        if create_project:
            # During project creation both directories live inside a fresh
            # folder named after the project.
            self.src_dir = os.path.join('.', self.project_name, self.src_dir)
            self.dest_dir = os.path.join('.', self.project_name, self.dest_dir)

        npm = NodeWrapper()

        if not os.path.exists(os.path.join(".", self.src_dir)):
            raise RuntimeError(
                "Source directory doesn't exist at " +
                str(self.src_dir)
            )

        if not os.path.exists(os.path.join(".", self.dest_dir)):
            if create_project:
                project_dir = os.path.join(".", self.project_name)
                npm.create_react_app(
                    project_name=self.project_name,
                    working_dir=project_dir,
                    rename_to=self.dest_dir
                )
            else:
                npm.create_react_app(
                    project_name=self.project_name,
                    rename_to=self.dest_dir
                )
            # Install NPM packages
            # NOTE(review): the packages are installed only when the React
            # app has just been created; confirm this is the intended scope.
            npm.install(package_name='react-helmet', working_dir=self.dest_dir)
            npm.install(
                package_name='react-router-dom',
                working_dir=self.dest_dir
            )

    def __findTag(self, soup, tag_name, attrs):
        """Finds the first tag matching tag_name and attrs.

        The attribute values are tried as given, then upper-cased, then
        lower-cased.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup to search in.
        tag_name : str
            Name of tag being looked for.
        attrs : dict
            Dictionary consisting of original attributes of HTML.

        Returns
        -------
        bs4.element.Tag
            The matching tag, or None if no candidate matched.
        """
        html_tag = soup.find(tag_name, attrs=attrs)
        if html_tag is not None:
            return html_tag

        for change_case in (str.upper, str.lower):
            # Only strings can change case; other values are passed as is.
            cased_attrs = {
                key: change_case(value) if isinstance(value, str) else value
                for key, value in attrs.items()
            }
            html_tag = soup.find(tag_name, attrs=cased_attrs)
            if html_tag is not None:
                return html_tag
        return None

    def __replaceAttrs(self, soup, tag_name, or_attrs, f_attrs):
        """Replaces the attrs for updated tags comparing original and final attrs.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup passed by reference.
        tag_name : str
            Name of tag being worked upon.
        or_attrs : dict
            Dictionary consisting of original attributes of HTML.
        f_attrs : dict
            Dictionary consisting of final attributes for React.
        """
        if or_attrs == f_attrs:
            return

        html_tag = self.__findTag(soup, tag_name, or_attrs)
        if html_tag is None:
            return

        html_tag.attrs = f_attrs
        # An anchor that received a "to" attribute is an internal link and
        # becomes a router Link.
        if tag_name == "a" and "to" in f_attrs:
            html_tag.name = "Link"

    def __deleteTag(self, soup, tag_name, attrs):
        """Deletes the tag corresponding to given tag_name and attrs.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup passed by reference.
        tag_name : str
            Name of tag being worked upon.
        attrs : dict
            Dictionary consisting of original attributes of HTML.
        """
        html_tag = self.__findTag(soup, tag_name, attrs)
        if html_tag is not None:
            html_tag.decompose()

    def __generateReactFileContent(
        self, soup, function_name, filepath_from_src
    ):
        """Generates React code from HTML soup object.

        Parameters
        ----------
        soup : BeautifulSoup
            bs4.BeautifulSoup with HTML code to be transpiled.
        function_name : str
            Function name to be used from filename without extension with
            first letter capitalized
        filepath_from_src : str
            Path to file from src directory

        Returns
        -------
        str
            Content for React file.

        Raises
        ------
        RuntimeWarning
            Raised if the original and the mapped tags get out of step.
        """
        # Style tags and inline scripts are pulled out of the markup first;
        # they are re-attached to the head / wrapped in useEffect below.
        styleTags = [style.extract() for style in soup.find_all('style')]
        scriptTags = [
            script.extract() for script in soup.find_all('script', src=False)
        ]

        attributes_parser = AttributesParser()
        attributes_parser.feed(soup.prettify())
        # getattr keeps this working with a parser that only creates ``data``
        # once the first start tag has been seen.
        tag_with_attributes = getattr(attributes_parser, 'data', [])

        reactCodeMapper = ReactCodeMapper(
            self.src_dir, self.dest_dir, self.props_map
        )
        react_map = reactCodeMapper.getReactMap(
            tag_with_attributes, filepath_from_src
        )
        final_tags = react_map['tags']
        react_variables = react_map['variables']
        imports = react_map['imports']

        for original_tag, final_tag in zip(tag_with_attributes, final_tags):
            or_tag_name = list(original_tag.keys())[0]
            or_attrs = original_tag[or_tag_name]
            f_tag_name = list(final_tag.keys())[0]
            f_attrs = final_tag[f_tag_name]

            if or_tag_name != f_tag_name:
                raise RuntimeWarning(
                    "There's an error in processing " +
                    or_tag_name
                )
            if f_attrs is None:
                self.__deleteTag(soup, or_tag_name, or_attrs)
            else:
                self.__replaceAttrs(soup, or_tag_name, or_attrs, f_attrs)

        reactHead = None
        if soup.head:
            soup.head.name = 'Helmet'
            reactHead = soup.Helmet
        elif styleTags:
            reactHead = soup.new_tag('Helmet')

        for style in styleTags:
            reactHead.append(style)

        body_str = "".join(
            x.encode('utf-8').decode("utf-8").strip()
            for x in soup.body.contents
        )

        if reactHead:
            content_str = reactHead.prettify() + body_str
            imports += "import Helmet from 'react-helmet';"
        else:
            content_str = body_str

        # The mapper stores imported variables as the string "{name}"; strip
        # the quotes so JSX evaluates them as expressions.
        for variable in react_variables:
            content_str = content_str.replace(
                '"{' + variable + '}"',
                '{' + variable + '}'
            )

        if scriptTags:
            imports += "import React, { useEffect } from 'react';"
            scriptContent = "".join(
                "".join(script.contents) for script in scriptTags
            )
            useEffect = "useEffect(() => {" + scriptContent + "}, []);"
        else:
            imports += "import React from 'react';"
            useEffect = ""

        if styleTags:
            # CSS braces would be read as JSX expressions, so the style
            # content is wrapped in a template literal.
            content_str = content_str.replace("<style>", "<style>{`")
            content_str = content_str.replace("</style>", "`}</style>")

        react_function = "function " + function_name + "() { " + useEffect + \
            " return (<>" + content_str + "</>);}"

        return "\n".join([
            "",
            imports,
            react_function,
            "export default " + function_name + ";",
            "",
        ])

    def __getReactComponentName(self, link):
        """Generates safe name for React components from path to file.

        Parameters
        ----------
        link : str
            Path to file for which variable is created.

        Returns
        -------
        str
            Variable name generated from link
        """
        var_name = "".join(ch if ch.isalnum() else "_" for ch in link)
        return "REACTONITE" + var_name.upper()

    def __generateIndexJsContent(self):
        """Generates content for index.js file in React codebase with handled routes

        Returns
        -------
        str
            Content for index.js file in React codebase
        """
        router = "\n".join([
            "import {",
            "BrowserRouter as Router,",
            "Switch,",
            "Route",
            '} from "react-router-dom";',
        ])

        imports = []
        routes = []
        for link, path in self.index_routes.items():
            componentName = self.__getReactComponentName(path)
            imports.append(
                'import ' + componentName + ' from "' + path + '";'
            )
            routes.append("\n".join([
                "",
                '<Route path="/' + link + '">',
                "<" + componentName + " />",
                "</Route>",
                "",
            ]))

        return "\n".join([
            "",
            'import React from "react";',
            'import ReactDOM from "react-dom";',
            'import * as serviceWorkerRegistration from '
            '"./serviceWorkerRegistration";',
            'import reportWebVitals from "./reportWebVitals";',
            router,
            'import App from "./App";',
            "\n".join(imports),
            "ReactDOM.render(",
            "<Router>",
            "<Switch>",
            "\n".join(routes),
            '<Route path="/">',
            "<App />",
            "</Route>",
            "</Switch>",
            "</Router>,",
            'document.getElementById("root")',
            ");",
            "// If you don't want your app to work offline, you can change",
            "// register() to unregister() below. Note this comes with some",
            "// pitfalls. Learn more about service workers: "
            "https://cra.link/PWA",
            "serviceWorkerRegistration.register();",
            "// If you want to start measuring performance in your app, "
            "pass a",
            "// function to log results (for example: "
            "reportWebVitals(console.log))",
            "// or send to analytics endpoint. Learn more: "
            "https://bit.ly/CRA-vitals",
            "reportWebVitals();",
            "",
        ])

    def __rebuildIndexJs(self):
        """Generates the index.js for React apps entry point, needed to handle
        links to pages

        Raises
        ------
        RuntimeError
            Raised if the index.js file is not found in dest_dir
        """
        pathToIndexJs = os.path.join(self.dest_dir, 'src', 'index.js')
        if not os.path.isfile(pathToIndexJs):
            raise RuntimeError(
                "Looks like you are missing index.js file in "
                "React directory! It seems to be an NPM/React issue rather."
            )

        # Build the content before truncating the existing file.
        file_content = self.__generateIndexJsContent()
        with open(pathToIndexJs, 'w') as outfile:
            outfile.write(file_content)

        NodeWrapper().prettify(path=pathToIndexJs)

    def __addRoutesToIndexLinkArray(self, filePathFromSrc, filenameNoExt):
        """Adds links to self.index_routes to be used in index.js generation

        Parameters
        ----------
        filePathFromSrc : str
            Path to the folder where file is in dest_dir folder from src
        filenameNoExt : str
            Filename with no extension
        """
        if filenameNoExt == "index":
            # An index page is served at the route of its folder.
            htmlPath = os.path.normpath(filePathFromSrc)
            jsPath = '/'.join(htmlPath.split(os.path.sep))
            self.index_routes[jsPath] = "./" + jsPath + "/index"
        else:
            htmlPath = os.path.normpath(os.path.join(
                filePathFromSrc, filenameNoExt
            ))
            jsPath = '/'.join(htmlPath.split(os.path.sep))
            self.index_routes[jsPath] = "./" + jsPath

    def __getPathFromSrc(self, filepath):
        """Returns the folder of filepath relative to the source directory.

        Parameters
        ----------
        filepath : str
            Path to a file inside src_dir

        Returns
        -------
        str
            Folder of the file relative to src_dir, empty for files placed
            directly in src_dir
        """
        # Computed against src_dir itself rather than by searching the path
        # for the text "src", which breaks when that text occurs earlier in
        # the path or the source directory has another name.
        return os.path.dirname(os.path.relpath(filepath, self.src_dir))

    def __iterSourceFiles(self):
        """Yields the path of every file found below the source directory."""
        # The separator between src_dir and the wildcard matters: without it
        # sibling folders sharing the prefix would be walked as well.
        pattern = os.path.join(self.src_dir, '**')
        for filepath in glob.iglob(pattern, recursive=True):
            if os.path.isfile(filepath):
                yield filepath

    def transpileFile(self, filepath):
        """Transpiles the source HTML file given at the given filepath
        to a React code, which is then copied over to the React build
        directory, if not HTML file then gets copied directly.

        Parameters
        ----------
        filepath : str
            Path to the source HTML file which is to be transpiled

        Raises
        ------
        RuntimeError
            Raised if the source html file is not found, or if the React
            project has no src folder
        """
        filePathFromSrc = self.__getPathFromSrc(filepath)
        filename = os.path.basename(filepath)
        filenameWithNoExtension, file_extension = os.path.splitext(filename)

        if file_extension != ".html":
            dest_filepath = os.path.join(
                self.dest_dir, 'src', filePathFromSrc, filename
            )
            if self.verbose:
                print(
                    "Copying file " + str(filepath) +
                    " -> " + str(dest_filepath)
                )
            os.makedirs(os.path.dirname(dest_filepath), exist_ok=True)
            copy_file(filepath, dest_filepath)
            return

        if not os.path.isfile(filepath):
            raise RuntimeError("{} file not found".format(filepath))

        # The root index.html becomes App.js; compare normalised paths so
        # that "./src/index.html" and "src/index.html" are treated alike.
        entry_point_html = os.path.join(self.src_dir, 'index.html')
        is_entry_point = (
            os.path.normpath(entry_point_html) == os.path.normpath(filepath)
        )
        if is_entry_point:
            filenameWithNoExtension = "App"

        filename = filenameWithNoExtension + ".js"
        if not os.path.isdir(os.path.join(self.dest_dir, 'src')):
            raise RuntimeError(
                "Looks like your React project didn't get "
                "created please check your " + self.dest_dir +
                " for a src folder"
            )

        dest_filepath = os.path.join(
            self.dest_dir, 'src', filePathFromSrc, filename
        )
        if self.verbose:
            print(
                "Transpiling file " + str(filepath) +
                " -> " + str(dest_filepath)
            )

        with open(filepath, 'r') as index:
            soup = BeautifulSoup(index, self.parser)

        # Remove all comments
        comments = soup.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()

        # Build the content before opening the destination so that a failed
        # transpilation does not leave an empty file behind.
        file_content = self.__generateReactFileContent(
            soup,
            filenameWithNoExtension.capitalize(),
            filePathFromSrc
        )
        os.makedirs(os.path.dirname(dest_filepath), exist_ok=True)
        with open(dest_filepath, 'w') as outfile:
            outfile.write(file_content)

        NodeWrapper().prettify(path=dest_filepath)

        if not is_entry_point:
            self.__addRoutesToIndexLinkArray(
                filePathFromSrc, filenameWithNoExtension
            )

    def transpile_project(self, copy_static=True):
        """Runs initial checks like ensuring the source
        directories exist, and the source file is present.
        After that, copies non html files and transpiles the source.

        Parameters
        ----------
        copy_static : bool, optional
            Will copy non .html files if True, only .html files will be
            transpiled if False, default True

        Raises
        ------
        RuntimeError
            Raised if the source html file is missing.
        """
        entry_point_html = os.path.join(self.src_dir, 'index.html')
        if not os.path.isfile(entry_point_html):
            raise RuntimeError(
                "Entry point file doesn't exist at " +
                str(entry_point_html)
            )

        if self.verbose:
            print("Transpiling files...")

        for filepath in self.__iterSourceFiles():
            _, file_extension = os.path.splitext(os.path.basename(filepath))
            if file_extension == ".html" or copy_static:
                self.transpileFile(filepath)

        self.__rebuildIndexJs()