# Source code for pyteck.eval_model

# Python 2 compatibility
from __future__ import print_function
from __future__ import division

# Standard libraries
import os
from os.path import splitext, basename
import multiprocessing
import warnings

import numpy
from scipy.interpolate import UnivariateSpline

try:
    import yaml
except ImportError:
    print('Warning: YAML must be installed to read input file.')
    raise

from pyked.chemked import ChemKED, DataPoint

# Local imports
from .utils import units
from .simulation import Simulation

# Lower bound enforced by estimate_std_dev(); since the error function in
# evaluate_model() divides by the standard deviation, this floor keeps a
# near-perfect data fit from inflating the error metric.
min_deviation = 0.10
"""float: minimum allowable standard deviation for experimental data"""


def create_simulations(dataset, properties):
    """Set up individual simulations for each ignition delay value.

    Parameters
    ----------
    dataset : str
        Name of the dataset file; used to construct each simulation's ``id``.
    properties : pyked.chemked.ChemKED
        ChemKED object with full set of experimental properties

    Returns
    -------
    simulations : list
        List of :class:`Simulation` objects for each simulation

    """
    simulations = []
    for idx, case in enumerate(properties.datapoints):
        sim_meta = {}
        # Common metadata
        sim_meta['data-file'] = dataset
        # e.g. "mydata.yaml" datapoint 3 -> id "mydata_3"
        sim_meta['id'] = splitext(basename(dataset))[0] + '_' + str(idx)

        simulations.append(Simulation(properties.experiment_type,
                                      properties.apparatus.kind,
                                      sim_meta, case
                                      )
                           )
    return simulations
def simulation_worker(sim_tuple):
    """Worker for multiprocessing of simulation cases.

    Parameters
    ----------
    sim_tuple : tuple
        Contains Simulation object and other parameters needed to setup
        and run case.

    Returns
    -------
    sim : ``Simulation``
        Simulation case with calculated ignition delay.

    """
    case, model_file, species_key, results_path, use_restart = sim_tuple

    case.setup_case(model_file, species_key, results_path)
    case.run_case(use_restart)

    # Rebuild the Simulation so only plain state travels back to the
    # parent process from the worker.
    case = Simulation(case.kind, case.apparatus, case.meta, case.properties)
    return case
def estimate_std_dev(indep_variable, dep_variable):
    """Estimate standard deviation of data via residuals of a spline fit.

    Parameters
    ----------
    indep_variable : ndarray, list(float)
        Independent variable (e.g., temperature, pressure)
    dep_variable : ndarray, list(float)
        Dependent variable (e.g., ignition delay)

    Returns
    -------
    standard_dev : float
        Standard deviation of difference between data and best-fit line

    """
    assert len(indep_variable) == len(dep_variable), \
        'independent and dependent variables not the same length'

    # Work on float ndarray copies. Previously, list inputs containing
    # duplicate independent values crashed: ``numpy.where(list == val)``
    # compares a list to a scalar (always False) and yields an empty index
    # array, so ``idx[0]`` raised IndexError. Copying also avoids mutating
    # the caller's arrays in place.
    indep_variable = numpy.array(indep_variable, dtype=float)
    dep_variable = numpy.array(dep_variable, dtype=float)

    # ensure no repetition of independent variable by taking average of
    # associated dependent variables and removing duplicates
    vals, count = numpy.unique(indep_variable, return_counts=True)
    repeated = vals[count > 1]
    for val in repeated:
        idx, = numpy.where(indep_variable == val)
        dep_variable[idx[0]] = numpy.mean(dep_variable[idx])
        dep_variable = numpy.delete(dep_variable, idx[1:])
        indep_variable = numpy.delete(indep_variable, idx[1:])

    # ensure data sorted based on independent variable to avoid some problems
    order = numpy.argsort(indep_variable)
    indep_variable = indep_variable[order]
    dep_variable = dep_variable[order]

    # spline fit of the data
    if len(indep_variable) == 1 or len(indep_variable) == 2:
        # Fit of data will be perfect
        return min_deviation
    elif len(indep_variable) == 3:
        # UnivariateSpline's default cubic needs > 3 points; use quadratic
        spline = UnivariateSpline(indep_variable, dep_variable, k=2)
    else:
        spline = UnivariateSpline(indep_variable, dep_variable)

    standard_dev = numpy.std(dep_variable - spline(indep_variable))

    if standard_dev < min_deviation:
        print('Standard deviation of {:.2f} too low, '
              'using {:.2f}'.format(standard_dev, min_deviation))
        standard_dev = min_deviation

    return standard_dev
def get_changing_variable(cases):
    """Identify variable changing across multiple cases.

    Parameters
    ----------
    cases : list(pyked.chemked.DataPoint)
        List of DataPoint with experimental case data.

    Returns
    -------
    variable : list(float)
        List of floats representing changing experimental variable.

    """
    changing_var = None

    # Scan temperature then pressure for a quantity that varies over cases.
    for var_name in ('temperature', 'pressure'):
        values = [getattr(case, var_name) for case in cases]
        if all(val == values[0] for val in values):
            continue
        if changing_var is None:
            changing_var = var_name
        else:
            # Both vary; fall back to temperature and stop looking.
            warnings.warn('Warning: multiple changing variables. '
                          'Using temperature.',
                          RuntimeWarning
                          )
            changing_var = 'temperature'
            break

    # Temperature is default
    if changing_var is None:
        changing_var = 'temperature'

    # Extract magnitudes; quantities with uncertainty wrap the nominal
    # quantity in a ``value`` attribute.
    variable = []
    for case in cases:
        quantity = getattr(case, changing_var)
        if hasattr(quantity, 'value'):
            quantity = quantity.value
        variable.append(quantity.magnitude)

    return variable
def evaluate_model(model_name, spec_keys_file, dataset_file,
                   data_path='data', model_path='models',
                   results_path='results', model_variant_file=None,
                   num_threads=None, print_results=False, restart=False,
                   skip_validation=False,
                   ):
    """Evaluates the ignition delay error of a model for a given dataset.

    Parameters
    ----------
    model_name : str
        Chemical kinetic model filename
    spec_keys_file : str
        Name of YAML file identifying important species
    dataset_file : str
        Name of file with list of data files
    data_path : str
        Local path for data files. Optional; default = 'data'
    model_path : str
        Local path for model file. Optional; default = 'models'
    results_path : str
        Local path for creating results files. Optional; default = 'results'
    model_variant_file : str
        Name of YAML file identifying ranges of conditions for variants of
        the kinetic model. Optional; default = ``None``
    num_threads : int
        Number of CPU threads to use for performing simulations in parallel.
        Optional; default = ``None``, in which case the available number of
        cores minus one is used.
    print_results : bool
        If ``True``, print results of the model evaluation to screen.
    restart : bool
        If ``True``, process saved results. Mainly intended for
        testing/development.
    skip_validation : bool
        If ``True``, skips validation of ChemKED files.

    Returns
    -------
    output : dict
        Dictionary with all information about model evaluation results.

    """
    # Create results_path if it doesn't exist
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    # Dict to translate species names into those used by models
    with open(spec_keys_file, 'r') as f:
        model_spec_key = yaml.safe_load(f)

    # Keys for models with variants depending on pressure or bath gas
    model_variant = None
    if model_variant_file:
        with open(model_variant_file, 'r') as f:
            model_variant = yaml.safe_load(f)

    # Read dataset list
    with open(dataset_file, 'r') as f:
        dataset_list = f.read().splitlines()

    error_func_sets = numpy.zeros(len(dataset_list))
    dev_func_sets = numpy.zeros(len(dataset_list))

    # Dictionary with all output data
    output = {'model': model_name, 'datasets': []}

    # If number of threads not specified, use either max number of available
    # cores minus 1, or use 1 if multiple cores not available.
    if not num_threads:
        num_threads = multiprocessing.cpu_count()-1 or 1

    # Loop through all datasets
    for idx_set, dataset in enumerate(dataset_list):
        dataset_meta = {'dataset': dataset, 'dataset_id': idx_set}

        # Create individual simulation cases for each datapoint in this set
        properties = ChemKED(os.path.join(data_path, dataset),
                             skip_validation=skip_validation)
        simulations = create_simulations(dataset, properties)

        ignition_delays_exp = numpy.zeros(len(simulations))
        ignition_delays_sim = numpy.zeros(len(simulations))

        #############################################
        # Determine standard deviation of the dataset
        #############################################
        ign_delay = [case.ignition_delay.to('second').value.magnitude
                     if hasattr(case.ignition_delay, 'value')
                     else case.ignition_delay.to('second').magnitude
                     for case in properties.datapoints
                     ]

        # get variable that is changing across datapoints
        variable = get_changing_variable(properties.datapoints)
        # for ignition delay, use logarithm of values
        standard_dev = estimate_std_dev(variable, numpy.log(ign_delay))
        dataset_meta['standard deviation'] = float(standard_dev)

        #######################################################
        # Need to check if Ar or He in reactants but not model,
        # and if so skip this dataset (for now).
        #######################################################
        if ((any(['Ar' in spec for case in properties.datapoints
                  for spec in case.composition]
                 ) and 'Ar' not in model_spec_key[model_name]
             ) or
            (any(['He' in spec for case in properties.datapoints
                  for spec in case.composition]
                 ) and 'He' not in model_spec_key[model_name]
             )
            ):
            warnings.warn('Warning: Ar or He in dataset, but not in model. Skipping.',
                          RuntimeWarning
                          )
            error_func_sets[idx_set] = numpy.nan
            continue

        # Use available number of processors minus one,
        # or one process if single core.
        pool = multiprocessing.Pool(processes=num_threads)

        # setup all cases
        jobs = []
        for idx, sim in enumerate(simulations):
            # special treatment based on pressure for Princeton model (and others)
            if model_variant and model_name in model_variant:
                model_mod = ''
                if 'bath gases' in model_variant[model_name]:
                    # find any bath gases requiring special treatment
                    bath_gases = set(model_variant[model_name]['bath gases'])
                    gases = bath_gases.intersection(
                        set([c['species-name'] for c in sim.properties.composition])
                        )

                    # If only one bath gas present, use that. If multiple, use the
                    # predominant species. If none of the designated bath gases
                    # are present, just use the first one (shouldn't matter.)
                    if len(gases) > 1:
                        max_mole = 0.
                        sp = ''
                        for g in gases:
                            amount = float(sim.properties['composition'][g])
                            # BUG FIX: max_mole was previously never updated,
                            # so ``sp`` ended up as the last gas with any
                            # positive amount rather than the predominant one.
                            if amount > max_mole:
                                max_mole = amount
                                sp = g
                    elif len(gases) == 1:
                        sp = gases.pop()
                    else:
                        # If no designated bath gas present, use any.
                        sp = bath_gases.pop()
                    model_mod += model_variant[model_name]['bath gases'][sp]

                if 'pressures' in model_variant[model_name]:
                    # pressure to atm
                    pres = sim.properties.pressure.to('atm').magnitude

                    # choose closest pressure
                    # better way to do this?
                    variant_pressures = list(model_variant[model_name]['pressures'])
                    i = numpy.argmin(numpy.abs(
                        numpy.array([float(n) for n in variant_pressures]) - pres
                        ))
                    pres = variant_pressures[i]
                    model_mod += model_variant[model_name]['pressures'][pres]

                model_file = os.path.join(model_path, model_name + model_mod)
            else:
                model_file = os.path.join(model_path, model_name)

            jobs.append([sim, model_file, model_spec_key[model_name],
                         results_path, restart])

        # run all cases
        jobs = tuple(jobs)
        results = pool.map(simulation_worker, jobs)

        # not adding more proceses, and ensure all finished
        pool.close()
        pool.join()

        dataset_meta['datapoints'] = []

        for idx, sim in enumerate(results):
            sim.process_results()

            # Unwrap quantities carrying uncertainty down to a plain quantity
            if hasattr(sim.properties.ignition_delay, 'value'):
                ignition_delay = sim.properties.ignition_delay.value
            else:
                ignition_delay = sim.properties.ignition_delay
            if hasattr(ignition_delay, 'nominal_value'):
                ignition_delay = ignition_delay.nominal_value * units.second

            dataset_meta['datapoints'].append(
                {'experimental ignition delay': str(ignition_delay),
                 'simulated ignition delay': str(sim.meta['simulated-ignition-delay']),
                 'temperature': str(sim.properties.temperature),
                 'pressure': str(sim.properties.pressure),
                 'composition': [{'InChI': sim.properties.composition[spec].InChI,
                                  'species-name': sim.properties.composition[spec].species_name,
                                  'amount': str(sim.properties.composition[spec].amount.magnitude),
                                  } for spec in sim.properties.composition],
                 'composition type': sim.properties.composition_type,
                 })

            ignition_delays_exp[idx] = ignition_delay.magnitude
            ignition_delays_sim[idx] = sim.meta['simulated-ignition-delay'].magnitude

        # calculate error function for this dataset
        error_func = numpy.power(
            (numpy.log(ignition_delays_sim) -
             numpy.log(ignition_delays_exp)) / standard_dev, 2
            )
        error_func = numpy.nanmean(error_func)
        error_func_sets[idx_set] = error_func
        dataset_meta['error function'] = float(error_func)

        dev_func = (numpy.log(ignition_delays_sim) -
                    numpy.log(ignition_delays_exp)
                    ) / standard_dev
        dev_func = numpy.nanmean(dev_func)
        dev_func_sets[idx_set] = dev_func
        dataset_meta['absolute deviation'] = float(dev_func)

        output['datasets'].append(dataset_meta)

        if print_results:
            print('Done with ' + dataset)

    # Overall error function
    error_func = numpy.nanmean(error_func_sets)
    if print_results:
        print('overall error function: ' + repr(error_func))
        print('error standard deviation: ' +
              repr(numpy.nanstd(error_func_sets)))

    # Absolute deviation function
    abs_dev_func = numpy.nanmean(dev_func_sets)
    if print_results:
        print('absolute deviation function: ' + repr(abs_dev_func))

    output['average error function'] = float(error_func)
    output['error function standard deviation'] = float(numpy.nanstd(error_func_sets))
    output['average deviation function'] = float(abs_dev_func)

    # Write data to YAML file
    with open(splitext(basename(model_name))[0] + '-results.yaml', 'w') as f:
        yaml.dump(output, f)

    return output