5 datasets found
  1. FacialRecognition
     Test environment for FacialRecognition competition

    • kaggle.com
    zip
    Updated Dec 1, 2016
    Cite
    TheNicelander (2016). FacialRecognition [Dataset]. https://www.kaggle.com/petein/facialrecognition
    Explore at:
    Available download formats: zip (121674455 bytes)
    Dataset updated
    Dec 1, 2016
    Authors
    TheNicelander
    License

    http://opendatacommons.org/licenses/dbcl/1.0/

    Description

    # https://www.kaggle.com/c/facial-keypoints-detection/details/getting-started-with-r
    #################################

    ### Variables for the downloaded files
    data.dir   <- ' '
    train.file <- paste0(data.dir, 'training.csv')
    test.file  <- paste0(data.dir, 'test.csv')
    #################################

    ### Load the csv files -- read.csv creates a data.frame, where each column can have a different type.
    d.train <- read.csv(train.file, stringsAsFactors = F)
    d.test  <- read.csv(test.file,  stringsAsFactors = F)

    ### In training.csv we have 7049 rows, each one with 31 columns.
    ### The first 30 columns are keypoint locations, which R correctly identified as numbers.
    ### The last one is a string representation of the image.

    ### To look at a sample of the data, run:

    head(d.train)

    ### Save the Image column (the last column) as a separate variable and remove it from each data frame.
    ### d.train is our data frame, and we want the column called Image.
    ### Assigning NULL to a column removes it from the data frame.

    im.train      <- d.train$Image
    d.train$Image <- NULL  # removes 'Image' from the data frame

    im.test      <- d.test$Image
    d.test$Image <- NULL  # removes 'Image' from the data frame

    #################################
    # Each image is represented as a series of pixel values stored in a single string.
    # Convert these strings to integers by splitting them and converting the result.

    # strsplit splits the string, unlist simplifies its output to a vector of strings,
    # and as.integer converts that to a vector of integers.
    as.integer(unlist(strsplit(im.train[1], " ")))
    as.integer(unlist(strsplit(im.test[1], " ")))

    ### Install and load the appropriate libraries.
    ### The original tutorial targets Linux and OS X, where a parallel backend is registered and %dopar% is used;
    ### on Windows, replace all instances of %dopar% with %do%.

    install.packages('foreach')

    library("foreach", lib.loc="~/R/win-library/3.3")

    ### Convert every image string into a row of integers.
    im.train <- foreach(im = im.train, .combine = rbind) %do% {
      as.integer(unlist(strsplit(im, " ")))
    }
    im.test <- foreach(im = im.test, .combine = rbind) %do% {
      as.integer(unlist(strsplit(im, " ")))
    }
    # The foreach loop evaluates the inner expression for each element of im.train
    # and combines the results with rbind (combine by rows).
    # Note that %do% evaluates sequentially; %dopar% (with a registered parallel backend) would run in parallel.
    # im.train is now a matrix with 7049 rows (one for each image) and 9216 columns (one for each pixel).

    ### Save all four variables in a data.Rd file.
    ### They can be reloaded at any time with load('data.Rd').

    save(d.train, im.train, d.test, im.test, file='data.Rd')

    load('data.Rd')

    # Each image is a vector of 96*96 = 9216 pixels.
    # Convert these 9216 integers into a 96x96 matrix:
    im <- matrix(data=rev(im.train[1,]), nrow=96, ncol=96)

    # im.train[1,] returns the first row of im.train, which corresponds to the first training image.
    # rev reverses the resulting vector to match the interpretation of R's image function
    # (which expects the origin to be in the lower left corner).

    # To visualize the image we use R's image function:
    image(1:96, 1:96, im, col=gray((0:255)/255))

    # Let's color the coordinates of the eyes and nose:
    points(96-d.train$nose_tip_x[1],         96-d.train$nose_tip_y[1],         col="red")
    points(96-d.train$left_eye_center_x[1],  96-d.train$left_eye_center_y[1],  col="blue")
    points(96-d.train$right_eye_center_x[1], 96-d.train$right_eye_center_y[1], col="green")

    # Another good check is to see how variable the data are.
    # For example, where are the nose centers in the 7049 images? (This takes a while to run.)
    for (i in 1:nrow(d.train)) {
      points(96-d.train$nose_tip_x[i], 96-d.train$nose_tip_y[i], col="red")
    }

    # There are quite a few outliers -- they could be labeling errors. Looking at one extreme example:
    # in this case there is no labeling error, but it shows that not all faces are centered.
    idx <- which.max(d.train$nose_tip_x)
    im  <- matrix(data=rev(im.train[idx,]), nrow=96, ncol=96)
    image(1:96, 1:96, im, col=gray((0:255)/255))
    points(96-d.train$nose_tip_x[idx], 96-d.train$nose_tip_y[idx], col="red")

    # One of the simplest things to try is to compute the mean of the coordinates of each keypoint
    # in the training set and use that as the prediction for all images:
    colMeans(d.train, na.rm=T)

    # To build a submission file we apply these mean coordinates to the test instances:
    p <- matrix(data=colMeans(d.train, na.rm=T), nrow=nrow(d.test), ncol=ncol(d.train), byrow=T)
    colnames(p) <- names(d.train)
    predictions <- data.frame(ImageId = 1:nrow(d.test), p)
    head(predictions)

    # The expected submission format has one keypoint per row, which we can get with the help of the reshape2 library:

    install.packages('reshape2')

    library(reshape2)

  2. Data from: Natal experience and pre-breeding environmental conditions affect lay date plasticity in Savannah sparrows

    • figshare.com
    • datasetcatalog.nlm.nih.gov
    rtf
    Updated Sep 7, 2021
    Cite
    Joseph B. Burant; Eric Heisey; Nathaniel T. Wheelwright; Amy E. M. Newman; Shannon Whelan; Daniel J. Mennill; Stéphanie M. Doucet; Greg W. Mitchell; Bradley K. Woodworth; D. Ryan Norris (2021). Data from: Natal experience and pre-breeding environmental conditions affect lay date plasticity in Savannah sparrows [Dataset]. http://doi.org/10.6084/m9.figshare.14104829.v1
    Explore at:
    Available download formats: rtf
    Dataset updated
    Sep 7, 2021
    Dataset provided by
    Figshare (http://figshare.com/)
    Authors
    Joseph B. Burant; Eric Heisey; Nathaniel T. Wheelwright; Amy E. M. Newman; Shannon Whelan; Daniel J. Mennill; Stéphanie M. Doucet; Greg W. Mitchell; Bradley K. Woodworth; D. Ryan Norris
    License

    Attribution 4.0 (CC BY 4.0): https://creativecommons.org/licenses/by/4.0/
    License information was derived automatically

    Description

    Summary

    The repository includes the data and R script for performing an analysis of among- and within-individual differences in the timing of first nesting attempts of the year in relation to natal and pre-breeding environmental conditions (see reference). The data come from a long-term study of the demography of Savannah sparrows (Passerculus sandwichensis) breeding on Kent Island, New Brunswick, Canada (44.58°N, 66.76°W). Climate data were taken from an Environment and Climate Change Canada weather station at the airport in Saint John, NB (45.32°N, 65.89°W; https://www.climate.weather.gc.ca).

    Datasets

    (1) SAVS_all_nests_samp.csv: contains summary information for all nest attempts observed for all females included in the analysis (i.e., including both first-of-year and subsequent lay dates).
    (2) SAVS_first_nest_per_year_samp.csv: contains detailed information on the first nesting attempt by each female Savannah sparrow monitored in the population over the course of the study (1987-2019, excluding the years 2005-2007; see Methods: Study site and field sampling in reference).
    (3) mean_daily_temperature.csv: contains mean daily temperature records from the ECCC weather station at Saint John, NB (see above). These mean daily temperatures were used in a climate sensitivity analysis to determine the optimum pre-breeding window on Kent Island.
    (4) SAVS_annual_summary.csv: contains annual summaries of average lay dates, breeding density, reproductive output, etc.

    Variables

    - female.id = factor; unique aluminum band number (USGS or Canadian Wildlife Service) assigned to each female
    - rain.categorical = binary (0 = low rainfall; 1 = high rainfall); groups females into low (81-171 mm) and high (172-378 mm) natal rainfall groups, based on the natal environmental conditions observed in each year (see Methods: Statistical analysis in reference)
    - year = integer (1987-2019); study year. The population of Savannah sparrows on Kent Island has been monitored since 1987 (excluding three years, 2005-2007)
    - nest.id = factor; an alpha-numeric code assigned to each nest; unique within years (the combination of year and nest.id would create a unique identifier for each nest)
    - fledglings = integer; number of offspring fledged from a nest
    - total.fledglings = integer; the total number of fledglings reared by a given female over the course of her lifetime
    - nest.attempts = integer; the total number of nest attempts per female (the number of nests over which the total number of fledglings is divided; includes both successful and unsuccessful clutches)
    - hatch.yday = integer; day of the year on which the first egg hatched in a given nest
    - lay.ydate = integer; day of the year on which the first egg was laid in a given nest
    - lay.caldate = date (dd/mm/yyyy); calendar date on which the first egg in a given nest was laid
    - nestling.year = integer; the year in which the female/mother of a given nest was born
    - nestling.density = integer; the density of adult breeders in the year in which a given female (associated with a particular nest) was born
    - total.nestling.rain = numeric; cumulative rainfall (in mm) experienced by a female during the nestling period in her natal year of life (01 June to 31 July; see Methods: Temperature and precipitation data in reference)
    - years.experience = integer; number of previous breeding years per female in a particular year
    - density.total = integer; total number of adult breeders in the study site in a particular year
    - MCfden = numeric; mean-centred female density
    - MCbfden = numeric; mean-centred between-female density
    - MCwfden = numeric; mean-centred within-female density
    - mean.t.window = numeric; mean temperature during the identified pre-breeding window (03 May to 26 May; see Methods: Climate sensitivity analysis in reference)
    - MCtemp = numeric; mean-centred temperature during the optimal pre-breeding window
    - MCbtemp = numeric; mean-centred between-female temperature during the optimal pre-breeding window
    - MCwtemp = numeric; mean-centred within-female temperature during the optimal pre-breeding window
    - female.age = integer; age (in years) of a given female in a given year
    - MCage = numeric; mean-centred female age
    - MCbage = numeric; mean-centred between-female age
    - MCwage = numeric; mean-centred within-female age
    - mean_temp_c = numeric; mean daily temperature in °C
    - meanLD = numeric; mean lay date (in days of the year) across all first nest attempts in a given year
    - sdLD = numeric; standard deviation in lay date (in days of the year) across all first nest attempts in a given year
    - seLD = numeric; standard error in lay date (in days of the year) across all first nest attempts in a given year
    - meanTEMP = numeric; mean temperature (in °C) during the breeding period in a given year
    - records = integer; number of first nest attempts from each year included in the analysis
    - total.nestling.precip = numeric; total rainfall (in mm) during the nestling period (01 June to 31 July) in a given year
    - total.breeding.precip = numeric; total rainfall (in mm) during the breeding period (15 April to 31 July) in a given year
    - density.total = integer; total density of adult breeders on the study site in a given year
    - total.fledglings = integer; total number of offspring fledged by all breeders in the study site in a given year
    - cohort.fecundity = numeric; average number of offspring per breeder in a given year

    Code

    code for Burant et al. - SAVS lay date plasticity analysis.R

    The R script provided includes all the code required to import the data and perform the statistical analyses presented in the manuscript. These include:
    - t-tests investigating the effects of natal conditions (rain.categorical) on female age, nest attempts, and reproductive success
    - linear models of changes in temperature, precipitation, reproductive success, and population density over time, and lay dates in response to female age, density, etc.
    - a climate sensitivity analysis to identify the optimal pre-breeding window on Kent Island
    - mixed effects models investigating how lay dates respond to changes in within- and between-female age, density, and temperature

    See readme.rtf for a list of datasets and variables.
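    For orientation only, a rough Python sketch of the kind of mixed-effects model described above (the repository's own analysis is the R script listed under Code; which variables appear in which file, and the exact model structure, are assumptions here, not the authors' model):

    import pandas as pd
    import statsmodels.formula.api as smf

    # Load the per-female first-nest data (assumed to contain the variables used below)
    df = pd.read_csv("SAVS_first_nest_per_year_samp.csv")

    # Formula interfaces dislike dots in column names, so rename the ones used here
    df = df.rename(columns={"lay.ydate": "lay_ydate", "female.id": "female_id"})
    df = df.dropna(subset=["lay_ydate", "MCwtemp", "MCbtemp", "MCwage", "MCbage", "MCwfden", "MCbfden"])

    # Lay date modelled against within-/between-female temperature, age and density,
    # with female identity as a random intercept (illustrative structure only)
    model = smf.mixedlm(
        "lay_ydate ~ MCwtemp + MCbtemp + MCwage + MCbage + MCwfden + MCbfden",
        data=df,
        groups=df["female_id"],
    )
    print(model.fit().summary())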

  3. Grid Loss Prediction Dataset

    • kaggle.com
    zip
    Updated Oct 9, 2020
    Cite
    TrønderEnergi Kraft (2020). Grid Loss Prediction Dataset [Dataset]. http://doi.org/10.34740/kaggle/dsv/1546931
    Explore at:
    Available download formats: zip (2913127 bytes)
    Dataset updated
    Oct 9, 2020
    Authors
    TrønderEnergi Kraft
    License

    Attribution-ShareAlike 4.0 (CC BY-SA 4.0): https://creativecommons.org/licenses/by-sa/4.0/
    License information was derived automatically

    Description

    Context

    A power grid transports electricity from producers to consumers, but not all of the power that is produced is delivered to customers; some of it is lost in transmission or distribution. In Norway, the grid companies are responsible for reporting this grid loss to the institutions responsible for the national transmission network, and they have to nominate the expected loss to the market a day ahead so that the electricity price can be decided.

    The physics of grid losses is well understood, and the losses can be calculated quite accurately given the grid configuration. However, since the grid configuration is not always known and changes all the time, calculating grid losses is not straightforward.

    Content

    Grid loss is directly correlated with the total amount of power in the grid, which is also known as the grid load.

    We provide data for three different grids in Norway that are owned by Tensio (previously Trønderenergi Nett).

    Features: In this dataset, we provide the hourly values of all the features we found relevant for predicting the grid loss.

    For each of the grids, we have:
    1. Grid loss: historical measurements of grid loss in MWh
    2. Grid load: historical measurements of grid load in MWh
    3. Temperature forecast in Kelvin
    4. Predictions from the Prophet model in MWh
    5. Trend, daily, weekly and yearly components of the grid loss, also from the Prophet model

    Other than these grid-specific features, we provide:
    1. Calendar features: year, season, month, week, weekday, hour, in cyclic form (see Note 1), and whether the day is a holiday or not.
    2. Incorrect data: whether the data was marked incorrect by the experts, in retrospect. We recommend removing this data before training your model.
    3. Estimated demand in Trondheim: predicted demand for electricity in Trondheim, a big city in the middle of Norway, in MWh (see Note 2).

    We have split the dataset into two parts: training and testing set.

    Training set: This file (train.csv) contains two years of data (December 2017 to November 2019). All the features mentioned above are provided for this duration.

    Test set: This file (test.csv) contains six months of data (December 2019 to May 2020). All the features from training data are provided for the test set as well. Occasionally, some of the features could be missing.

    Additionally, we provide a copy of the test set (test_backfilled_missing_features.csv) in which the missing features have been backfilled.

    Notes:
    1. Calendar features are cyclic in nature. If we encode the weekdays (Monday to Sunday) as 0 to 6, Sunday and Monday are next to each other in time, but the distance between their encodings does not reflect that. To capture this cyclic nature, we created cyclic calendar features based on sine and cosine, which together place the highest and lowest values of a feature close to each other in the feature space (a short encoding sketch follows the citation below).
    2. We don't have an estimate of demand for all the grids. We used the demand predictions for Trondheim, the biggest city closest to the three grids.
    3. The grid load is directly proportional to the grid loss. We don't have predictions for grid load, but since we have historical measurements of it, it makes sense to predict it and use that prediction as a feature for predicting the grid loss.
    4. While the Prophet model did not perform well as a prediction tool for our dataset, we found it useful to include its predictions and other components as features in our model.
    5. Grid 3 has less training data than grid 1 and grid 2.
    6. We published our solution. For more details, please refer to:

    Dalal, N., Mølnå, M., Herrem, M., Røen, M., & Gundersen, O. E. (2020). Day-Ahead Forecasting of Losses in the Distribution Network. In AAAI (pp. 13148-13155).

    Bibtex format for citation:

    @incollection{dalal2020a,
      author    = {Dalal, N. and Mølnå, M. and Herrem, M. and Røen, M. and Gundersen, O.E.},
      date      = {2020},
      title     = {Day-Ahead Forecasting of Losses in the Distribution Network},
      pages     = {13148–13155},
      language  = {en},
      booktitle = {AAAI}
    }
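    To make Note 1 concrete, here is a minimal sketch of the sine/cosine encoding (illustrative only; it assumes pandas and NumPy and uses an hour-of-day feature as an example, not the exact column names in the files):

    import numpy as np
    import pandas as pd

    # Toy frame with a raw hourly calendar feature (0-23)
    df = pd.DataFrame({"hour": range(24)})

    # Cyclic encoding: map each hour onto the unit circle
    df["hour_sin"] = np.sin(2 * np.pi * df["hour"] / 24)
    df["hour_cos"] = np.cos(2 * np.pi * df["hour"] / 24)

    # In (hour_sin, hour_cos) space, hour 23 and hour 0 are now neighbours,
    # which the raw integer encoding (0 vs 23) fails to capture.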

    Challenges

    Working with clean and processed data often hides the complexity of running the model in deployment. Some of the challenges we had while predicting grid loss in deployment are:

    1. Day-ahead predictions: We need to predict the grid loss for the next day, at an hourly resolution, before 10:00 on the current day. For example, at 10:00 on May 26, 2020, we need to predict the grid loss for May 27, 2020, from midnight to 23:00 (24 values) for each grid.
    2. Delayed measurements: We don't receive the measured values of load and loss immediately; they arrive 5 days later, sometimes with additional delays of a few more days. While grid loss and load are provided for the test set as well, DO NOT USE them as features unless they are at least 6 days old, i.e. when predicting grid loss for 27th January 2020, you may use grid loss values up to 20th January 2020. Using more recent grid loss or grid load data is unfair and such results will be discarded (see the sketch after this list).
    3. Missing data: Sometimes we don't receive some of the features; for example, the weather service might be unavailable. You should make sure that your model works even when some features are unavailable or missing.
    4. Incorrect data: There have been times when the measurements we received were incorrect, by a big margin. They have been marked in the dataset in the incorrect_data column. It is recommended to remove those data points before you start analysing the data.
    5. Less training data: For one of the grids, grid 3, we only have a few months of data.
    6. Changes in the grids: Grid structures can keep changing. Sometimes new big consumers are added, or small grids can be merged into big ones.
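    A rough pandas sketch of how one might respect challenges 2 and 4 when building features (apart from incorrect_data, the column names below are assumptions, not the actual schema of train.csv):

    import pandas as pd

    df = pd.read_csv("train.csv")

    # Challenge 2: only measurements at least 6 days old may be used as features.
    # With hourly data, 6 days = 144 rows, so lag the measured loss by 144 steps
    # before doing anything that changes row alignment.
    df["loss_lag_6d"] = df["grid_loss"].shift(144)  # 'grid_loss' is an assumed column name

    # Challenge 4: drop the rows flagged as incorrect by the experts before training.
    df = df[~df["incorrect_data"].astype(bool)]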

    Acknowledgements

    We wouldn't be here without the help of others. We would like to thank Tensio for allowing us to make their grid data public in the interest of open science and research. We would also like to thank the AI group in NTNU for strong collaborations and scientific discussions. If you use this dataset, please cite the following paper: Dalal, N., Mølnå, M., Herrem, M., Røen, M., & Gundersen, O. E. (2020). Day-Ahead Forecasting of Losses in the Distribution Network. In AAAI (pp. 13148-13155).

  4. Smartwatch Purchase Data

    • kaggle.com
    zip
    Updated Dec 30, 2022
    Cite
    Aayush Chourasiya (2022). Smartwatch Purchase Data [Dataset]. https://www.kaggle.com/datasets/albedo0/smartwatch-purchase-data/discussion
    Explore at:
    Available download formats: zip (2230268 bytes)
    Dataset updated
    Dec 30, 2022
    Authors
    Aayush Chourasiya
    Description

    Disclaimer: This is artificially generated data, produced by a Python script based on the arbitrary assumptions listed below.

    The data consists of 100,000 examples of training data and 10,000 examples of test data, each representing a user who may or may not buy a smart watch.

    ----- Version 1 -------

    trainingDataV1.csv, testDataV1.csv (or trainingData.csv, testData.csv)

    The data includes the following features for each user:
    1. age: The age of the user (integer, 18-70)
    2. income: The income of the user (integer, 25,000-200,000)
    3. gender: The gender of the user (string, "male" or "female")
    4. maritalStatus: The marital status of the user (string, "single", "married", or "divorced")
    5. hour: The hour of the day (integer, 0-23)
    6. weekend: A boolean indicating whether it is the weekend (True or False)

    The data also includes a label for each user (buySmartWatch) indicating whether they are likely to buy a smart watch (string, "yes" or "no"). The label is determined based on the following arbitrary conditions:
    - If the user is divorced and a random number generated by the script is less than 0.4, the label is "no" (i.e., assuming 40% of divorcees are not likely to buy a smart watch).
    - If it is the weekend and a random number generated by the script is less than 1.3, the label is "yes" (i.e., assuming sales are 30% more likely to occur on weekends).
    - If the user is male and under 30 with an income over 75,000, the label is "yes".
    - If the user is female and 30 or over with an income over 100,000, the label is "yes".
    - Otherwise, the label is "no".

    The training data is intended to be used to build and train a classification model, and the test data to evaluate the performance of the trained model (a minimal training sketch follows the generation script below).

    The following Python script was used to generate this dataset:

    import random
    import csv
    
    # Set the number of examples to generate
    numExamples = 100000
    
    # Generate the training data
    with open("trainingData.csv", "w", newline="") as csvfile:
      fieldnames = ["age", "income", "gender", "maritalStatus", "hour", "weekend", "buySmartWatch"]
      writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    
      writer.writeheader()
    
      for i in range(numExamples):
        age = random.randint(18, 70)
        income = random.randint(25000, 200000)
        gender = random.choice(["male", "female"])
        maritalStatus = random.choice(["single", "married", "divorced"])
        hour = random.randint(0, 23)
        weekend = random.choice([True, False])
    
        # Randomly assign the label based on some arbitrary conditions
        # assuming 40% of divorcees won't buy a smart watch
        if maritalStatus == "divorced" and random.random() < 0.4:
          buySmartWatch = "no"
        # assuming sales are 30% more likely to occur on weekends.
        elif weekend == True and random.random() < 1.3:
          buySmartWatch = "yes"
        elif gender == "male" and age < 30 and income > 75000:
          buySmartWatch = "yes"
        elif gender == "female" and age >= 30 and income > 100000:
          buySmartWatch = "yes"
        else:
          buySmartWatch = "no"
    
        writer.writerow({
          "age": age,
          "income": income,
          "gender": gender,
          "maritalStatus": maritalStatus,
          "hour": hour,
          "weekend": weekend,
          "buySmartWatch": buySmartWatch
        })
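
    Below is a minimal, illustrative sketch of the intended use of the generated files: training a classifier on trainingData.csv and scoring it on testData.csv. It assumes pandas and scikit-learn are available; the model choice is arbitrary and not part of the dataset.

    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score

    # Load the generated files (names as listed above)
    train = pd.read_csv("trainingData.csv")
    test = pd.read_csv("testData.csv")

    # One-hot encode the categorical/boolean features
    features = ["age", "income", "gender", "maritalStatus", "hour", "weekend"]
    X_train = pd.get_dummies(train[features], columns=["gender", "maritalStatus", "weekend"])
    X_test = pd.get_dummies(test[features], columns=["gender", "maritalStatus", "weekend"])
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)  # align dummy columns

    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, train["buySmartWatch"])

    print("Test accuracy:", accuracy_score(test["buySmartWatch"], clf.predict(X_test)))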
    

    ----- Version 2 -------

    trainingDataV2.csv, testDataV2.csv

    The data includes the following features for each user:
    1. age: The age of the user (integer, 18-70)
    2. income: The income of the user (integer, 25,000-200,000)
    3. gender: The gender of the user (string, "male" or "female")
    4. maritalStatus: The marital status of the user (string, "single", "married", or "divorced")
    5. educationLevel: The education level of the user (string, "high school", "associate's degree", "bachelor's degree", "master's degree", or "doctorate")
    6. occupation: The occupation of the user (string, "tech worker", "manager", "executive", "sales", "customer service", "creative", "manual labor", "healthcare", "education", "government", "unemployed", or "student")
    7. familySize: The number of people in the user's family (integer, 1-5)
    8. fitnessInterest: A boolean indicating whether the user is interested in fitness (True or False)
    9. priorSmartwatchOwnership: A boolean indicating whether the user has owned a smartwatch in the past (True or False)
    10. hour: The hour of the day when the user was surveyed (integer, 0-23)
    11. weekend: A boolean indicating whether the user was surveyed on a weekend (True or False)
    12. buySmartWatch: A boolean indicating whether the user purchased a smartwatch (True or False)

    Python script used to generate the data:

    import random
    import csv
    
    # Set the number of examples to generate
    numExamples = 100000
    
    with open("t...
    
  5. Database of Uniaxial Shaking Table Tests for a Two-Storey Steel-Frame Structure

    • data.niaid.nih.gov
    • zenodo.org
    Updated Jul 10, 2024
    Cite
    Lenzen, Niklas; Balaskas, Georgios; Kolisch, Florian; Altay, Okyay; Hoffmeister, Benno; Klinkel, Sven (2024). Database of Uniaxial Shaking Table Tests for a Two-Storey Steel-Frame Structure [Dataset]. https://data.niaid.nih.gov/resources?id=zenodo_10134010
    Explore at:
    Dataset updated
    Jul 10, 2024
    Dataset provided by
    Institute of Structural Analysis and Dynamics (LBB), RWTH Aachen University
    Institute of Structural Steel (STB), RWTH Aachen University
    Authors
    Lenzen, Niklas; Balaskas, Georgios; Kolisch, Florian; Altay, Okyay; Hoffmeister, Benno; Klinkel, Sven
    License

    Attribution-ShareAlike 4.0 (CC BY-SA 4.0): https://creativecommons.org/licenses/by-sa/4.0/
    License information was derived automatically

    Description

    Description:

    This data set contains data from experiments conducted on a two-storey steel-frame structure, involving sine-sweep, white noise, impulse, and earthquake loading. The experiments were carried out using a uniaxial shaking table of the Institute of Structural Analysis and Dynamics (LBB) - RWTH Aachen University, in cooperation with the Institute of Structural Steel (STB) - RWTH Aachen and the Center for Wind and Earthquake Engineering (CWE) – RWTH Aachen.

    Test structure:

    The test structure was a two-storey, single-bay moment resisting frame (MRF) in the direction of excitation. In the perpendicular direction, a concentrically braced system provided lateral stability. The dimensions of the test structure were: 2.40 m length, 2.40 m width, and 3.78 m total height. Each floor had a distinct height: 2.02 m for the first floor and 1.76 m for the second. The moment resisting connections were realized as bolted extended unstiffened endplate joints. The column web panel was strengthened by two supplementary web plates (SWP) and continuity plates (CP). The base plate was modified to allow rotational movement in order to approximate a pinned boundary condition. In addition, diaphragm action was ensured using diagonally arranged L-profiles in the plane of each floor. For additional mass, four steel I-profiles, each weighing 1650 kg, were attached to the main structure (two on each floor) and secured using steel U-profiles. An industrial pressure vessel with a self-weight of 100 kg, which remained empty for the first phase of the experimental campaign, was additionally mounted on the first floor.

    The properties of the frame structure are:

    Columns: HEA200 S355-J2 + 2 SWP + 6 CP

    Beams: HEA160 S235-JR

    Bolts: 8x M16, 10.9 HV

    Joints: Partial strength, semi-rigid

    End-plate: 272x190x8 mm

    Welds: Full penetration groove welds

    Test setup:

    The shaking table specifications are:

    Table size: 3.0x3.0 m

    Max. specimen mass: 10 t

    Max. overturning moment: 30 m t

    Max. actuator stroke: +/- 250 mm

    Max. table velocity: +/- 1 m/s at rated load

    Max. table acceleration: +/- 1g at rated load

    Test frequency: 0 to 50 Hz

    The instrumentation scheme of the test setup consisted mainly of accelerometers, displacement transducers and strain gauges, measuring the excitation provided by the shaking table and the response of the structure. Regarding the global response of the test structure, the recordings of the accelerometers and displacement transducers indicated in the uploaded file 'Instrumentation_Scheme_v1.0.0.pdf' are provided.

    The properties of the accelerometers are:

    Type: M3701-series

    Manufacturer: PCB Piezotronics, Inc.

    Measurement range: +/- 3g

    Frequency range: 0-500 Hz

    Sensitivity: 900 mV/g

    Resolution: 2.2e-5g

    Noise: 1 µg/√Hz

    The properties of the displacement transducers are:

    Type: LZW-M-500

    Manufacturer: WayCon Positionsmesstechnik GmbH

    Measurement range: +/- 250 mm

    Linearity: +/- 0.05%

    Repeatability: 0.01 mm

    Displacement force: ≤15 N

    Displacement speed: ≤5 m/s

    Files:

    Load_Protocols_v1.0.0.pdf

    .pdf file listing all load protocols applied to the structure.

    Time_Histories_v1.0.0.pdf

    .pdf file displaying the acceleration and displacement time histories according to the load protocols.

    Data_v1.0.0.zip

    Contains all data files according to the load protocols.

    The experimental data is provided as .csv files for each load protocol. The experiments are named by the load protocol. A .pdf file contains the corresponding data plots.

    References_v1.0.0.bib

    Contains a bibtex reference with the associated publications.

    Shake_Table.jpg

    Photo of the shaking table without any specimen.

    Test_Structure_v1.0.0.jpg

    Photo of the shaking table including the test structure.

    Test_Structure_Sketch_v1.0.0.pdf

    .pdf file illustrating the test structure.

    Instrumentation_Scheme_v1.0.0.pdf

    .pdf file illustrating the sensor placements on the test structure.

    File format of the data sets:

    The data is stored in .csv files, where each file contains the following columns:

    Time: Time in seconds since the start of the test (time step equals 0.003 s).

    Acc_0: Acceleration signal measured in m/s2 on the shaking table.

    Acc_1: Acceleration response of the structure measured in m/s2 on the first floor.

    Acc_2: Acceleration response of the structure measured in m/s2 on the second floor.

    Disp_0: Displacement signal measured in mm on the shaking table.

    Disp_1: Displacement response of the structure measured in mm on the first floor.

    Disp_2: Displacement response of the structure measured in mm on the second floor.

    These data files can easily be loaded using the pandas library in Python, for example:

    import pandas as pd

    df = pd.read_csv('LP01_Sweep_001.csv')
    time = df["Time"]
    acc_0 = df["Acc_0"]
    disp_0 = df["Disp_0"]
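    As a further illustration (assuming matplotlib is installed; the file name and column names are taken from the format description above), the table input and floor responses can be plotted against time:

    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv('LP01_Sweep_001.csv')

    # Shaking-table input vs. structural response on the two floors
    plt.plot(df["Time"], df["Acc_0"], label="Table (Acc_0)")
    plt.plot(df["Time"], df["Acc_1"], label="First floor (Acc_1)")
    plt.plot(df["Time"], df["Acc_2"], label="Second floor (Acc_2)")
    plt.xlabel("Time [s]")
    plt.ylabel("Acceleration [m/s2]")
    plt.legend()
    plt.show()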

    Contact:

    Please send your enquiries regarding the shaking table to dynamics@lbb.rwth-aachen.de. Further information can be found on our website.

    Usage/License:

    The data is licensed under CC BY-SA 4.0.

    If you have used our data and are publishing your work, we ask you to please reference both

    this database by its DOI, and

    any publication that is associated with the experiments. See the "References_v1.0.0.bib" for the associated publication references.

    Fundings:

    Deutsche Forschungsgemeinschaft (German Research Foundation) - Grant number: INST 222/1161-1 FUGG. Einaxialer Schwingtisch für dynamische Modell- und Bauteilversuche (uniaxial shaking table for dynamic model and component tests).

    Bundesministerium für Bildung und Forschung (Federal Ministry of Education and Research) - Grant number: 03G0892A. ROBUST – Nutzerorientiertes Erdbebenfrühwarnsystem mit intelligenten Sensorsystemen und digitalen Bauwerksmodellen – Entwicklung Installation und Anwendung von sensorbasierten Monitoringsystemen mit BIM-Integration zur Echtzeit-Schadenerkennung in kritischen Infrastrukturen (user-oriented earthquake early-warning system with intelligent sensor systems and digital building models; development, installation, and application of sensor-based monitoring systems with BIM integration for real-time damage detection in critical infrastructure).

